Completed: Pull Request — master (#58), created by unknown at 01:15

parse_elasticsearch_storage()   B

Complexity
    Conditions: 5

Size
    Total lines: 16

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 0

Metric   Value
cc       5        (cyclomatic complexity)
dl       0        (duplicated lines)
loc      16       (lines of code)
rs       8.5454
c        0
b        0
f        0
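
For context on the flagged function, here is a minimal usage sketch of parse_elasticsearch_storage() as it is defined in the listing below. The URL, host names, and the pytest_benchmark.utils import path are illustrative assumptions, not details taken from this pull request:

# Import path assumed; the function is defined near the end of the listing below.
from pytest_benchmark.utils import parse_elasticsearch_storage

hosts, index, doctype, project_name = parse_elasticsearch_storage(
    "http://es1:9200,es2:9200/myindex/mydoctype?project_name=myproject"
)
assert hosts == ["http://es1:9200", "http://es2:9200"]   # one host per comma-separated netloc entry
assert (index, doctype) == ("myindex", "mydoctype")      # first two path segments, defaults otherwise
assert project_name == "myproject"                       # from the query string, else get_project_name()

The full source of the file under review follows.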
from __future__ import division
from __future__ import print_function

import argparse
import genericpath
import json
import ntpath
import os
import platform
import re
import subprocess
import sys
import types
from datetime import datetime
from decimal import Decimal
from functools import partial

try:
    from urllib.parse import urlparse, parse_qs
except ImportError:
    from urlparse import urlparse, parse_qs

from .compat import PY3

try:
    from subprocess import check_output
except ImportError:
    def check_output(*popenargs, **kwargs):
        if 'stdout' in kwargs:
            raise ValueError('stdout argument not allowed, it will be overridden.')
        process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
        output, unused_err = process.communicate()
        retcode = process.poll()
        if retcode:
            cmd = kwargs.get("args")
            if cmd is None:
                cmd = popenargs[0]
            raise subprocess.CalledProcessError(retcode, cmd)
        return output

TIME_UNITS = {
    "": "Seconds",
    "m": "Miliseconds (ms)",
    "u": "Microseconds (us)",
    "n": "Nanoseconds (ns)"
}
ALLOWED_COLUMNS = ["min", "max", "mean", "stddev", "median", "iqr", "outliers", "rounds", "iterations"]


class SecondsDecimal(Decimal):
    def __float__(self):
        return float(super(SecondsDecimal, self).__str__())

    def __str__(self):
        return "{0}s".format(format_time(float(super(SecondsDecimal, self).__str__())))

    @property
    def as_string(self):
        return super(SecondsDecimal, self).__str__()


class NameWrapper(object):
    def __init__(self, target):
        self.target = target

    def __str__(self):
        name = self.target.__module__ + "." if hasattr(self.target, '__module__') else ""
        name += self.target.__name__ if hasattr(self.target, '__name__') else repr(self.target)
        return name

    def __repr__(self):
        return "NameWrapper(%s)" % repr(self.target)


def get_tag(project_name=None):
    info = get_commit_info(project_name)
    parts = []
    if info['project']:
        parts.append(info['project'])
    parts.append(info['id'])
    parts.append(get_current_time())
    if info['dirty']:
        parts.append("uncommited-changes")
    return "_".join(parts)


def get_machine_id():
    return "%s-%s-%s-%s" % (
        platform.system(),
        platform.python_implementation(),
        ".".join(platform.python_version_tuple()[:2]),
        platform.architecture()[0]
    )


def get_project_name():
    if os.path.exists('.git'):
        try:
            project_address = check_output("git config --local remote.origin.url".split())
            if isinstance(project_address, bytes) and str != bytes:
                project_address = project_address.decode()
            project_name = re.findall(r'/([^/]*)\.git', project_address)[0]
            return project_name
        except (IndexError, subprocess.CalledProcessError):
            return os.path.basename(os.getcwd())
    elif os.path.exists('.hg'):
        try:
            project_address = check_output("hg path default".split())
            project_address = project_address.decode()
            project_name = project_address.split("/")[-1]
            return project_name.strip()
        except (IndexError, subprocess.CalledProcessError):
            return os.path.basename(os.getcwd())
    else:
        return os.path.basename(os.getcwd())


def get_commit_info(project_name=None):
    dirty = False
    commit = 'unversioned'
    project_name = project_name or get_project_name()
    try:
        if os.path.exists('.git'):
            desc = check_output('git describe --dirty --always --long --abbrev=40'.split(),
                                universal_newlines=True).strip()
            desc = desc.split('-')
            if desc[-1].strip() == 'dirty':
                dirty = True
                desc.pop()
            commit = desc[-1].strip('g')
        elif os.path.exists('.hg'):
            desc = check_output('hg id --id --debug'.split(), universal_newlines=True).strip()
            if desc[-1] == '+':
                dirty = True
            commit = desc.strip('+')
        return {
            'id': commit,
            'dirty': dirty,
            'project': project_name,
        }
    except Exception as exc:
        return {
            'id': 'unknown',
            'dirty': dirty,
            'error': repr(exc),
            'project': project_name,
        }


def get_current_time():
    return datetime.utcnow().strftime("%Y%m%d_%H%M%S")


def first_or_value(obj, value):
    if obj:
        value, = obj

    return value


def short_filename(path, machine_id=None):
    parts = []
    try:
        last = len(path.parts) - 1
    except AttributeError:
        return str(path)
    for pos, part in enumerate(path.parts):
        if not pos and part == machine_id:
            continue
        if pos == last:
            part = part.rsplit('.', 1)[0]
            # if len(part) > 16:
            #     part = "%.13s..." % part
        parts.append(part)
    return '/'.join(parts)


def load_timer(string):
    if "." not in string:
        raise argparse.ArgumentTypeError("Value for --benchmark-timer must be in dotted form. Eg: 'module.attr'.")
    mod, attr = string.rsplit(".", 1)
    if mod == 'pep418':
        if PY3:
            import time
            return NameWrapper(getattr(time, attr))
        else:
            from . import pep418
            return NameWrapper(getattr(pep418, attr))
    else:
        __import__(mod)
        mod = sys.modules[mod]
        return NameWrapper(getattr(mod, attr))


class RegressionCheck(object):
    def __init__(self, field, threshold):
        self.field = field
        self.threshold = threshold

    def fails(self, current, compared):
        val = self.compute(current, compared)
        if val > self.threshold:
            return "Field %r has failed %s: %.9f > %.9f" % (
                self.field, self.__class__.__name__, val, self.threshold
            )


class PercentageRegressionCheck(RegressionCheck):
    def compute(self, current, compared):
        val = compared[self.field]
        if not val:
            return float("inf")
        return current[self.field] / val * 100 - 100


class DifferenceRegressionCheck(RegressionCheck):
    def compute(self, current, compared):
        return current[self.field] - compared[self.field]


def parse_compare_fail(string,
                       rex=re.compile('^(?P<field>min|max|mean|median|stddev|iqr):'
                                      '((?P<percentage>[0-9]?[0-9])%|(?P<difference>[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?))$')):
    m = rex.match(string)
    if m:
        g = m.groupdict()
        if g['percentage']:
            return PercentageRegressionCheck(g['field'], int(g['percentage']))
        elif g['difference']:
            return DifferenceRegressionCheck(g['field'], float(g['difference']))

    raise argparse.ArgumentTypeError("Could not parse value: %r." % string)


def parse_warmup(string):
    string = string.lower().strip()
    if string == "auto":
        return platform.python_implementation() == "PyPy"
    elif string in ["off", "false", "no"]:
        return False
    elif string in ["on", "true", "yes", ""]:
        return True
    else:
        raise argparse.ArgumentTypeError("Could not parse value: %r." % string)


def name_formatter_short(bench):
    name = bench["name"]
    if bench["source"]:
        name = "%s (%.4s)" % (name, os.path.split(bench["source"])[-1])
    if name.startswith("test_"):
        name = name[5:]
    return name


def name_formatter_normal(bench):
    name = bench["name"]
    if bench["source"]:
        parts = bench["source"].split('/')
        parts[-1] = parts[-1][:12]
        name = "%s (%s)" % (name, '/'.join(parts))
    return name


def name_formatter_long(bench):
    if bench["source"]:
        return "%(fullname)s (%(source)s)" % bench
    else:
        return bench["fullname"]


NAME_FORMATTERS = {
    "short": name_formatter_short,
    "normal": name_formatter_normal,
    "long": name_formatter_long,
}


def parse_name_format(string):
    string = string.lower().strip()
    if string in NAME_FORMATTERS:
        return string
    else:
        raise argparse.ArgumentTypeError("Could not parse value: %r." % string)


def parse_timer(string):
    return str(load_timer(string))


def parse_sort(string):
    string = string.lower().strip()
    if string not in ("min", "max", "mean", "stddev", "name", "fullname"):
        raise argparse.ArgumentTypeError(
            "Unacceptable value: %r. "
            "Value for --benchmark-sort must be one of: 'min', 'max', 'mean', "
            "'stddev', 'name', 'fullname'." % string)
    return string


def parse_columns(string):
    columns = [str.strip(s) for s in string.lower().split(',')]
    invalid = set(columns) - set(ALLOWED_COLUMNS)
    if invalid:
        # there are extra items in columns!
        msg = "Invalid column name(s): %s. " % ', '.join(invalid)
        msg += "The only valid column names are: %s" % ', '.join(ALLOWED_COLUMNS)
        raise argparse.ArgumentTypeError(msg)
    return columns


def parse_rounds(string):
    try:
        value = int(string)
    except ValueError as exc:
        raise argparse.ArgumentTypeError(exc)
    else:
        if value < 1:
            raise argparse.ArgumentTypeError("Value for --benchmark-rounds must be at least 1.")
        return value


def parse_seconds(string):
    try:
        return SecondsDecimal(string).as_string
    except Exception as exc:
        raise argparse.ArgumentTypeError("Invalid decimal value %r: %r" % (string, exc))


def parse_save(string):
    if not string:
        raise argparse.ArgumentTypeError("Can't be empty.")
    illegal = ''.join(c for c in r"\/:*?<>|" if c in string)
    if illegal:
        raise argparse.ArgumentTypeError("Must not contain any of these characters: /:*?<>|\\ (it has %r)" % illegal)
    return string


def parse_elasticsearch_storage(string, default_index="benchmark", default_doctype="benchmark"):
    storage_url = urlparse(string)
    hosts = ["{scheme}://{netloc}".format(scheme=storage_url.scheme, netloc=netloc) for netloc in storage_url.netloc.split(',')]
    index = default_index
    doctype = default_doctype
    if storage_url.path:
        splitted = storage_url.path.strip("/").split("/")
        index = splitted[0]
        if len(splitted) >= 2:
            doctype = splitted[1]
    query = parse_qs(storage_url.query)
    try:
        project_name = query["project_name"][0]
    except KeyError:
        project_name = get_project_name()
    return hosts, index, doctype, project_name


def time_unit(value):
    if value < 1e-6:
        return "n", 1e9
    elif value < 1e-3:
        return "u", 1e6
    elif value < 1:
        return "m", 1e3
    else:
        return "", 1.


def format_time(value):
    unit, adjustment = time_unit(value)
    return "{0:.2f}{1:s}".format(value * adjustment, unit)


class cached_property(object):
    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.func = func

    def __get__(self, obj, cls):
        if obj is None:
            return self
        value = obj.__dict__[self.func.__name__] = self.func(obj)
        return value


def funcname(f):
    try:
        if isinstance(f, partial):
            return f.func.__name__
        else:
            return f.__name__
    except AttributeError:
        return str(f)


def clonefunc(f):
    """Deep clone the given function to create a new one.

    By default, the PyPy JIT specializes the assembler based on f.__code__:
    clonefunc makes sure that you will get a new function with a **different**
    __code__, so that PyPy will produce independent assembler. This is useful
    e.g. for benchmarks and microbenchmarks, so you can make sure to compare
    apples to apples.

    Use it with caution: if abused, this might easily produce an explosion of
    produced assembler.

    from: https://bitbucket.org/antocuni/pypytools/src/tip/pypytools/util.py?at=default
    """

    # first of all, we clone the code object
    try:
        co = f.__code__
        if PY3:
            co2 = types.CodeType(co.co_argcount, co.co_kwonlyargcount,
                                 co.co_nlocals, co.co_stacksize, co.co_flags, co.co_code,
                                 co.co_consts, co.co_names, co.co_varnames, co.co_filename, co.co_name,
                                 co.co_firstlineno, co.co_lnotab, co.co_freevars, co.co_cellvars)
        else:
            co2 = types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, co.co_flags, co.co_code,
                                 co.co_consts, co.co_names, co.co_varnames, co.co_filename, co.co_name,
                                 co.co_firstlineno, co.co_lnotab, co.co_freevars, co.co_cellvars)
        #
        # then, we clone the function itself, using the new co2
        return types.FunctionType(co2, f.__globals__, f.__name__, f.__defaults__, f.__closure__)
    except AttributeError:
        return f


def format_dict(obj):
    return "{%s}" % ", ".join("%s: %s" % (k, json.dumps(v)) for k, v in sorted(obj.items()))


class SafeJSONEncoder(json.JSONEncoder):
    def default(self, o):
        return "UNSERIALIZABLE[%r]" % o


def safe_dumps(obj, **kwargs):
    return json.dumps(obj, cls=SafeJSONEncoder, **kwargs)


def report_progress(iterable, terminal_reporter, format_string, **kwargs):
    total = len(iterable)

    def progress_reporting_wrapper():
        for pos, item in enumerate(iterable):
            string = format_string.format(pos=pos + 1, total=total, value=item, **kwargs)
            terminal_reporter.rewrite(string, black=True, bold=True)
            yield string, item
    return progress_reporting_wrapper()


def report_noprogress(iterable, *args, **kwargs):
    for pos, item in enumerate(iterable):
        yield "", item


def slugify(name):
    for c in "\/:*?<>| ":
        name = name.replace(c, '_').replace('__', '_')
    return name


def commonpath(paths):
    """Given a sequence of path names, returns the longest common sub-path."""

    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'\\'
        altsep = b'/'
        curdir = b'.'
    else:
        sep = '\\'
        altsep = '/'
        curdir = '.'

    try:
        drivesplits = [ntpath.splitdrive(p.replace(altsep, sep).lower()) for p in paths]
        split_paths = [p.split(sep) for d, p in drivesplits]

        try:
            isabs, = set(p[:1] == sep for d, p in drivesplits)
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths")

        # Check that all drive letters or UNC paths match. The check is made only
        # now otherwise type errors for mixing strings and bytes would not be
        # caught.
        if len(set(d for d, p in drivesplits)) != 1:
            raise ValueError("Paths don't have the same drive")

        drive, path = ntpath.splitdrive(paths[0].replace(altsep, sep))
        common = path.split(sep)
        common = [c for c in common if c and c != curdir]

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        s1 = min(split_paths)
        s2 = max(split_paths)
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = common[:i]
                break
        else:
            common = common[:len(s1)]

        prefix = drive + sep if isabs else drive
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        genericpath._check_arg_types('commonpath', *paths)
        raise
513