Completed: push to master (719867...30d2f4) by Ionel Cristian (01:05, created)

parse_elasticsearch_storage()   B

Complexity: Conditions 6
Size: Total Lines 16
Duplication: Lines 0, Ratio 0 %
Importance: Changes 0

Metric  Value
cc      6
c       0
b       0
f       0
dl      0
loc     16
rs      8
from __future__ import division
from __future__ import print_function

import argparse
import genericpath
import json
import ntpath
import os
import platform
import re
import subprocess
import sys
import types
from datetime import datetime
from decimal import Decimal
from functools import partial

try:
    from urllib.parse import urlparse, parse_qs
except ImportError:
    from urlparse import urlparse, parse_qs

from .compat import PY3

try:
    from subprocess import check_output
except ImportError:
    def check_output(*popenargs, **kwargs):
        if 'stdout' in kwargs:
            raise ValueError('stdout argument not allowed, it will be overridden.')
        process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
        output, unused_err = process.communicate()
        retcode = process.poll()
        if retcode:
            cmd = kwargs.get("args")
            if cmd is None:
                cmd = popenargs[0]
            raise subprocess.CalledProcessError(retcode, cmd)
        return output

TIME_UNITS = {
    "": "Seconds",
    "m": "Milliseconds (ms)",
    "u": "Microseconds (us)",
    "n": "Nanoseconds (ns)"
}
ALLOWED_COLUMNS = ["min", "max", "mean", "stddev", "median", "iqr", "outliers", "rounds", "iterations"]


class SecondsDecimal(Decimal):
    def __float__(self):
        return float(super(SecondsDecimal, self).__str__())

    def __str__(self):
        return "{0}s".format(format_time(float(super(SecondsDecimal, self).__str__())))

    @property
    def as_string(self):
        return super(SecondsDecimal, self).__str__()


class NameWrapper(object):
    def __init__(self, target):
        self.target = target

    def __str__(self):
        name = self.target.__module__ + "." if hasattr(self.target, '__module__') else ""
        name += self.target.__name__ if hasattr(self.target, '__name__') else repr(self.target)
        return name

    def __repr__(self):
        return "NameWrapper(%s)" % repr(self.target)


def get_tag(project_name=None):
    info = get_commit_info(project_name)
    parts = []
    if info['project']:
        parts.append(info['project'])
    parts.append(info['id'])
    parts.append(get_current_time())
    if info['dirty']:
        parts.append("uncommited-changes")
    return "_".join(parts)


def get_machine_id():
    return "%s-%s-%s-%s" % (
        platform.system(),
        platform.python_implementation(),
        ".".join(platform.python_version_tuple()[:2]),
        platform.architecture()[0]
    )

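# Illustrative note (not part of the original module): get_machine_id() combines
# platform facts into an identifier such as "Linux-CPython-3.6-64bit", while
# get_tag() joins "<project>_<commit-id>_<YYYYMMDD_HHMMSS>" and appends
# "uncommited-changes" when the working tree is dirty. Exact values depend on the
# host and repository.
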
def get_project_name():
    if os.path.exists('.git'):
        try:
            project_address = check_output("git config --local remote.origin.url".split())
            if isinstance(project_address, bytes) and str != bytes:
                project_address = project_address.decode()
            project_name = re.findall(r'/([^/]*)\.git', project_address)[0]
            return project_name
        except (IndexError, subprocess.CalledProcessError):
            return os.path.basename(os.getcwd())
    elif os.path.exists('.hg'):
        try:
            project_address = check_output("hg path default".split())
            project_address = project_address.decode()
            project_name = project_address.split("/")[-1]
            return project_name.strip()
        except (IndexError, subprocess.CalledProcessError):
            return os.path.basename(os.getcwd())
    else:
        return os.path.basename(os.getcwd())


def get_commit_info(project_name=None):
    dirty = False
    commit = 'unversioned'
    project_name = project_name or get_project_name()
    try:
        if os.path.exists('.git'):
            desc = check_output('git describe --dirty --always --long --abbrev=40'.split(),
                                universal_newlines=True).strip()
            desc = desc.split('-')
            if desc[-1].strip() == 'dirty':
                dirty = True
                desc.pop()
            commit = desc[-1].strip('g')
        elif os.path.exists('.hg'):
            desc = check_output('hg id --id --debug'.split(), universal_newlines=True).strip()
            if desc[-1] == '+':
                dirty = True
            commit = desc.strip('+')
        return {
            'id': commit,
            'dirty': dirty,
            'project': project_name,
        }
    except Exception as exc:
        return {
            'id': 'unknown',
            'dirty': dirty,
            'error': repr(exc),
            'project': project_name,
        }


def get_current_time():
    return datetime.utcnow().strftime("%Y%m%d_%H%M%S")


def first_or_value(obj, value):
    if obj:
        value, = obj

    return value


def short_filename(path, machine_id=None):
    parts = []
    try:
        last = len(path.parts) - 1
    except AttributeError:
        return str(path)
    for pos, part in enumerate(path.parts):
        if not pos and part == machine_id:
            continue
        if pos == last:
            part = part.rsplit('.', 1)[0]
            # if len(part) > 16:
            #     part = "%.13s..." % part
        parts.append(part)
    return '/'.join(parts)


def load_timer(string):
    if "." not in string:
        raise argparse.ArgumentTypeError("Value for --benchmark-timer must be in dotted form. Eg: 'module.attr'.")
    mod, attr = string.rsplit(".", 1)
    if mod == 'pep418':
        if PY3:
            import time
            return NameWrapper(getattr(time, attr))
        else:
            from . import pep418
            return NameWrapper(getattr(pep418, attr))
    else:
        __import__(mod)
        mod = sys.modules[mod]
        return NameWrapper(getattr(mod, attr))


class RegressionCheck(object):
    def __init__(self, field, threshold):
        self.field = field
        self.threshold = threshold

    def fails(self, current, compared):
        val = self.compute(current, compared)
        if val > self.threshold:
            return "Field %r has failed %s: %.9f > %.9f" % (
                self.field, self.__class__.__name__, val, self.threshold
            )


class PercentageRegressionCheck(RegressionCheck):
    def compute(self, current, compared):
        val = compared[self.field]
        if not val:
            return float("inf")
        return current[self.field] / val * 100 - 100


class DifferenceRegressionCheck(RegressionCheck):
    def compute(self, current, compared):
        return current[self.field] - compared[self.field]


def parse_compare_fail(string,
                       rex=re.compile('^(?P<field>min|max|mean|median|stddev|iqr):'
                                      '((?P<percentage>[0-9]?[0-9])%|(?P<difference>[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?))$')):
    m = rex.match(string)
    if m:
        g = m.groupdict()
        if g['percentage']:
            return PercentageRegressionCheck(g['field'], int(g['percentage']))
        elif g['difference']:
            return DifferenceRegressionCheck(g['field'], float(g['difference']))

    raise argparse.ArgumentTypeError("Could not parse value: %r." % string)

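# Illustrative examples (not part of the original module), following the regex above:
#   parse_compare_fail("mean:5%")    -> PercentageRegressionCheck("mean", 5)
#   parse_compare_fail("min:0.001")  -> DifferenceRegressionCheck("min", 0.001)
#   parse_compare_fail("bogus")      -> raises argparse.ArgumentTypeError
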
def parse_warmup(string):
    string = string.lower().strip()
    if string == "auto":
        return platform.python_implementation() == "PyPy"
    elif string in ["off", "false", "no"]:
        return False
    elif string in ["on", "true", "yes", ""]:
        return True
    else:
        raise argparse.ArgumentTypeError("Could not parse value: %r." % string)


def name_formatter_short(bench):
    name = bench["name"]
    if bench["source"]:
        name = "%s (%.4s)" % (name, os.path.split(bench["source"])[-1])
    if name.startswith("test_"):
        name = name[5:]
    return name


def name_formatter_normal(bench):
    name = bench["name"]
    if bench["source"]:
        parts = bench["source"].split('/')
        parts[-1] = parts[-1][:12]
        name = "%s (%s)" % (name, '/'.join(parts))
    return name


def name_formatter_long(bench):
    if bench["source"]:
        return "%(fullname)s (%(source)s)" % bench
    else:
        return bench["fullname"]


NAME_FORMATTERS = {
    "short": name_formatter_short,
    "normal": name_formatter_normal,
    "long": name_formatter_long,
}

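# Illustrative example (not part of the original module): for a hypothetical benchmark
# dict like {"name": "test_calc", "fullname": "tests/test_x.py::test_calc",
# "source": "0001_run.json"} the formatters yield roughly:
#   short  -> "calc (0001)"                                  (4-char source prefix, "test_" stripped)
#   normal -> "test_calc (0001_run.jso)"                     (source parts truncated to 12 chars)
#   long   -> "tests/test_x.py::test_calc (0001_run.json)"
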
def parse_name_format(string):
    string = string.lower().strip()
    if string in NAME_FORMATTERS:
        return string
    else:
        raise argparse.ArgumentTypeError("Could not parse value: %r." % string)


def parse_timer(string):
    return str(load_timer(string))


def parse_sort(string):
    string = string.lower().strip()
    if string not in ("min", "max", "mean", "stddev", "name", "fullname"):
        raise argparse.ArgumentTypeError(
            "Unacceptable value: %r. "
            "Value for --benchmark-sort must be one of: 'min', 'max', 'mean', "
            "'stddev', 'name', 'fullname'." % string)
    return string


def parse_columns(string):
    columns = [str.strip(s) for s in string.lower().split(',')]
    invalid = set(columns) - set(ALLOWED_COLUMNS)
    if invalid:
        # there are extra items in columns!
        msg = "Invalid column name(s): %s. " % ', '.join(invalid)
        msg += "The only valid column names are: %s" % ', '.join(ALLOWED_COLUMNS)
        raise argparse.ArgumentTypeError(msg)
    return columns

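# Illustrative examples (not part of the original module):
#   parse_columns("min, max,mean") -> ["min", "max", "mean"]
#   parse_columns("min,bogus")     -> raises argparse.ArgumentTypeError (unknown column)
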
def parse_rounds(string):
    try:
        value = int(string)
    except ValueError as exc:
        raise argparse.ArgumentTypeError(exc)
    else:
        if value < 1:
            raise argparse.ArgumentTypeError("Value for --benchmark-rounds must be at least 1.")
        return value


def parse_seconds(string):
    try:
        return SecondsDecimal(string).as_string
    except Exception as exc:
        raise argparse.ArgumentTypeError("Invalid decimal value %r: %r" % (string, exc))


def parse_save(string):
    if not string:
        raise argparse.ArgumentTypeError("Can't be empty.")
    illegal = ''.join(c for c in r"\/:*?<>|" if c in string)
    if illegal:
        raise argparse.ArgumentTypeError("Must not contain any of these characters: /:*?<>|\\ (it has %r)" % illegal)
    return string


def parse_elasticsearch_storage(string, default_index="benchmark", default_doctype="benchmark"):
    storage_url = urlparse(string)
    hosts = ["{scheme}://{netloc}".format(scheme=storage_url.scheme, netloc=netloc) for netloc in storage_url.netloc.split(',')]
    index = default_index
    doctype = default_doctype
    if storage_url.path and storage_url.path != "/":
        splitted = storage_url.path.strip("/").split("/")
        index = splitted[0]
        if len(splitted) >= 2:
            doctype = splitted[1]
    query = parse_qs(storage_url.query)
    try:
        project_name = query["project_name"][0]
    except KeyError:
        project_name = get_project_name()
    return hosts, index, doctype, project_name

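# Illustrative example (not part of the original module): load_storage() below strips
# the "elasticsearch+" prefix before calling this parser, so for
#   parse_elasticsearch_storage("http://127.0.0.1:9200,192.168.1.2:9200/myindex/mydoc?project_name=demo")
# the result is roughly:
#   hosts        -> ["http://127.0.0.1:9200", "http://192.168.1.2:9200"]
#   index        -> "myindex"
#   doctype      -> "mydoc"
#   project_name -> "demo"
# Without a path or query, the "benchmark" defaults and get_project_name() are used.
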
def load_storage(storage, **kwargs):
    if "://" not in storage:
        storage = "file://" + storage
    if storage.startswith("file://"):
        from .storage.file import FileStorage
        return FileStorage(storage[len("file://"):], **kwargs)
    elif storage.startswith("elasticsearch+"):
        from .storage.elasticsearch import ElasticsearchStorage
        # TODO update benchmark_autosave
        return ElasticsearchStorage(*parse_elasticsearch_storage(storage[len("elasticsearch+"):]), **kwargs)
    else:
        raise argparse.ArgumentTypeError("Storage must be in form of file://path or "
                                         "elasticsearch+http[s]://host1,host2/index/doctype")


def time_unit(value):
    if value < 1e-6:
        return "n", 1e9
    elif value < 1e-3:
        return "u", 1e6
    elif value < 1:
        return "m", 1e3
    else:
        return "", 1.


def format_time(value):
    unit, adjustment = time_unit(value)
    return "{0:.2f}{1:s}".format(value * adjustment, unit)

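# Illustrative examples (not part of the original module):
#   format_time(0.000000005) -> "5.00n"    (< 1e-6 scales by 1e9)
#   format_time(0.00025)     -> "250.00u"  (< 1e-3 scales by 1e6)
#   format_time(0.0315)      -> "31.50m"   (< 1 scales by 1e3)
#   format_time(2.5)         -> "2.50"     (seconds, no suffix)
# SecondsDecimal.__str__ appends "s", e.g. str(SecondsDecimal("0.0315")) == "31.50ms".
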
class cached_property(object):
    def __init__(self, func):
        self.__doc__ = getattr(func, '__doc__')
        self.func = func

    def __get__(self, obj, cls):
        if obj is None:
            return self
        value = obj.__dict__[self.func.__name__] = self.func(obj)
        return value

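# Illustrative usage (not part of the original module): cached_property runs the wrapped
# method once per instance and stores the result in the instance __dict__, so later
# attribute reads bypass the descriptor entirely. For example:
#
#   class Example(object):
#       @cached_property
#       def answer(self):
#           return compute_answer()   # hypothetical helper; called only on first access
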
def funcname(f):
    try:
        if isinstance(f, partial):
            return f.func.__name__
        else:
            return f.__name__
    except AttributeError:
        return str(f)


def clonefunc(f):
    """Deep clone the given function to create a new one.

    By default, the PyPy JIT specializes the assembler based on f.__code__:
    clonefunc makes sure that you will get a new function with a **different**
    __code__, so that PyPy will produce independent assembler. This is useful
    e.g. for benchmarks and microbenchmarks, so you can make sure to compare
    apples to apples.

    Use it with caution: if abused, this might easily produce an explosion of
    produced assembler.

    from: https://bitbucket.org/antocuni/pypytools/src/tip/pypytools/util.py?at=default
    """

    # first of all, we clone the code object
    try:
        co = f.__code__
        if PY3:
            co2 = types.CodeType(co.co_argcount, co.co_kwonlyargcount,
                                 co.co_nlocals, co.co_stacksize, co.co_flags, co.co_code,
                                 co.co_consts, co.co_names, co.co_varnames, co.co_filename, co.co_name,
                                 co.co_firstlineno, co.co_lnotab, co.co_freevars, co.co_cellvars)
        else:
            co2 = types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, co.co_flags, co.co_code,
                                 co.co_consts, co.co_names, co.co_varnames, co.co_filename, co.co_name,
                                 co.co_firstlineno, co.co_lnotab, co.co_freevars, co.co_cellvars)
        #
        # then, we clone the function itself, using the new co2
        return types.FunctionType(co2, f.__globals__, f.__name__, f.__defaults__, f.__closure__)
    except AttributeError:
        return f


def format_dict(obj):
    return "{%s}" % ", ".join("%s: %s" % (k, json.dumps(v)) for k, v in sorted(obj.items()))


class SafeJSONEncoder(json.JSONEncoder):
    def default(self, o):
        return "UNSERIALIZABLE[%r]" % o


def safe_dumps(obj, **kwargs):
    return json.dumps(obj, cls=SafeJSONEncoder, **kwargs)


def report_progress(iterable, terminal_reporter, format_string, **kwargs):
    total = len(iterable)

    def progress_reporting_wrapper():
        for pos, item in enumerate(iterable):
            string = format_string.format(pos=pos + 1, total=total, value=item, **kwargs)
            terminal_reporter.rewrite(string, black=True, bold=True)
            yield string, item
    return progress_reporting_wrapper()


def report_noprogress(iterable, *args, **kwargs):
    for pos, item in enumerate(iterable):
        yield "", item


def slugify(name):
    for c in "\/:*?<>| ":
        name = name.replace(c, '_').replace('__', '_')
    return name

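# Illustrative example (not part of the original module):
#   slugify("tests/test_calc.py::test_div[2-4]") -> "tests_test_calc.py_test_div[2-4]"
# (path separators, colons, spaces and the other listed characters become underscores).
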
def commonpath(paths):
    """Given a sequence of path names, returns the longest common sub-path."""

    if not paths:
        raise ValueError('commonpath() arg is an empty sequence')

    if isinstance(paths[0], bytes):
        sep = b'\\'
        altsep = b'/'
        curdir = b'.'
    else:
        sep = '\\'
        altsep = '/'
        curdir = '.'

    try:
        drivesplits = [ntpath.splitdrive(p.replace(altsep, sep).lower()) for p in paths]
        split_paths = [p.split(sep) for d, p in drivesplits]

        try:
            isabs, = set(p[:1] == sep for d, p in drivesplits)
        except ValueError:
            raise ValueError("Can't mix absolute and relative paths")

        # Check that all drive letters or UNC paths match. The check is made only
        # now otherwise type errors for mixing strings and bytes would not be
        # caught.
        if len(set(d for d, p in drivesplits)) != 1:
            raise ValueError("Paths don't have the same drive")

        drive, path = ntpath.splitdrive(paths[0].replace(altsep, sep))
        common = path.split(sep)
        common = [c for c in common if c and c != curdir]

        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
        s1 = min(split_paths)
        s2 = max(split_paths)
        for i, c in enumerate(s1):
            if c != s2[i]:
                common = common[:i]
                break
        else:
            common = common[:len(s1)]

        prefix = drive + sep if isabs else drive
        return prefix + sep.join(common)
    except (TypeError, AttributeError):
        genericpath._check_arg_types('commonpath', *paths)
        raise

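# Illustrative example (not part of the original module): this is an ntpath-flavoured
# commonpath, so both separators are accepted and comparison is case-insensitive:
#   commonpath([r"C:\work\proj\a.py", "C:/work/proj/sub/b.py"]) -> "C:\\work\\proj"  (i.e. C:\work\proj)
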
def get_cprofile_functions(stats):
    """
    Convert pstats structure to list of sorted dicts about each function.
    """
    result = []
    # this assumes that you run py.test from project root dir
    project_dir_parent = os.path.dirname(os.getcwd())

    for function_info, run_info in stats.stats.items():
        file_path = function_info[0]
        if file_path.startswith(project_dir_parent):
            file_path = file_path[len(project_dir_parent):].lstrip('/')
        function_name = '{0}:{1}({2})'.format(file_path, function_info[1], function_info[2])

        # if the function is recursive write number of 'total calls/primitive calls'
        if run_info[0] == run_info[1]:
            calls = str(run_info[0])
        else:
            calls = '{1}/{0}'.format(run_info[0], run_info[1])

        result.append(dict(ncalls_recursion=calls,
                           ncalls=run_info[1],
                           tottime=run_info[2],
                           tottime_per=run_info[2] / run_info[0] if run_info[0] > 0 else 0,
                           cumtime=run_info[3],
                           cumtime_per=run_info[3] / run_info[0] if run_info[0] > 0 else 0,
                           function_name=function_name))

    return result