Completed
Push to master ( 3e0f17...aa1f5a ) by Ionel Cristian, created at 03:57

src.pytest_benchmark.BenchmarkFixture.__init__()   Grade: B

Complexity
    Conditions: 2

Size
    Total Lines: 26

Duplication
    Lines: 0
    Ratio: 0 %

Metric    Value
cc        2
dl        0
loc       26
rs        8.8571
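
For context, the constructor graded above builds the object that tests receive through the `benchmark` fixture defined near the end of this file. A minimal sketch of how a test would exercise it (the test name and the timed workload are made up for illustration):

    # test_example.py -- illustrative only
    def test_sorting(benchmark):
        data = list(range(1000, 0, -1))
        # benchmark(...) goes through BenchmarkFixture.__call__, which calibrates,
        # runs the timed rounds and finally returns the function's own result.
        result = benchmark(sorted, data)
        assert result[0] == 1

The full source of the analyzed file follows.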
from __future__ import division
from __future__ import print_function

import argparse
import gc
import json
import operator
import os
import platform
import sys
import time
import traceback
from collections import defaultdict
from datetime import datetime
from math import ceil
from math import isinf

import py
import pytest

from . import __version__
from .compat import INT
from .compat import XRANGE
from .timers import compute_timer_precision
from .timers import default_timer
from .utils import NameWrapper
from .utils import SecondsDecimal
from .utils import cached_property
from .utils import first_or_value
from .utils import format_dict
from .utils import format_time
from .utils import get_commit_info
from .utils import get_current_time
from .utils import get_tag
from .utils import load_timer
from .utils import parse_compare_fail
from .utils import parse_rounds
from .utils import parse_save
from .utils import parse_seconds
from .utils import parse_sort
from .utils import parse_columns
from .utils import parse_timer
from .utils import parse_warmup
from .utils import report_progress
from .utils import time_unit

try:
    import statistics
except (ImportError, SyntaxError):
    statistics = False
    statistics_error = traceback.format_exc()
else:
    from .stats import Stats

NUMBER_FMT = "{0:,.4f}" if sys.version_info[:2] > (2, 6) else "{0:.4f}"
ALIGNED_NUMBER_FMT = "{0:>{1},.4f}{2:<{3}}" if sys.version_info[:2] > (2, 6) else "{0:>{1}.4f}{2:<{3}}"
HISTOGRAM_CURRENT = "now"


class PerformanceRegression(pytest.UsageError):
    pass


class FixtureAlreadyUsed(Exception):
    pass


def pytest_report_header(config):
    bs = config._benchmarksession

    return ("benchmark: %(version)s (defaults:"
            " timer=%(timer)s"
            " disable_gc=%(disable_gc)s"
            " min_rounds=%(min_rounds)s"
            " min_time=%(min_time)s"
            " max_time=%(max_time)s"
            " calibration_precision=%(calibration_precision)s"
            " warmup=%(warmup)s"
            " warmup_iterations=%(warmup_iterations)s"
            ")") % dict(
        bs.options,
        version=__version__,
        timer=bs.options.get("timer"),
    )


def pytest_addoption(parser):
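    # Registers every --benchmark-* command line option on the "benchmark"
    # option group. Illustrative invocation (the option values are made up):
    #   py.test --benchmark-only --benchmark-autosave --benchmark-compare=0001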
    group = parser.getgroup("benchmark")
    group.addoption(
        "--benchmark-min-time",
        metavar="SECONDS", type=parse_seconds, default="0.000005",
        help="Minimum time per round in seconds. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-max-time",
        metavar="SECONDS", type=parse_seconds, default="1.0",
        help="Maximum run time per test - it will be repeated until this total time is reached. It may be "
             "exceeded if the test function is very slow or --benchmark-min-rounds is large (it takes precedence). "
             "Default: %(default)r"
    )
    group.addoption(
        "--benchmark-min-rounds",
        metavar="NUM", type=parse_rounds, default=5,
        help="Minimum rounds, even if total time would exceed `--max-time`. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-sort",
        metavar="COL", type=parse_sort, default="min",
        help="Column to sort on. Can be one of: 'min', 'max', 'mean', 'stddev', "
             "'name', 'fullname'. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-group-by",
        metavar="LABEL", default="group",
        help="How to group tests. Can be one of: 'group', 'name', 'fullname', 'func', 'fullfunc', "
             "'param' or 'param:NAME', where NAME is the name passed to @pytest.parametrize."
             " Default: %(default)r"
    )
    group.addoption(
        "--benchmark-columns",
        metavar="LABELS", type=parse_columns,
        default="min, max, mean, stddev, median, iqr, outliers, rounds, iterations",
        help='Comma-separated list of columns to show in the result table. Default: "%(default)s"')
    group.addoption(
        "--benchmark-timer",
        metavar="FUNC", type=parse_timer, default=str(NameWrapper(default_timer)),
        help="Timer to use when measuring time. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-calibration-precision",
        metavar="NUM", type=int, default=10,
        help="Precision to use when calibrating the number of iterations. A precision of 10 will make the timer look 10 times"
             " more accurate, at the cost of a less precise measure of deviations. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-warmup",
        metavar="KIND", nargs="?", default=parse_warmup("auto"), type=parse_warmup,
        help="Activates warmup. Will run the test function up to the configured number of times in the calibration phase. "
             "See `--benchmark-warmup-iterations`. Note: Even the warmup phase obeys --benchmark-max-time. "
             "Available KIND: 'auto', 'off', 'on'. Default: 'auto' (automatically activate on PyPy)."
    )
    group.addoption(
        "--benchmark-warmup-iterations",
        metavar="NUM", type=int, default=100000,
        help="Max number of iterations to run in the warmup phase. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-verbose",
        action="store_true", default=False,
        help="Dump diagnostic and progress information."
    )
    group.addoption(
        "--benchmark-disable-gc",
        action="store_true", default=False,
        help="Disable GC during benchmarks."
    )
    group.addoption(
        "--benchmark-skip",
        action="store_true", default=False,
        help="Skip running any tests that contain benchmarks."
    )
    group.addoption(
        "--benchmark-disable",
        action="store_true", default=False,
        help="Disable benchmarks. Benchmarked functions are only run once and no stats are reported. Use this if you "
             "want to run the test but don't do any benchmarking."
    )
    group.addoption(
        "--benchmark-only",
        action="store_true", default=False,
        help="Only run benchmarks."
    )
    group.addoption(
        "--benchmark-save",
        metavar="NAME", type=parse_save,
        help="Save the current run into 'STORAGE-PATH/counter_NAME.json'."
    )
    tag = get_tag()
    group.addoption(
        "--benchmark-autosave",
        action='store_const', const=tag,
        help="Autosave the current run into 'STORAGE-PATH/counter_%s.json'." % tag,
    )
    group.addoption(
        "--benchmark-save-data",
        action="store_true",
        help="Use this to make --benchmark-save and --benchmark-autosave include all the timing data,"
             " not just the stats.",
    )
    group.addoption(
        "--benchmark-compare",
        metavar="NUM", nargs="?", default=[], const=True,
        help="Compare the current run against run NUM or the latest saved run if unspecified."
    )
    group.addoption(
        "--benchmark-compare-fail",
        metavar="EXPR", nargs="+", type=parse_compare_fail,
        help="Fail test if performance regresses according to given EXPR"
             " (eg: min:5%% or mean:0.001 for number of seconds). Can be used multiple times."
    )
    group.addoption(
        "--benchmark-storage",
        metavar="STORAGE-PATH", default="./.benchmarks/%s-%s-%s-%s" % (
            platform.system(),
            platform.python_implementation(),
            ".".join(platform.python_version_tuple()[:2]),
            platform.architecture()[0]
        ),
        help="Specify a different path to store the runs (when --benchmark-save or --benchmark-autosave are used). "
             "Default: %(default)r",
    )
    prefix = "benchmark_%s" % get_current_time()
    group.addoption(
        "--benchmark-histogram",
        action='append', metavar="FILENAME-PREFIX", nargs="?", default=[], const=prefix,
        help="Plot graphs of min/max/avg/stddev over time in FILENAME-PREFIX-test_name.svg. If FILENAME-PREFIX contains"
             " slashes ('/') then directories will be created. Default: %r" % prefix
    )
    group.addoption(
        "--benchmark-json",
        metavar="PATH", type=argparse.FileType('wb'),
        help="Dump a JSON report into PATH. "
             "Note that this will include the complete data (all the timings, not just the stats)."
    )


def pytest_addhooks(pluginmanager):
    from . import hookspec

    method = getattr(pluginmanager, "add_hookspecs", None)
    if method is None:
        method = pluginmanager.addhooks
    method(hookspec)


class BenchmarkStats(object):
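    # Pairs a fixture's metadata (name, group, params) with its Stats object;
    # the results table and the JSON report are built from these instances.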
    def __init__(self, fixture, iterations, options):
        self.name = fixture.name
        self.fullname = fixture.fullname
        self.group = fixture.group
        self.param = fixture.param
        self.params = fixture.params

        self.iterations = iterations
        self.stats = Stats()
        self.options = options
        self.fixture = fixture

    def __bool__(self):
        return bool(self.stats)

    def __nonzero__(self):
        return bool(self.stats)

    def get(self, key, default=None):
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key, default)

    def __getitem__(self, key):
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key)

    @property
    def has_error(self):
        return self.fixture.has_error

    def json(self, include_data=True):
        if include_data:
            return dict(self.stats.as_dict, data=self.stats.data)
        else:
            return self.stats.as_dict

    def update(self, duration):
        self.stats.update(duration / self.iterations)


class BenchmarkFixture(object):
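    # The object injected into tests as the `benchmark` fixture. It is used
    # either as a callable -- benchmark(fn, *args, **kwargs) -- or through the
    # explicit pedantic()/weave() APIs below.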
    _precisions = {}

    @classmethod
    def _get_precision(cls, timer):
        if timer in cls._precisions:
            return cls._precisions[timer]
        else:
            return cls._precisions.setdefault(timer, compute_timer_precision(timer))

    def __init__(self, node, disable_gc, timer, min_rounds, min_time, max_time, warmup, warmup_iterations,
                 calibration_precision, add_stats, logger, warner, disable, group=None):
        self.name = node.name
        self.fullname = node._nodeid
        self.disable = disable
        if hasattr(node, 'callspec'):
            self.param = node.callspec.id
            self.params = node.callspec.params
        else:
            self.param = None
            self.params = None
        self.group = group
        self.has_error = False

        self._disable_gc = disable_gc
        self._timer = timer.target
        self._min_rounds = min_rounds
        self._max_time = float(max_time)
        self._min_time = float(min_time)
        self._add_stats = add_stats
        self._calibration_precision = calibration_precision
        self._warmup = warmup and warmup_iterations
        self._logger = logger
        self._warner = warner
        self._cleanup_callbacks = []
        self._mode = None

    def _make_runner(self, function_to_benchmark, args, kwargs):
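        # Builds the timed closure: tracing is suspended (and GC optionally
        # disabled) for the duration of a measurement; with a loops_range it
        # returns the elapsed time for the whole loop, otherwise it returns
        # (elapsed time, function result) for a single call.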
        def runner(loops_range, timer=self._timer):
            gc_enabled = gc.isenabled()
            if self._disable_gc:
                gc.disable()
            tracer = sys.gettrace()
            sys.settrace(None)
            try:
                if loops_range:
                    start = timer()
                    for _ in loops_range:
                        function_to_benchmark(*args, **kwargs)
                    end = timer()
                    return end - start
                else:
                    start = timer()
                    result = function_to_benchmark(*args, **kwargs)
                    end = timer()
                    return end - start, result
            finally:
                sys.settrace(tracer)
                if gc_enabled:
                    gc.enable()

        return runner

    def _make_stats(self, iterations):
        stats = BenchmarkStats(self, iterations=iterations, options={
            "disable_gc": self._disable_gc,
            "timer": self._timer,
            "min_rounds": self._min_rounds,
            "max_time": self._max_time,
            "min_time": self._min_time,
            "warmup": self._warmup,
        })
        self._add_stats(stats)
        return stats

    def __call__(self, function_to_benchmark, *args, **kwargs):
        if self._mode:
            self.has_error = True
            raise FixtureAlreadyUsed(
                "Fixture can only be used once. Previously it was used in %s mode." % self._mode)
        try:
            self._mode = 'benchmark(...)'
            return self._raw(function_to_benchmark, *args, **kwargs)
        except Exception:
            self.has_error = True
            raise

    def pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1):
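        # Explicit-control mode: the caller picks rounds/iterations/setup
        # instead of relying on calibration. Illustrative call (the target and
        # numbers are made up):
        #   benchmark.pedantic(do_work, args=(25,), rounds=100, iterations=10)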
        if self._mode:
            self.has_error = True
            raise FixtureAlreadyUsed(
                "Fixture can only be used once. Previously it was used in %s mode." % self._mode)
        try:
            self._mode = 'benchmark.pedantic(...)'
            return self._raw_pedantic(target, args=args, kwargs=kwargs, setup=setup, rounds=rounds,
                                      warmup_rounds=warmup_rounds, iterations=iterations)
        except Exception:
            self.has_error = True
            raise

    def _raw(self, function_to_benchmark, *args, **kwargs):
        if not self.disable:
            runner = self._make_runner(function_to_benchmark, args, kwargs)

            duration, iterations, loops_range = self._calibrate_timer(runner)

            # Choose how many times we must repeat the test
            rounds = int(ceil(self._max_time / duration))
            rounds = max(rounds, self._min_rounds)
            rounds = min(rounds, sys.maxsize)

            stats = self._make_stats(iterations)

            self._logger.debug("  Running %s rounds x %s iterations ..." % (rounds, iterations), yellow=True, bold=True)
            run_start = time.time()
            if self._warmup:
                warmup_rounds = min(rounds, max(1, int(self._warmup / iterations)))
                self._logger.debug("  Warmup %s rounds x %s iterations ..." % (warmup_rounds, iterations))
                for _ in XRANGE(warmup_rounds):
                    runner(loops_range)
            for _ in XRANGE(rounds):
                stats.update(runner(loops_range))
            self._logger.debug("  Ran for %ss." % format_time(time.time() - run_start), yellow=True, bold=True)
        return function_to_benchmark(*args, **kwargs)

    def _raw_pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1):
        if kwargs is None:
            kwargs = {}

        has_args = bool(args or kwargs)

        if not isinstance(iterations, INT) or iterations < 1:
            raise ValueError("Must have positive int for `iterations`.")

        if not isinstance(rounds, INT) or rounds < 1:
            raise ValueError("Must have positive int for `rounds`.")

        if not isinstance(warmup_rounds, INT) or warmup_rounds < 0:
            raise ValueError("Must have non-negative int for `warmup_rounds`.")

        if iterations > 1 and setup:
            raise ValueError("Can't use more than 1 `iterations` with a `setup` function.")

        def make_arguments(args=args, kwargs=kwargs):
            if setup:
                maybe_args = setup()
                if maybe_args:
                    if has_args:
                        raise TypeError("Can't use `args` or `kwargs` if `setup` returns the arguments.")
                    args, kwargs = maybe_args
            return args, kwargs

        if self.disable:
            args, kwargs = make_arguments()
            return target(*args, **kwargs)

        stats = self._make_stats(iterations)
        loops_range = XRANGE(iterations) if iterations > 1 else None
        for _ in XRANGE(warmup_rounds):
            args, kwargs = make_arguments()

            runner = self._make_runner(target, args, kwargs)
            runner(loops_range)

        for _ in XRANGE(rounds):
            args, kwargs = make_arguments()

            runner = self._make_runner(target, args, kwargs)
            if loops_range:
                duration = runner(loops_range)
            else:
                duration, result = runner(loops_range)
            stats.update(duration)

        if loops_range:
            args, kwargs = make_arguments()
            result = target(*args, **kwargs)
        return result

    def weave(self, target, **kwargs):
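        # Patches `target` with aspectlib so every call to it goes through this
        # fixture. Illustrative use (the dotted path is made up):
        #   benchmark.weave('mypkg.mymod.expensive_call')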
        try:
            import aspectlib
        except ImportError as exc:
            raise ImportError(exc.args, "Please install aspectlib or pytest-benchmark[aspect]")

        def aspect(function):
            def wrapper(*args, **kwargs):
                return self(function, *args, **kwargs)

            return wrapper

        self._cleanup_callbacks.append(aspectlib.weave(target, aspect, **kwargs).rollback)

    patch = weave

    def _cleanup(self):
        while self._cleanup_callbacks:
            callback = self._cleanup_callbacks.pop()
            callback()
        if not self._mode:
            self._logger.warn("BENCHMARK-U1", "Benchmark fixture was not used at all in this test!",
                              warner=self._warner, suspend=True)

    def _calibrate_timer(self, runner):
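        # Calibration loop: time `loops` iterations and grow the loop count
        # (estimated, or x10) until one round takes at least `min_time`, so the
        # timer's precision does not dominate the measurement.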
        timer_precision = self._get_precision(self._timer)
        min_time = max(self._min_time, timer_precision * self._calibration_precision)
        min_time_estimate = min_time * 5 / self._calibration_precision
        self._logger.debug("")
        self._logger.debug("  Timer precision: %ss" % format_time(timer_precision), yellow=True, bold=True)
        self._logger.debug("  Calibrating to target round %ss; will estimate when reaching %ss." % (
            format_time(min_time), format_time(min_time_estimate)), yellow=True, bold=True)

        loops = 1
        while True:
            loops_range = XRANGE(loops)
            duration = runner(loops_range)
            if self._warmup:
                warmup_start = time.time()
                warmup_iterations = 0
                warmup_rounds = 0
                while time.time() - warmup_start < self._max_time and warmup_iterations < self._warmup:
                    duration = min(duration, runner(loops_range))
                    warmup_rounds += 1
                    warmup_iterations += loops
                self._logger.debug("    Warmup: %ss (%s x %s iterations)." % (
                    format_time(time.time() - warmup_start),
                    warmup_rounds, loops
                ))

            self._logger.debug("    Measured %s iterations: %ss." % (loops, format_time(duration)), yellow=True)
            if duration >= min_time:
                break

            if duration >= min_time_estimate:
                # coarse estimation of the number of loops
                loops = int(ceil(min_time * loops / duration))
                self._logger.debug("    Estimating %s iterations." % loops, green=True)
                if loops == 1:
                    # If we got a single loop then bail early - nothing to calibrate if the
                    # test function is 100 times slower than the timer resolution.
                    loops_range = XRANGE(loops)
                    break
            else:
                loops *= 10
        return duration, loops, loops_range


class Logger(object):
    def __init__(self, verbose, config):
        self.verbose = verbose
        self.term = py.io.TerminalWriter(file=sys.stderr)
        self.capman = config.pluginmanager.getplugin("capturemanager")
        self.pytest_warn = config.warn
        try:
            self.pytest_warn_has_fslocation = 'fslocation' in config.warn.func_code.co_varnames
        except AttributeError:
            self.pytest_warn_has_fslocation = False

    def warn(self, code, text, warner=None, suspend=False, fslocation=None):
        if self.verbose:
            if suspend and self.capman:
                self.capman.suspendcapture(in_=True)
            self.term.line("")
            self.term.sep("-", red=True, bold=True)
            self.term.write(" WARNING: ", red=True, bold=True)
            self.term.line(text, red=True)
            self.term.sep("-", red=True, bold=True)
            if suspend and self.capman:
                self.capman.resumecapture()
        if warner is None:
            warner = self.pytest_warn
        if fslocation and self.pytest_warn_has_fslocation:
            warner(code=code, message=text, fslocation=fslocation)
        else:
            warner(code=code, message=text)

    def error(self, text):
        self.term.line("")
        self.term.sep("-", red=True, bold=True)
        self.term.line(text, red=True, bold=True)
        self.term.sep("-", red=True, bold=True)

    def info(self, text, **kwargs):
        if not kwargs or kwargs == {'bold': True}:
            kwargs['purple'] = True
        self.term.line(text, **kwargs)

    def debug(self, text, **kwargs):
        if self.verbose:
            if self.capman:
                self.capman.suspendcapture(in_=True)
            self.info(text, **kwargs)
            if self.capman:
                self.capman.resumecapture()


class BenchmarkSession(object):
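    # Plugin state attached to the pytest config: collects BenchmarkStats,
    # handles saving/loading/comparing runs and renders the results table.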
    compare_mapping = None

    def __init__(self, config):
        self.verbose = config.getoption("benchmark_verbose")
        self.logger = Logger(self.verbose, config)
        self.config = config
        self.options = dict(
            min_time=SecondsDecimal(config.getoption("benchmark_min_time")),
            min_rounds=config.getoption("benchmark_min_rounds"),
            max_time=SecondsDecimal(config.getoption("benchmark_max_time")),
            timer=load_timer(config.getoption("benchmark_timer")),
            calibration_precision=config.getoption("benchmark_calibration_precision"),
            disable_gc=config.getoption("benchmark_disable_gc"),
            warmup=config.getoption("benchmark_warmup"),
            warmup_iterations=config.getoption("benchmark_warmup_iterations"),
        )
        self.skip = config.getoption("benchmark_skip")
        self.disable = config.getoption("benchmark_disable")

        if config.getoption("dist", "no") != "no" and not self.skip:
            self.logger.warn(
                "BENCHMARK-U2",
                "Benchmarks are automatically disabled because xdist plugin is active. "
                "Benchmarks cannot be performed reliably in a parallelized environment.",
                fslocation="::"
            )
            self.disable = True
        if hasattr(config, "slaveinput"):
            self.disable = True
        if not statistics:
            self.logger.warn(
                "BENCHMARK-U3",
                "Benchmarks are automatically disabled because we could not import `statistics`\n\n%s" %
                statistics_error,
                fslocation="::"
            )
            self.disable = True

        self.only = config.getoption("benchmark_only")
        self.sort = config.getoption("benchmark_sort")
        self.columns = config.getoption("benchmark_columns")
        if self.skip and self.only:
            raise pytest.UsageError("Can't have both --benchmark-only and --benchmark-skip options.")
        if self.disable and self.only:
            raise pytest.UsageError(
                "Can't have both --benchmark-only and --benchmark-disable options. Note that --benchmark-disable is "
                "automatically activated if xdist is on or you're missing the statistics dependency.")
        self._benchmarks = []
        self.group_by = config.getoption("benchmark_group_by")
        self.save = config.getoption("benchmark_save")
        self.autosave = config.getoption("benchmark_autosave")
        self.save_data = config.getoption("benchmark_save_data")
        self.json = config.getoption("benchmark_json")
        self.compare = config.getoption("benchmark_compare")
        self.compare_fail = config.getoption("benchmark_compare_fail")
        self.performance_regressions = []
        self.storage = py.path.local(config.getoption("benchmark_storage"))
        self.storage.ensure(dir=1)
        self.histogram = first_or_value(config.getoption("benchmark_histogram"), False)

    @property
    def benchmarks(self):
        return [bench for bench in self._benchmarks if bench]

    @property
    def storage_fslocation(self):
        return self.storage.relto(os.getcwd())

    @cached_property
    def compare_file(self):
        if self.compare:
            files = self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json", sort=True)
            if files:
                if self.compare is True:
                    files.sort()
                    return files[-1]
                else:
                    files = [f for f in files if str(f.basename).startswith(self.compare)]
                    if len(files) == 1:
                        return files[0]

                    if not files:
                        self.logger.warn("BENCHMARK-C1", "Can't compare. No benchmark files matched %r" % self.compare,
                                         fslocation=self.storage_fslocation)
                    elif len(files) > 1:
                        self.logger.warn(
                            "BENCHMARK-C2", "Can't compare. Too many benchmark files matched %r:\n - %s" % (
                                self.compare, '\n - '.join(map(str, files))
                            ),
                            fslocation=self.storage_fslocation)
            else:
                msg = "Can't compare. No benchmark files in %r. " \
                      "Expected files matching [0-9][0-9][0-9][0-9]_*.json." % str(self.storage)
                if self.compare is True:
                    msg += " Can't load the previous benchmark."
                    code = "BENCHMARK-C3"
                else:
                    msg += " Can't match anything to %r." % self.compare
                    code = "BENCHMARK-C4"
                self.logger.warn(code, msg, fslocation=self.storage_fslocation)
                return

    @property
    def next_num(self):
        files = self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json")
        files.sort(reverse=True)
        if not files:
            return "0001"
        for f in files:
            try:
                return "%04i" % (int(str(f.basename).split('_')[0]) + 1)
            except ValueError:
                raise

    def handle_saving(self):
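        # Writes the --benchmark-json report and/or a numbered save file
        # (--benchmark-save / --benchmark-autosave), going through the
        # generate/update JSON hooks so other plugins can amend the output.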
        if self.json:
            output_json = self.config.hook.pytest_benchmark_generate_json(
                config=self.config,
                benchmarks=self.benchmarks,
                include_data=True
            )
            self.config.hook.pytest_benchmark_update_json(
                config=self.config,
                benchmarks=self.benchmarks,
                output_json=output_json
            )
            with self.json as fh:
                fh.write(json.dumps(output_json, ensure_ascii=True, indent=4).encode())
            self.logger.info("Wrote benchmark data in %s" % self.json, purple=True)

        save = self.save or self.autosave
        if save:
            output_json = self.config.hook.pytest_benchmark_generate_json(
                config=self.config,
                benchmarks=self.benchmarks,
                include_data=self.save_data
            )
            self.config.hook.pytest_benchmark_update_json(
                config=self.config,
                benchmarks=self.benchmarks,
                output_json=output_json
            )
            output_file = self.storage.join("%s_%s.json" % (self.next_num, save))
            assert not output_file.exists()

            with output_file.open('wb') as fh:
                fh.write(json.dumps(output_json, ensure_ascii=True, indent=4).encode())
            self.logger.info("Saved benchmark data in %s" % output_file)

    def handle_loading(self):
        if self.compare_file:
            self.compare_name = self.compare_file.basename.split('_')[0]
            with self.compare_file.open('rU') as fh:
                try:
                    compared_benchmark = json.load(fh)
                except Exception as exc:
                    self.logger.warn("BENCHMARK-C5", "Failed to load %s: %s" % (self.compare_file, exc),
                                     fslocation=self.storage_fslocation)
                    return

            machine_info = self.config.hook.pytest_benchmark_generate_machine_info(config=self.config)
            self.config.hook.pytest_benchmark_update_machine_info(config=self.config, machine_info=machine_info)
            self.config.hook.pytest_benchmark_compare_machine_info(config=self.config, benchmarksession=self,
                                                                   machine_info=machine_info,
                                                                   compared_benchmark=compared_benchmark)
            self.compare_mapping = dict((bench['fullname'], bench) for bench in compared_benchmark['benchmarks'])

            self.logger.info("Comparing against benchmark %s:" % self.compare_file.basename, bold=True)
            self.logger.info("| commit info: %s" % format_dict(compared_benchmark['commit_info']))
            self.logger.info("| saved at: %s" % compared_benchmark['datetime'])
            self.logger.info("| saved using pytest-benchmark %s:" % compared_benchmark['version'])

    def display(self, tr):
        if not self.benchmarks:
            return

        tr.ensure_newline()
        self.handle_saving()
        self.handle_loading()
        if self.benchmarks:
            self.display_results_table(tr)
            self.check_regressions()
            self.handle_histogram()

    def check_regressions(self):
        if self.compare_fail and not self.compare_file:
            raise pytest.UsageError("--benchmark-compare-fail requires valid --benchmark-compare.")

        if self.performance_regressions:
            self.logger.error("Performance has regressed:\n%s" % "\n".join(
                "\t%s - %s" % line for line in self.performance_regressions
            ))
            raise PerformanceRegression("Performance has regressed.")

    def handle_histogram(self):
        if self.histogram:
            from .histogram import make_plot

            history = {}
            for bench_file in self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json"):
                with bench_file.open('rU') as fh:
                    fullname = bench_file.purebasename
                    if '_' in fullname:
                        id_, name = fullname.split('_', 1)
                    else:
                        id_, name = fullname, ''
                    data = history[id_] = json.load(fh)
                    data['name'] = name
                    data['mapping'] = dict((bench['fullname'], bench) for bench in data['benchmarks'])

            for bench in self.benchmarks:
                name = bench.fullname
                for c in "\/:*?<>|":
                    name = name.replace(c, '_').replace('__', '_')
                output_file = py.path.local("%s-%s.svg" % (self.histogram, name)).ensure()

                table = list(self.generate_histogram_table(bench, history, sorted(history)))

                plot = make_plot(
                    bench_name=bench.fullname,
                    table=table,
                    compare=self.compare_file,
                    annotations=history,
                    sort=self.sort,
                    current=HISTOGRAM_CURRENT,
                )
                plot.render_to_file(str(output_file))
                self.logger.info("Generated histogram %s" % output_file, bold=True)

    @staticmethod
    def generate_histogram_table(current, history, sequence):
        for name in sequence:
            trial = history[name]
            for bench in trial["benchmarks"]:
                if bench["fullname"] == current.fullname:
                    found = True
                else:
                    found = False

                if found:
                    yield "%s" % name, bench["stats"]
                    break

        yield HISTOGRAM_CURRENT, current.json()

    def apply_compare(self, benchmarks, compare_name, compare_mapping):
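        # Splices the saved run next to the current one so the table shows a
        # "NOW" row and a saved-run row per benchmark, and records any
        # --benchmark-compare-fail regressions along the way.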
        result = []
        for bench in benchmarks:
            if bench.fullname in compare_mapping:
                stats = compare_mapping[bench.fullname]["stats"]
                result.extend([
                    dict(bench.json(include_data=False),
                         name="{0} ({1})".format(bench.name, "NOW"),
                         iterations=bench.iterations),
                    dict(stats, name="{0} ({1})".format(bench.name, compare_name)),
                ])
                if self.compare_fail:
                    for check in self.compare_fail:
                        fail = check.fails(bench, stats)
                        if fail:
                            self.performance_regressions.append((bench.fullname, fail))
            else:
                result.append(bench)
        return result

    def display_results_table(self, tr):
        tr.write_line("")
        tr.rewrite("Computing stats ...", black=True, bold=True)
        groups = self.config.hook.pytest_benchmark_group_stats(
            config=self.config,
            benchmarks=self.benchmarks,
            group_by=self.group_by
        )
        for line, (group, benchmarks) in report_progress(groups, tr, "Computing stats ... group {pos}/{total}"):
            if self.compare_file:
                benchmarks = self.apply_compare(benchmarks, self.compare_name, self.compare_mapping)
            benchmarks = sorted(benchmarks, key=operator.itemgetter(self.sort))

            worst = {}
            best = {}
            solo = len(benchmarks) == 1
            for line, prop in report_progress(("min", "max", "mean", "median", "iqr", "stddev"), tr, "{line}: {value}", line=line):
                worst[prop] = max(bench[prop] for _, bench in report_progress(
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
                best[prop] = min(bench[prop] for _, bench in report_progress(
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
            for line, prop in report_progress(("outliers", "rounds", "iterations"), tr, "{line}: {value}", line=line):
                worst[prop] = max(benchmark[prop] for _, benchmark in report_progress(
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))

            time_unit_key = self.sort
            if self.sort in ("name", "fullname"):
                time_unit_key = "min"
            unit, adjustment = time_unit(best.get(self.sort, benchmarks[0][time_unit_key]))
            labels = {
                "name": "Name (time in %ss)" % unit,
                "min": "Min",
                "max": "Max",
                "mean": "Mean",
                "stddev": "StdDev",
                "rounds": "Rounds",
                "iterations": "Iterations",
                "iqr": "IQR",
                "median": "Median",
                "outliers": "Outliers(*)",
            }
            widths = {
                "name": 3 + max(len(labels["name"]), max(len(benchmark["name"]) for benchmark in benchmarks)),
                "rounds": 2 + max(len(labels["rounds"]), len(str(worst["rounds"]))),
                "iterations": 2 + max(len(labels["iterations"]), len(str(worst["iterations"]))),
                "outliers": 2 + max(len(labels["outliers"]), len(str(worst["outliers"]))),
            }
            for prop in "min", "max", "mean", "stddev", "median", "iqr":
                widths[prop] = 2 + max(len(labels[prop]), max(
                    len(NUMBER_FMT.format(bench[prop] * adjustment))
                    for bench in benchmarks
                ))

            rpadding = 0 if solo else 10
            labels_line = labels["name"].ljust(widths["name"]) + "".join(
                labels[prop].rjust(widths[prop]) + (
                    " " * rpadding
                    if prop not in ["outliers", "rounds", "iterations"]
                    else ""
                )
                for prop in self.columns
            )
            tr.rewrite("")
            tr.write_line(
                (" benchmark%(name)s: %(count)s tests " % dict(
                    count=len(benchmarks),
                    name="" if group is None else " %r" % group,
                )).center(len(labels_line), "-"),
                yellow=True,
            )
            tr.write_line(labels_line)
            tr.write_line("-" * len(labels_line), yellow=True)

            for bench in benchmarks:
                has_error = bench.get("has_error")
                tr.write(bench["name"].ljust(widths["name"]), red=has_error, invert=has_error)
                for prop in self.columns:
                    if prop in ("min", "max", "mean", "stddev", "median", "iqr"):
                        tr.write(
                            ALIGNED_NUMBER_FMT.format(
                                bench[prop] * adjustment,
                                widths[prop],
                                self.compute_baseline_scale(best[prop], bench[prop], rpadding),
                                rpadding
                            ),
                            green=not solo and bench[prop] == best.get(prop),
                            red=not solo and bench[prop] == worst.get(prop),
                            bold=True,
                        )
                    else:
                        tr.write("{0:>{1}}".format(bench[prop], widths[prop]))
                tr.write("\n")
            tr.write_line("-" * len(labels_line), yellow=True)
            tr.write_line("")
        tr.write_line("(*) Outliers: 1 Standard Deviation from Mean; "
                      "1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.", bold=True, black=True)

    def compute_baseline_scale(self, baseline, value, width):
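        # Renders the "(N.NN)" ratio-to-best suffix shown next to each number;
        # ratios above 1000 are shown as "(>1000.0)" and a zero baseline as
        # "(inf)".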
        if not width:
            return ""
        if value == baseline:
            return " (1.0)".ljust(width)

        scale = abs(value / baseline) if baseline else float("inf")
        if scale > 1000:
            if isinf(scale):
                return " (inf)".ljust(width)
            else:
                return " (>1000.0)".ljust(width)
        else:
            return " ({0:.2f})".format(scale).ljust(width)


def pytest_benchmark_compare_machine_info(config, benchmarksession, machine_info, compared_benchmark):
    if compared_benchmark["machine_info"] != machine_info:
        benchmarksession.logger.warn(
            "BENCHMARK-C6",
            "Benchmark machine_info is different. Current: %s VS saved: %s." % (
                format_dict(machine_info),
                format_dict(compared_benchmark["machine_info"]),
            ),
            fslocation=benchmarksession.storage_fslocation
        )

if hasattr(pytest, 'hookimpl'):
    _hookwrapper = pytest.hookimpl(hookwrapper=True)
else:
    _hookwrapper = pytest.mark.hookwrapper


@_hookwrapper
def pytest_runtest_call(item):
    bs = item.config._benchmarksession
    fixture = hasattr(item, "funcargs") and item.funcargs.get("benchmark")
    if isinstance(fixture, BenchmarkFixture):
        if bs.skip:
            pytest.skip("Skipping benchmark (--benchmark-skip active).")
        else:
            yield
    else:
        if bs.only:
            pytest.skip("Skipping non-benchmark (--benchmark-only active).")
        else:
            yield


def pytest_benchmark_group_stats(config, benchmarks, group_by):
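    # Default grouping hook: buckets benchmarks by the --benchmark-group-by
    # value ('group', 'name', 'func', 'fullfunc', 'fullname', 'param' or
    # 'param:NAME') and returns (label, benchmarks) pairs sorted by label.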
    groups = defaultdict(list)
    for bench in benchmarks:
        if group_by == "group":
            groups[bench.group].append(bench)
        elif group_by == "name":
            groups[bench.name].append(bench)
        elif group_by == "func":
            groups[bench.name.split("[")[0]].append(bench)
        elif group_by == "fullfunc":
            groups[bench.fullname.split("[")[0]].append(bench)
        elif group_by == "fullname":
            groups[bench.fullname].append(bench)
        elif group_by == "param":
            groups[bench.param].append(bench)
        elif group_by.startswith("param:"):
            param_name = group_by[len("param:"):]
            param_value = bench.params[param_name]
            groups[param_value].append(bench)
        else:
            raise NotImplementedError("Unsupported grouping %r." % group_by)
    #
    for grouped_benchmarks in groups.values():
        grouped_benchmarks.sort(key=operator.attrgetter("fullname" if "full" in group_by else "name"))
    return sorted(groups.items(), key=lambda pair: pair[0] or "")


def pytest_terminal_summary(terminalreporter):
    try:
        terminalreporter.config._benchmarksession.display(terminalreporter)
    except PerformanceRegression:
        raise
    except Exception:
        terminalreporter.config._benchmarksession.logger.error("\n%s" % traceback.format_exc())
        raise


def pytest_benchmark_generate_machine_info():
    python_implementation = platform.python_implementation()
    python_implementation_version = platform.python_version()
    if python_implementation == 'PyPy':
        python_implementation_version = '%d.%d.%d' % sys.pypy_version_info[:3]
        if sys.pypy_version_info.releaselevel != 'final':
            python_implementation_version += '-%s%d' % sys.pypy_version_info[3:]
    return {
        "node": platform.node(),
        "processor": platform.processor(),
        "machine": platform.machine(),
        "python_compiler": platform.python_compiler(),
        "python_implementation": python_implementation,
        "python_implementation_version": python_implementation_version,
        "python_version": platform.python_version(),
        "python_build": platform.python_build(),
        "release": platform.release(),
        "system": platform.system()
    }


def pytest_benchmark_generate_commit_info(config):
    return get_commit_info()


def pytest_benchmark_generate_json(config, benchmarks, include_data):
    machine_info = config.hook.pytest_benchmark_generate_machine_info(config=config)
    config.hook.pytest_benchmark_update_machine_info(config=config, machine_info=machine_info)

    commit_info = config.hook.pytest_benchmark_generate_commit_info(config=config)
    config.hook.pytest_benchmark_update_commit_info(config=config, commit_info=commit_info)

    benchmarks_json = []
    output_json = {
        "machine_info": machine_info,
        "commit_info": commit_info,
        "benchmarks": benchmarks_json,
        "datetime": datetime.utcnow().isoformat(),
        "version": __version__,
    }
    for bench in benchmarks:
        if not bench.has_error:
            benchmarks_json.append({
                "group": bench.group,
                "name": bench.name,
                "fullname": bench.fullname,
                "params": bench.params,
                "stats": dict(bench.json(include_data=include_data), iterations=bench.iterations),
                "options": dict(
                    (k, v.__name__ if callable(v) else v) for k, v in bench.options.items()
                )
            })
    return output_json


@pytest.fixture(scope="function")
def benchmark(request):
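    # The public fixture. Per-test overrides come from @pytest.mark.benchmark;
    # illustrative example (the values are made up):
    #   @pytest.mark.benchmark(group="io", min_rounds=10, disable_gc=True)
    #   def test_something(benchmark):
    #       ...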
    bs = request.config._benchmarksession

    if bs.skip:
        pytest.skip("Benchmarks are skipped (--benchmark-skip was used).")
    else:
        node = request.node
        marker = node.get_marker("benchmark")
        options = marker.kwargs if marker else {}
        if "timer" in options:
            options["timer"] = NameWrapper(options["timer"])
        fixture = BenchmarkFixture(
            node,
            add_stats=bs._benchmarks.append,
            logger=bs.logger,
            warner=request.node.warn,
            disable=bs.disable,
            **dict(bs.options, **options)
        )
        request.addfinalizer(fixture._cleanup)
        return fixture


@pytest.fixture(scope="function")
def benchmark_weave(benchmark):
    return benchmark.weave


def pytest_runtest_setup(item):
    marker = item.get_marker("benchmark")
    if marker:
        if marker.args:
            raise ValueError("benchmark mark can't have positional arguments.")
        for name in marker.kwargs:
            if name not in (
                    "max_time", "min_rounds", "min_time", "timer", "group", "disable_gc", "warmup",
                    "warmup_iterations", "calibration_precision"):
                raise ValueError("benchmark mark can't have %r keyword argument." % name)


@pytest.mark.trylast  # force the other plugins to initialise, fixes issue with capture not being properly initialised
def pytest_configure(config):
    config.addinivalue_line("markers", "benchmark: mark a test with custom benchmark settings.")
    config._benchmarksession = BenchmarkSession(config)
    config.pluginmanager.register(config._benchmarksession, "pytest-benchmark")