Completed
Push — master ( aa1f5a...5dce03 )
by Ionel Cristian
01:11
created

src.pytest_benchmark.add_display_options()   B

Complexity

Conditions 1

Size

Total Lines 37

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 1
dl 0
loc 37
rs 8.8571
1
from __future__ import division
2
from __future__ import print_function
3
4
import argparse
5
import gc
6
import json
7
import operator
8
import os
9
import platform
10
import sys
11
import time
12
import traceback
13
from collections import defaultdict
14
from datetime import datetime
15
from math import ceil
16
from math import isinf
17
18
import py
19
import pytest
20
21
from . import __version__
22
from .compat import INT
23
from .compat import XRANGE
24
from .timers import compute_timer_precision
25
from .timers import default_timer
26
from .utils import NameWrapper
27
from .utils import SecondsDecimal
28
from .utils import cached_property
29
from .utils import first_or_value
30
from .utils import format_dict
31
from .utils import format_time
32
from .utils import get_commit_info
33
from .utils import get_current_time
34
from .utils import get_tag
35
from .utils import load_timer
36
from .utils import parse_compare_fail
37
from .utils import parse_rounds
38
from .utils import parse_save
39
from .utils import parse_seconds
40
from .utils import parse_sort
41
from .utils import parse_columns
42
from .utils import parse_timer
43
from .utils import parse_warmup
44
from .utils import report_progress
45
from .utils import time_unit
46
47
try:
48
    import statistics
49
except (ImportError, SyntaxError):
50
    statistics = False
51
    statistics_error = traceback.format_exc()
52
else:
53
    from .stats import Stats
54
55
NUMBER_FMT = "{0:,.4f}" if sys.version_info[:2] > (2, 6) else "{0:.4f}"
56
ALIGNED_NUMBER_FMT = "{0:>{1},.4f}{2:<{3}}" if sys.version_info[:2] > (2, 6) else "{0:>{1}.4f}{2:<{3}}"
57
HISTOGRAM_CURRENT = "now"
58
59
60
class PerformanceRegression(pytest.UsageError):
61
    pass
62
63
64
class FixtureAlreadyUsed(Exception):
65
    pass
66
67
68
def pytest_report_header(config):
69
    bs = config._benchmarksession
70
71
    return ("benchmark: %(version)s (defaults:"
72
            " timer=%(timer)s"
73
            " disable_gc=%(disable_gc)s"
74
            " min_rounds=%(min_rounds)s"
75
            " min_time=%(min_time)s"
76
            " max_time=%(max_time)s"
77
            " calibration_precision=%(calibration_precision)s"
78
            " warmup=%(warmup)s"
79
            " warmup_iterations=%(warmup_iterations)s"
80
            ")") % dict(
81
        bs.options,
82
        version=__version__,
83
        timer=bs.options.get("timer"),
84
    )
85
86
87
def add_display_options(addoption):
88
    addoption(
89
        "--benchmark-sort",
90
        metavar="COL", type=parse_sort, default="min",
91
        help="Column to sort on. Can be one of: 'min', 'max', 'mean', 'stddev', "
92
             "'name', 'fullname'. Default: %(default)r"
93
    )
94
    addoption(
95
        "--benchmark-group-by",
96
        metavar="LABEL", default="group",
97
        help="How to group tests. Can be one of: 'group', 'name', 'fullname', 'func', 'fullfunc', "
98
             "'param' or 'param:NAME', where NAME is the name passed to @pytest.parametrize."
99
             " Default: %(default)r"
100
    )
101
    addoption(
102
        "--benchmark-columns",
103
        metavar="LABELS", type=parse_columns,
104
        default="min, max, mean, stddev, median, iqr, outliers, rounds, iterations",
105
        help='Comma-separated list of columns to show in the result table. Default: "%(default)s"'
106
    )
107
    addoption(
108
        "--benchmark-storage",
109
        metavar="STORAGE-PATH", default="./.benchmarks/%s-%s-%s-%s" % (
110
            platform.system(),
111
            platform.python_implementation(),
112
            ".".join(platform.python_version_tuple()[:2]),
113
            platform.architecture()[0]
114
        ),
115
        help="Specify a different path to store the runs (when --benchmark-save or --benchmark-autosave are used). "
116
             "Default: %(default)r",
117
    )
118
    prefix = "benchmark_%s" % get_current_time()
119
    addoption(
120
        "--benchmark-histogram",
121
        action='append', metavar="FILENAME-PREFIX", nargs="?", default=[], const=prefix,
122
        help="Plot graphs of min/max/avg/stddev over time in FILENAME-PREFIX-test_name.svg. If FILENAME-PREFIX contains"
123
             " slashes ('/') then directories will be created. Default: %r" % prefix
124
    )
125
126
127
def pytest_addoption(parser):
128
    group = parser.getgroup("benchmark")
129
    group.addoption(
130
        "--benchmark-min-time",
131
        metavar="SECONDS", type=parse_seconds, default="0.000005",
132
        help="Minimum time per round in seconds. Default: %(default)r"
133
    )
134
    group.addoption(
135
        "--benchmark-max-time",
136
        metavar="SECONDS", type=parse_seconds, default="1.0",
137
        help="Maximum run time per test - it will be repeated until this total time is reached. It may be "
138
             "exceeded if test function is very slow or --benchmark-min-rounds is large (it takes precedence). "
139
             "Default: %(default)r"
140
    )
141
    group.addoption(
142
        "--benchmark-min-rounds",
143
        metavar="NUM", type=parse_rounds, default=5,
144
        help="Minimum rounds, even if total time would exceed `--max-time`. Default: %(default)r"
145
    )
146
    group.addoption(
147
        "--benchmark-timer",
148
        metavar="FUNC", type=parse_timer, default=str(NameWrapper(default_timer)),
149
        help="Timer to use when measuring time. Default: %(default)r"
150
    )
151
    group.addoption(
152
        "--benchmark-calibration-precision",
153
        metavar="NUM", type=int, default=10,
154
        help="Precision to use when calibrating number of iterations. Precision of 10 will make the timer look 10 times"
155
             " more accurate, at a cost of less precise measure of deviations. Default: %(default)r"
156
    )
157
    group.addoption(
158
        "--benchmark-warmup",
159
        metavar="KIND", nargs="?", default=parse_warmup("auto"), type=parse_warmup,
160
        help="Activates warmup. Will run the test function up to number of times in the calibration phase. "
161
             "See `--benchmark-warmup-iterations`. Note: Even the warmup phase obeys --benchmark-max-time. "
162
             "Available KIND: 'auto', 'off', 'on'. Default: 'auto' (automatically activate on PyPy)."
163
    )
164
    group.addoption(
165
        "--benchmark-warmup-iterations",
166
        metavar="NUM", type=int, default=100000,
167
        help="Max number of iterations to run in the warmup phase. Default: %(default)r"
168
    )
169
    group.addoption(
170
        "--benchmark-verbose",
171
        action="store_true", default=False,
172
        help="Dump diagnostic and progress information."
173
    )
174
    group.addoption(
175
        "--benchmark-disable-gc",
176
        action="store_true", default=False,
177
        help="Disable GC during benchmarks."
178
    )
179
    group.addoption(
180
        "--benchmark-skip",
181
        action="store_true", default=False,
182
        help="Skip running any tests that contain benchmarks."
183
    )
184
    group.addoption(
185
        "--benchmark-disable",
186
        action="store_true", default=False,
187
        help="Disable benchmarks. Benchmarked functions are only ran once and no stats are reported. Use this is you "
188
             "want to run the test but don't do any benchmarking."
189
    )
190
    group.addoption(
191
        "--benchmark-only",
192
        action="store_true", default=False,
193
        help="Only run benchmarks."
194
    )
195
    group.addoption(
196
        "--benchmark-save",
197
        metavar="NAME", type=parse_save,
198
        help="Save the current run into 'STORAGE-PATH/counter_NAME.json'."
199
    )
200
    tag = get_tag()
201
    group.addoption(
202
        "--benchmark-autosave",
203
        action='store_const', const=tag,
204
        help="Autosave the current run into 'STORAGE-PATH/counter_%s.json" % tag,
205
    )
206
    group.addoption(
207
        "--benchmark-save-data",
208
        action="store_true",
209
        help="Use this to make --benchmark-save and --benchmark-autosave include all the timing data,"
210
             " not just the stats.",
211
    )
212
    group.addoption(
213
        "--benchmark-json",
214
        metavar="PATH", type=argparse.FileType('wb'),
215
        help="Dump a JSON report into PATH. "
216
             "Note that this will include the complete data (all the timings, not just the stats)."
217
    )
218
    group.addoption(
219
        "--benchmark-compare",
220
        metavar="NUM", nargs="?", default=[], const=True,
221
        help="Compare the current run against run NUM or the latest saved run if unspecified."
222
    )
223
    group.addoption(
224
        "--benchmark-compare-fail",
225
        metavar="EXPR", nargs="+", type=parse_compare_fail,
226
        help="Fail test if performance regresses according to given EXPR"
227
             " (eg: min:5%% or mean:0.001 for number of seconds). Can be used multiple times."
228
    )
229
    add_display_options(group.addoption)
230
231
232
def pytest_addhooks(pluginmanager):
233
    from . import hookspec
234
235
    method = getattr(pluginmanager, "add_hookspecs", None)
236
    if method is None:
237
        method = pluginmanager.addhooks
238
    method(hookspec)
239
240
241
class BenchmarkStats(object):
242
    def __init__(self, fixture, iterations, options):
243
        self.name = fixture.name
244
        self.fullname = fixture.fullname
245
        self.group = fixture.group
246
        self.param = fixture.param
247
        self.params = fixture.params
248
249
        self.iterations = iterations
250
        self.stats = Stats()
251
        self.options = options
252
        self.fixture = fixture
253
254
    def __bool__(self):
255
        return bool(self.stats)
256
257
    def __nonzero__(self):
258
        return bool(self.stats)
259
260
    def get(self, key, default=None):
261
        try:
262
            return getattr(self.stats, key)
263
        except AttributeError:
264
            return getattr(self, key, default)
265
266
    def __getitem__(self, key):
267
        try:
268
            return getattr(self.stats, key)
269
        except AttributeError:
270
            return getattr(self, key)
271
272
    @property
273
    def has_error(self):
274
        return self.fixture.has_error
275
276
    def json(self, include_data=True):
277
        if include_data:
278
            return dict(self.stats.as_dict, data=self.stats.data)
279
        else:
280
            return self.stats.as_dict
281
282
    def update(self, duration):
283
        self.stats.update(duration / self.iterations)
284
285
286
class BenchmarkFixture(object):
287
    _precisions = {}
288
289
    @classmethod
290
    def _get_precision(cls, timer):
291
        if timer in cls._precisions:
292
            return cls._precisions[timer]
293
        else:
294
            return cls._precisions.setdefault(timer, compute_timer_precision(timer))
295
296
    def __init__(self, node, disable_gc, timer, min_rounds, min_time, max_time, warmup, warmup_iterations,
297
                 calibration_precision, add_stats, logger, warner, disable, group=None):
298
        self.name = node.name
299
        self.fullname = node._nodeid
300
        self.disable = disable
301
        if hasattr(node, 'callspec'):
302
            self.param = node.callspec.id
303
            self.params = node.callspec.params
304
        else:
305
            self.param = None
306
            self.params = None
307
        self.group = group
308
        self.has_error = False
309
310
        self._disable_gc = disable_gc
311
        self._timer = timer.target
312
        self._min_rounds = min_rounds
313
        self._max_time = float(max_time)
314
        self._min_time = float(min_time)
315
        self._add_stats = add_stats
316
        self._calibration_precision = calibration_precision
317
        self._warmup = warmup and warmup_iterations
318
        self._logger = logger
319
        self._warner = warner
320
        self._cleanup_callbacks = []
321
        self._mode = None
322
323
    def _make_runner(self, function_to_benchmark, args, kwargs):
324
        def runner(loops_range, timer=self._timer):
325
            gc_enabled = gc.isenabled()
326
            if self._disable_gc:
327
                gc.disable()
328
            tracer = sys.gettrace()
329
            sys.settrace(None)
330
            try:
331
                if loops_range:
332
                    start = timer()
333
                    for _ in loops_range:
334
                        function_to_benchmark(*args, **kwargs)
335
                    end = timer()
336
                    return end - start
337
                else:
338
                    start = timer()
339
                    result = function_to_benchmark(*args, **kwargs)
340
                    end = timer()
341
                    return end - start, result
342
            finally:
343
                sys.settrace(tracer)
344
                if gc_enabled:
345
                    gc.enable()
346
347
        return runner
348
349
    def _make_stats(self, iterations):
350
        stats = BenchmarkStats(self, iterations=iterations, options={
351
            "disable_gc": self._disable_gc,
352
            "timer": self._timer,
353
            "min_rounds": self._min_rounds,
354
            "max_time": self._max_time,
355
            "min_time": self._min_time,
356
            "warmup": self._warmup,
357
        })
358
        self._add_stats(stats)
359
        return stats
360
361
    def __call__(self, function_to_benchmark, *args, **kwargs):
362
        if self._mode:
363
            self.has_error = True
364
            raise FixtureAlreadyUsed(
365
                "Fixture can only be used once. Previously it was used in %s mode." % self._mode)
366
        try:
367
            self._mode = 'benchmark(...)'
368
            return self._raw(function_to_benchmark, *args, **kwargs)
369
        except Exception:
370
            self.has_error = True
371
            raise
372
373
    def pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1):
374
        if self._mode:
375
            self.has_error = True
376
            raise FixtureAlreadyUsed(
377
                "Fixture can only be used once. Previously it was used in %s mode." % self._mode)
378
        try:
379
            self._mode = 'benchmark.pedantic(...)'
380
            return self._raw_pedantic(target, args=args, kwargs=kwargs, setup=setup, rounds=rounds,
381
                                      warmup_rounds=warmup_rounds, iterations=iterations)
382
        except Exception:
383
            self.has_error = True
384
            raise
385
386
    def _raw(self, function_to_benchmark, *args, **kwargs):
387
        if not self.disable:
388
            runner = self._make_runner(function_to_benchmark, args, kwargs)
389
390
            duration, iterations, loops_range = self._calibrate_timer(runner)
391
392
            # Choose how many time we must repeat the test
393
            rounds = int(ceil(self._max_time / duration))
394
            rounds = max(rounds, self._min_rounds)
395
            rounds = min(rounds, sys.maxsize)
396
397
            stats = self._make_stats(iterations)
398
399
            self._logger.debug("  Running %s rounds x %s iterations ..." % (rounds, iterations), yellow=True, bold=True)
400
            run_start = time.time()
401
            if self._warmup:
402
                warmup_rounds = min(rounds, max(1, int(self._warmup / iterations)))
403
                self._logger.debug("  Warmup %s rounds x %s iterations ..." % (warmup_rounds, iterations))
404
                for _ in XRANGE(warmup_rounds):
405
                    runner(loops_range)
406
            for _ in XRANGE(rounds):
407
                stats.update(runner(loops_range))
408
            self._logger.debug("  Ran for %ss." % format_time(time.time() - run_start), yellow=True, bold=True)
409
        return function_to_benchmark(*args, **kwargs)
410
411
    def _raw_pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1):
412
        if kwargs is None:
413
            kwargs = {}
414
415
        has_args = bool(args or kwargs)
416
417
        if not isinstance(iterations, INT) or iterations < 1:
418
            raise ValueError("Must have positive int for `iterations`.")
419
420
        if not isinstance(rounds, INT) or rounds < 1:
421
            raise ValueError("Must have positive int for `rounds`.")
422
423
        if not isinstance(warmup_rounds, INT) or warmup_rounds < 0:
424
            raise ValueError("Must have positive int for `warmup_rounds`.")
425
426
        if iterations > 1 and setup:
427
            raise ValueError("Can't use more than 1 `iterations` with a `setup` function.")
428
429
        def make_arguments(args=args, kwargs=kwargs):
430
            if setup:
431
                maybe_args = setup()
432
                if maybe_args:
433
                    if has_args:
434
                        raise TypeError("Can't use `args` or `kwargs` if `setup` returns the arguments.")
435
                    args, kwargs = maybe_args
436
            return args, kwargs
437
438
        if self.disable:
439
            args, kwargs = make_arguments()
440
            return target(*args, **kwargs)
441
442
        stats = self._make_stats(iterations)
443
        loops_range = XRANGE(iterations) if iterations > 1 else None
444
        for _ in XRANGE(warmup_rounds):
445
            args, kwargs = make_arguments()
446
447
            runner = self._make_runner(target, args, kwargs)
448
            runner(loops_range)
449
450
        for _ in XRANGE(rounds):
451
            args, kwargs = make_arguments()
452
453
            runner = self._make_runner(target, args, kwargs)
454
            if loops_range:
455
                duration = runner(loops_range)
456
            else:
457
                duration, result = runner(loops_range)
458
            stats.update(duration)
459
460
        if loops_range:
461
            args, kwargs = make_arguments()
462
            result = target(*args, **kwargs)
463
        return result
464
465
    def weave(self, target, **kwargs):
466
        try:
467
            import aspectlib
468
        except ImportError as exc:
469
            raise ImportError(exc.args, "Please install aspectlib or pytest-benchmark[aspect]")
470
471
        def aspect(function):
472
            def wrapper(*args, **kwargs):
473
                return self(function, *args, **kwargs)
474
475
            return wrapper
476
477
        self._cleanup_callbacks.append(aspectlib.weave(target, aspect, **kwargs).rollback)
478
479
    patch = weave
480
481
    def _cleanup(self):
482
        while self._cleanup_callbacks:
483
            callback = self._cleanup_callbacks.pop()
484
            callback()
485
        if not self._mode:
486
            self._logger.warn("BENCHMARK-U1", "Benchmark fixture was not used at all in this test!",
487
                              warner=self._warner, suspend=True)
488
489
    def _calibrate_timer(self, runner):
490
        timer_precision = self._get_precision(self._timer)
491
        min_time = max(self._min_time, timer_precision * self._calibration_precision)
492
        min_time_estimate = min_time * 5 / self._calibration_precision
493
        self._logger.debug("")
494
        self._logger.debug("  Timer precision: %ss" % format_time(timer_precision), yellow=True, bold=True)
495
        self._logger.debug("  Calibrating to target round %ss; will estimate when reaching %ss." % (
496
            format_time(min_time), format_time(min_time_estimate)), yellow=True, bold=True)
497
498
        loops = 1
499
        while True:
500
            loops_range = XRANGE(loops)
501
            duration = runner(loops_range)
502
            if self._warmup:
503
                warmup_start = time.time()
504
                warmup_iterations = 0
505
                warmup_rounds = 0
506
                while time.time() - warmup_start < self._max_time and warmup_iterations < self._warmup:
507
                    duration = min(duration, runner(loops_range))
508
                    warmup_rounds += 1
509
                    warmup_iterations += loops
510
                self._logger.debug("    Warmup: %ss (%s x %s iterations)." % (
511
                    format_time(time.time() - warmup_start),
512
                    warmup_rounds, loops
513
                ))
514
515
            self._logger.debug("    Measured %s iterations: %ss." % (loops, format_time(duration)), yellow=True)
516
            if duration >= min_time:
517
                break
518
519
            if duration >= min_time_estimate:
520
                # coarse estimation of the number of loops
521
                loops = int(ceil(min_time * loops / duration))
522
                self._logger.debug("    Estimating %s iterations." % loops, green=True)
523
                if loops == 1:
524
                    # If we got a single loop then bail early - nothing to calibrate if the the
525
                    # test function is 100 times slower than the timer resolution.
526
                    loops_range = XRANGE(loops)
527
                    break
528
            else:
529
                loops *= 10
530
        return duration, loops, loops_range
531
532
533
class Logger(object):
534
    def __init__(self, verbose, config):
535
        self.verbose = verbose
536
        self.term = py.io.TerminalWriter(file=sys.stderr)
537
        self.capman = config.pluginmanager.getplugin("capturemanager")
538
        self.pytest_warn = config.warn
539
        try:
540
            self.pytest_warn_has_fslocation = 'fslocation' in config.warn.func_code.co_varnames
541
        except AttributeError:
542
            self.pytest_warn_has_fslocation = False
543
544
    def warn(self, code, text, warner=None, suspend=False, fslocation=None):
545
        if self.verbose:
546
            if suspend and self.capman:
547
                self.capman.suspendcapture(in_=True)
548
            self.term.line("")
549
            self.term.sep("-", red=True, bold=True)
550
            self.term.write(" WARNING: ", red=True, bold=True)
551
            self.term.line(text, red=True)
552
            self.term.sep("-", red=True, bold=True)
553
            if suspend and self.capman:
554
                self.capman.resumecapture()
555
        if warner is None:
556
            warner = self.pytest_warn
557
        if fslocation and self.pytest_warn_has_fslocation:
558
            warner(code=code, message=text, fslocation=fslocation)
559
        else:
560
            warner(code=code, message=text)
561
562
    def error(self, text):
563
        self.term.line("")
564
        self.term.sep("-", red=True, bold=True)
565
        self.term.line(text, red=True, bold=True)
566
        self.term.sep("-", red=True, bold=True)
567
568
    def info(self, text, **kwargs):
569
        if not kwargs or kwargs == {'bold': True}:
570
            kwargs['purple'] = True
571
        self.term.line(text, **kwargs)
572
573
    def debug(self, text, **kwargs):
574
        if self.verbose:
575
            if self.capman:
576
                self.capman.suspendcapture(in_=True)
577
            self.info(text, **kwargs)
578
            if self.capman:
579
                self.capman.resumecapture()
580
581
582
class BenchmarkSession(object):
583
    compare_mapping = None
584
585
    def __init__(self, config):
586
        self.verbose = config.getoption("benchmark_verbose")
587
        self.logger = Logger(self.verbose, config)
588
        self.config = config
589
        self.options = dict(
590
            min_time=SecondsDecimal(config.getoption("benchmark_min_time")),
591
            min_rounds=config.getoption("benchmark_min_rounds"),
592
            max_time=SecondsDecimal(config.getoption("benchmark_max_time")),
593
            timer=load_timer(config.getoption("benchmark_timer")),
594
            calibration_precision=config.getoption("benchmark_calibration_precision"),
595
            disable_gc=config.getoption("benchmark_disable_gc"),
596
            warmup=config.getoption("benchmark_warmup"),
597
            warmup_iterations=config.getoption("benchmark_warmup_iterations"),
598
        )
599
        self.skip = config.getoption("benchmark_skip")
600
        self.disable = config.getoption("benchmark_disable")
601
602
        if config.getoption("dist", "no") != "no" and not self.skip:
603
            self.logger.warn(
604
                "BENCHMARK-U2",
605
                "Benchmarks are automatically disabled because xdist plugin is active."
606
                "Benchmarks cannot be performed reliably in a parallelized environment.",
607
                fslocation="::"
608
            )
609
            self.disable = True
610
        if hasattr(config, "slaveinput"):
611
            self.disable = True
612
        if not statistics:
613
            self.logger.warn(
614
                "BENCHMARK-U3",
615
                "Benchmarks are automatically disabled because we could not import `statistics`\n\n%s" %
616
                statistics_error,
617
                fslocation="::"
618
            )
619
            self.disable = True
620
621
        self.only = config.getoption("benchmark_only")
622
        self.sort = config.getoption("benchmark_sort")
623
        self.columns = config.getoption("benchmark_columns")
624
        if self.skip and self.only:
625
            raise pytest.UsageError("Can't have both --benchmark-only and --benchmark-skip options.")
626
        if self.disable and self.only:
627
            raise pytest.UsageError(
628
                "Can't have both --benchmark-only and --benchmark-disable options. Note that --benchmark-disable is "
629
                "automatically activated if xdist is on or you're missing the statistics dependency.")
630
        self._benchmarks = []
631
        self.group_by = config.getoption("benchmark_group_by")
632
        self.save = config.getoption("benchmark_save")
633
        self.autosave = config.getoption("benchmark_autosave")
634
        self.save_data = config.getoption("benchmark_save_data")
635
        self.json = config.getoption("benchmark_json")
636
        self.compare = config.getoption("benchmark_compare")
637
        self.compare_fail = config.getoption("benchmark_compare_fail")
638
        self.performance_regressions = []
639
        self.storage = py.path.local(config.getoption("benchmark_storage"))
640
        self.storage.ensure(dir=1)
641
        self.histogram = first_or_value(config.getoption("benchmark_histogram"), False)
642
643
    @property
644
    def benchmarks(self):
645
        return [bench for bench in self._benchmarks if bench]
646
647
    @property
648
    def storage_fslocation(self):
649
        return self.storage.relto(os.getcwd())
650
651
    @cached_property
652
    def compare_file(self):
653
        if self.compare:
654
            files = self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json", sort=True)
655
            if files:
656
                if self.compare is True:
657
                    files.sort()
658
                    return files[-1]
659
                else:
660
                    files = [f for f in files if str(f.basename).startswith(self.compare)]
661
                    if len(files) == 1:
662
                        return files[0]
663
664
                    if not files:
665
                        self.logger.warn("BENCHMARK-C1", "Can't compare. No benchmark files matched %r" % self.compare,
666
                                         fslocation=self.storage_fslocation)
667
                    elif len(files) > 1:
668
                        self.logger.warn(
669
                            "BENCHMARK-C2", "Can't compare. Too many benchmark files matched %r:\n - %s" % (
670
                                self.compare, '\n - '.join(map(str, files))
671
                            ),
672
                            fslocation=self.storage_fslocation)
673
            else:
674
                msg = "Can't compare. No benchmark files in %r. " \
675
                      "Expected files matching [0-9][0-9][0-9][0-9]_*.json." % str(self.storage)
676
                if self.compare is True:
677
                    msg += " Can't load the previous benchmark."
678
                    code = "BENCHMARK-C3"
679
                else:
680
                    msg += " Can't match anything to %r." % self.compare
681
                    code = "BENCHMARK-C4"
682
                self.logger.warn(code, msg, fslocation=self.storage_fslocation)
683
                return
684
685
    @property
686
    def next_num(self):
687
        files = self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json")
688
        files.sort(reverse=True)
689
        if not files:
690
            return "0001"
691
        for f in files:
692
            try:
693
                return "%04i" % (int(str(f.basename).split('_')[0]) + 1)
694
            except ValueError:
695
                raise
696
697
    def handle_saving(self):
698
        if self.json:
699
            output_json = self.config.hook.pytest_benchmark_generate_json(
700
                config=self.config,
701
                benchmarks=self.benchmarks,
702
                include_data=True
703
            )
704
            self.config.hook.pytest_benchmark_update_json(
705
                config=self.config,
706
                benchmarks=self.benchmarks,
707
                output_json=output_json
708
            )
709
            with self.json as fh:
710
                fh.write(json.dumps(output_json, ensure_ascii=True, indent=4).encode())
711
            self.logger.info("Wrote benchmark data in %s" % self.json, purple=True)
712
713
        save = self.save or self.autosave
714
        if save:
715
            output_json = self.config.hook.pytest_benchmark_generate_json(
716
                config=self.config,
717
                benchmarks=self.benchmarks,
718
                include_data=self.save_data
719
            )
720
            self.config.hook.pytest_benchmark_update_json(
721
                config=self.config,
722
                benchmarks=self.benchmarks,
723
                output_json=output_json
724
            )
725
            output_file = self.storage.join("%s_%s.json" % (self.next_num, save))
726
            assert not output_file.exists()
727
728
            with output_file.open('wb') as fh:
729
                fh.write(json.dumps(output_json, ensure_ascii=True, indent=4).encode())
730
            self.logger.info("Saved benchmark data in %s" % output_file)
731
732
    def handle_loading(self):
733
        if self.compare_file:
734
            self.compare_name = self.compare_file.basename.split('_')[0]
735
            with self.compare_file.open('rU') as fh:
736
                try:
737
                    compared_benchmark = json.load(fh)
738
                except Exception as exc:
739
                    self.logger.warn("BENCHMARK-C5", "Failed to load %s: %s" % (self.compare_file, exc),
740
                                     fslocation=self.storage_fslocation)
741
                    return
742
743
            machine_info = self.config.hook.pytest_benchmark_generate_machine_info(config=self.config)
744
            self.config.hook.pytest_benchmark_update_machine_info(config=self.config, machine_info=machine_info)
745
            self.config.hook.pytest_benchmark_compare_machine_info(config=self.config, benchmarksession=self,
746
                                                                   machine_info=machine_info,
747
                                                                   compared_benchmark=compared_benchmark)
748
            self.compare_mapping = dict((bench['fullname'], bench) for bench in compared_benchmark['benchmarks'])
749
750
            self.logger.info("Comparing against benchmark %s:" % self.compare_file.basename, bold=True)
751
            self.logger.info("| commit info: %s" % format_dict(compared_benchmark['commit_info']))
752
            self.logger.info("| saved at: %s" % compared_benchmark['datetime'])
753
            self.logger.info("| saved using pytest-benchmark %s:" % compared_benchmark['version'])
754
755
    def display(self, tr):
756
        if not self.benchmarks:
757
            return
758
759
        tr.ensure_newline()
760
        self.handle_saving()
761
        self.handle_loading()
762
        if self.benchmarks:
763
            self.display_results_table(tr)
764
            self.check_regressions()
765
            self.handle_histogram()
766
767
    def check_regressions(self):
768
        if self.compare_fail and not self.compare_file:
769
            raise pytest.UsageError("--benchmark-compare-fail requires valid --benchmark-compare.")
770
771
        if self.performance_regressions:
772
            self.logger.error("Performance has regressed:\n%s" % "\n".join(
773
                "\t%s - %s" % line for line in self.performance_regressions
774
            ))
775
            raise PerformanceRegression("Performance has regressed.")
776
777
    def handle_histogram(self):
778
        if self.histogram:
779
            from .histogram import make_plot
780
781
            history = {}
782
            for bench_file in self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json"):
783
                with bench_file.open('rU') as fh:
784
                    fullname = bench_file.purebasename
785
                    if '_' in fullname:
786
                        id_, name = fullname.split('_', 1)
787
                    else:
788
                        id_, name = fullname, ''
789
                    data = history[id_] = json.load(fh)
790
                    data['name'] = name
791
                    data['mapping'] = dict((bench['fullname'], bench) for bench in data['benchmarks'])
792
793
            for bench in self.benchmarks:
794
                name = bench.fullname
795
                for c in "\/:*?<>|":
796
                    name = name.replace(c, '_').replace('__', '_')
797
                output_file = py.path.local("%s-%s.svg" % (self.histogram, name)).ensure()
798
799
                table = list(self.generate_histogram_table(bench, history, sorted(history)))
800
801
                plot = make_plot(
802
                    bench_name=bench.fullname,
803
                    table=table,
804
                    compare=self.compare_file,
805
                    annotations=history,
806
                    sort=self.sort,
807
                    current=HISTOGRAM_CURRENT,
808
                )
809
                plot.render_to_file(str(output_file))
810
                self.logger.info("Generated histogram %s" % output_file, bold=True)
811
812
    @staticmethod
813
    def generate_histogram_table(current, history, sequence):
814
        for name in sequence:
815
            trial = history[name]
816
            for bench in trial["benchmarks"]:
817
                if bench["fullname"] == current.fullname:
818
                    found = True
819
                else:
820
                    found = False
821
822
                if found:
823
                    yield "%s" % name, bench["stats"]
824
                    break
825
826
        yield HISTOGRAM_CURRENT, current.json()
827
828
    def apply_compare(self, benchmarks, compare_name, compare_mapping):
829
        result = []
830
        for bench in benchmarks:
831
            if bench.fullname in compare_mapping:
832
                stats = compare_mapping[bench.fullname]["stats"]
833
                result.extend([
834
                    dict(bench.json(include_data=False),
835
                         name="{0} ({1})".format(bench.name, "NOW"),
836
                         iterations=bench.iterations),
837
                    dict(stats, name="{0} ({1})".format(bench.name, compare_name)),
838
                ])
839
                if self.compare_fail:
840
                    for check in self.compare_fail:
841
                        fail = check.fails(bench, stats)
842
                        if fail:
843
                            self.performance_regressions.append((bench.fullname, fail))
844
            else:
845
                result.append(bench)
846
        return result
847
848
    def display_results_table(self, tr):
849
        tr.write_line("")
850
        tr.rewrite("Computing stats ...", black=True, bold=True)
851
        groups = self.config.hook.pytest_benchmark_group_stats(
852
            config=self.config,
853
            benchmarks=self.benchmarks,
854
            group_by=self.group_by
855
        )
856
        for line, (group, benchmarks) in report_progress(groups, tr, "Computing stats ... group {pos}/{total}"):
857
            if self.compare_file:
858
                benchmarks = self.apply_compare(benchmarks, self.compare_name, self.compare_mapping)
859
            benchmarks = sorted(benchmarks, key=operator.itemgetter(self.sort))
860
861
            worst = {}
862
            best = {}
863
            solo = len(benchmarks) == 1
864
            for line, prop in report_progress(("min", "max", "mean", "median", "iqr", "stddev"), tr, "{line}: {value}", line=line):
865
                worst[prop] = max(bench[prop] for _, bench in report_progress(
866
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
867
                best[prop] = min(bench[prop] for _, bench in report_progress(
868
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
869
            for line, prop in report_progress(("outliers", "rounds", "iterations"), tr, "{line}: {value}", line=line):
870
                worst[prop] = max(benchmark[prop] for _, benchmark in report_progress(
871
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
872
873
            time_unit_key = self.sort
874
            if self.sort in ("name", "fullname"):
875
                time_unit_key = "min"
876
            unit, adjustment = time_unit(best.get(self.sort, benchmarks[0][time_unit_key]))
877
            labels = {
878
                "name": "Name (time in %ss)" % unit,
879
                "min": "Min",
880
                "max": "Max",
881
                "mean": "Mean",
882
                "stddev": "StdDev",
883
                "rounds": "Rounds",
884
                "iterations": "Iterations",
885
                "iqr": "IQR",
886
                "median": "Median",
887
                "outliers": "Outliers(*)",
888
            }
889
            widths = {
890
                "name": 3 + max(len(labels["name"]), max(len(benchmark["name"]) for benchmark in benchmarks)),
891
                "rounds": 2 + max(len(labels["rounds"]), len(str(worst["rounds"]))),
892
                "iterations": 2 + max(len(labels["iterations"]), len(str(worst["iterations"]))),
893
                "outliers": 2 + max(len(labels["outliers"]), len(str(worst["outliers"]))),
894
            }
895
            for prop in "min", "max", "mean", "stddev", "median", "iqr":
896
                widths[prop] = 2 + max(len(labels[prop]), max(
897
                    len(NUMBER_FMT.format(bench[prop] * adjustment))
898
                    for bench in benchmarks
899
                ))
900
901
            rpadding = 0 if solo else 10
902
            labels_line = labels["name"].ljust(widths["name"]) + "".join(
903
                labels[prop].rjust(widths[prop]) + (
904
                    " " * rpadding
905
                    if prop not in ["outliers", "rounds", "iterations"]
906
                    else ""
907
                )
908
                for prop in self.columns
909
            )
910
            tr.rewrite("")
911
            tr.write_line(
912
                (" benchmark%(name)s: %(count)s tests " % dict(
913
                    count=len(benchmarks),
914
                    name="" if group is None else " %r" % group,
915
                )).center(len(labels_line), "-"),
916
                yellow=True,
917
            )
918
            tr.write_line(labels_line)
919
            tr.write_line("-" * len(labels_line), yellow=True)
920
921
            for bench in benchmarks:
922
                has_error = bench.get("has_error")
923
                tr.write(bench["name"].ljust(widths["name"]), red=has_error, invert=has_error)
924
                for prop in self.columns:
925
                    if prop in ("min", "max", "mean", "stddev", "median", "iqr"):
926
                        tr.write(
927
                            ALIGNED_NUMBER_FMT.format(
928
                                bench[prop] * adjustment,
929
                                widths[prop],
930
                                self.compute_baseline_scale(best[prop], bench[prop], rpadding),
931
                                rpadding
932
                            ),
933
                            green=not solo and bench[prop] == best.get(prop),
934
                            red=not solo and bench[prop] == worst.get(prop),
935
                            bold=True,
936
                        )
937
                    else:
938
                        tr.write("{0:>{1}}".format(bench[prop], widths[prop]))
939
                tr.write("\n")
940
            tr.write_line("-" * len(labels_line), yellow=True)
941
            tr.write_line("")
942
        tr.write_line("(*) Outliers: 1 Standard Deviation from Mean; "
943
                      "1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.", bold=True, black=True)
944
945
    def compute_baseline_scale(self, baseline, value, width):
946
        if not width:
947
            return ""
948
        if value == baseline:
949
            return " (1.0)".ljust(width)
950
951
        scale = abs(value / baseline) if baseline else float("inf")
952
        if scale > 1000:
953
            if isinf(scale):
954
                return " (inf)".ljust(width)
955
            else:
956
                return " (>1000.0)".ljust(width)
957
        else:
958
            return " ({0:.2f})".format(scale).ljust(width)
959
960
961
def pytest_benchmark_compare_machine_info(config, benchmarksession, machine_info, compared_benchmark):
962
    if compared_benchmark["machine_info"] != machine_info:
963
        benchmarksession.logger.warn(
964
            "BENCHMARK-C6",
965
            "Benchmark machine_info is different. Current: %s VS saved: %s." % (
966
                format_dict(machine_info),
967
                format_dict(compared_benchmark["machine_info"]),
968
            ),
969
            fslocation=benchmarksession.storage_fslocation
970
        )
971
972
if hasattr(pytest, 'hookimpl'):
973
    _hookwrapper = pytest.hookimpl(hookwrapper=True)
974
else:
975
    _hookwrapper = pytest.mark.hookwrapper
976
977
978
@_hookwrapper
979
def pytest_runtest_call(item):
980
    bs = item.config._benchmarksession
981
    fixure = hasattr(item, "funcargs") and item.funcargs.get("benchmark")
982
    if isinstance(fixure, BenchmarkFixture):
983
        if bs.skip:
984
            pytest.skip("Skipping benchmark (--benchmark-skip active).")
985
        else:
986
            yield
987
    else:
988
        if bs.only:
989
            pytest.skip("Skipping non-benchmark (--benchmark-only active).")
990
        else:
991
            yield
992
993
994
def pytest_benchmark_group_stats(config, benchmarks, group_by):
995
    groups = defaultdict(list)
996
    for bench in benchmarks:
997
        if group_by == "group":
998
            groups[bench.group].append(bench)
999
        elif group_by == "name":
1000
            groups[bench.name].append(bench)
1001
        elif group_by == "func":
1002
            groups[bench.name.split("[")[0]].append(bench)
1003
        elif group_by == "fullfunc":
1004
            groups[bench.fullname.split("[")[0]].append(bench)
1005
        elif group_by == "fullname":
1006
            groups[bench.fullname].append(bench)
1007
        elif group_by == "param":
1008
            groups[bench.param].append(bench)
1009
        elif group_by.startswith("param:"):
1010
            param_name = group_by[len("param:"):]
1011
            param_value = bench.params[param_name]
1012
            groups[param_value].append(bench)
1013
        else:
1014
            raise NotImplementedError("Unsupported grouping %r." % group_by)
1015
    #
1016
    for grouped_benchmarks in groups.values():
1017
        grouped_benchmarks.sort(key=operator.attrgetter("fullname" if "full" in group_by else "name"))
1018
    return sorted(groups.items(), key=lambda pair: pair[0] or "")
1019
1020
1021
def pytest_terminal_summary(terminalreporter):
1022
    try:
1023
        terminalreporter.config._benchmarksession.display(terminalreporter)
1024
    except PerformanceRegression:
1025
        raise
1026
    except Exception:
1027
        terminalreporter.config._benchmarksession.logger.error("\n%s" % traceback.format_exc())
1028
        raise
1029
1030
1031
def pytest_benchmark_generate_machine_info():
1032
    python_implementation = platform.python_implementation()
1033
    python_implementation_version = platform.python_version()
1034
    if python_implementation == 'PyPy':
1035
        python_implementation_version = '%d.%d.%d' % sys.pypy_version_info[:3]
1036
        if sys.pypy_version_info.releaselevel != 'final':
1037
            python_implementation_version += '-%s%d' % sys.pypy_version_info[3:]
1038
    return {
1039
        "node": platform.node(),
1040
        "processor": platform.processor(),
1041
        "machine": platform.machine(),
1042
        "python_compiler": platform.python_compiler(),
1043
        "python_implementation": python_implementation,
1044
        "python_implementation_version": python_implementation_version,
1045
        "python_version": platform.python_version(),
1046
        "python_build": platform.python_build(),
1047
        "release": platform.release(),
1048
        "system": platform.system()
1049
    }
1050
1051
1052
def pytest_benchmark_generate_commit_info(config):
1053
    return get_commit_info()
1054
1055
1056
def pytest_benchmark_generate_json(config, benchmarks, include_data):
1057
    machine_info = config.hook.pytest_benchmark_generate_machine_info(config=config)
1058
    config.hook.pytest_benchmark_update_machine_info(config=config, machine_info=machine_info)
1059
1060
    commit_info = config.hook.pytest_benchmark_generate_commit_info(config=config)
1061
    config.hook.pytest_benchmark_update_commit_info(config=config, commit_info=commit_info)
1062
1063
    benchmarks_json = []
1064
    output_json = {
1065
        "machine_info": machine_info,
1066
        "commit_info": commit_info,
1067
        "benchmarks": benchmarks_json,
1068
        "datetime": datetime.utcnow().isoformat(),
1069
        "version": __version__,
1070
    }
1071
    for bench in benchmarks:
1072
        if not bench.has_error:
1073
            benchmarks_json.append({
1074
                "group": bench.group,
1075
                "name": bench.name,
1076
                "fullname": bench.fullname,
1077
                "params": bench.params,
1078
                "stats": dict(bench.json(include_data=include_data), iterations=bench.iterations),
1079
                "options": dict(
1080
                    (k, v.__name__ if callable(v) else v) for k, v in bench.options.items()
1081
                )
1082
            })
1083
    return output_json
1084
1085
1086
@pytest.fixture(scope="function")
1087
def benchmark(request):
1088
    bs = request.config._benchmarksession
1089
1090
    if bs.skip:
1091
        pytest.skip("Benchmarks are skipped (--benchmark-skip was used).")
1092
    else:
1093
        node = request.node
1094
        marker = node.get_marker("benchmark")
1095
        options = marker.kwargs if marker else {}
1096
        if "timer" in options:
1097
            options["timer"] = NameWrapper(options["timer"])
1098
        fixture = BenchmarkFixture(
1099
            node,
1100
            add_stats=bs._benchmarks.append,
1101
            logger=bs.logger,
1102
            warner=request.node.warn,
1103
            disable=bs.disable,
1104
            **dict(bs.options, **options)
1105
        )
1106
        request.addfinalizer(fixture._cleanup)
1107
        return fixture
1108
1109
1110
@pytest.fixture(scope="function")
1111
def benchmark_weave(benchmark):
1112
    return benchmark.weave
1113
1114
1115
def pytest_runtest_setup(item):
1116
    marker = item.get_marker("benchmark")
1117
    if marker:
1118
        if marker.args:
1119
            raise ValueError("benchmark mark can't have positional arguments.")
1120
        for name in marker.kwargs:
1121
            if name not in (
1122
                    "max_time", "min_rounds", "min_time", "timer", "group", "disable_gc", "warmup",
1123
                    "warmup_iterations", "calibration_precision"):
1124
                raise ValueError("benchmark mark can't have %r keyword argument." % name)
1125
1126
1127
@pytest.mark.trylast  # force the other plugins to initialise, fixes issue with capture not being properly initialised
1128
def pytest_configure(config):
1129
    config.addinivalue_line("markers", "benchmark: mark a test with custom benchmark settings.")
1130
    config._benchmarksession = BenchmarkSession(config)
1131
    config.pluginmanager.register(config._benchmarksession, "pytest-benchmark")
1132