Completed · Pull Request — master (#37) · by unknown · created · 58s

src.pytest_benchmark.BenchmarkFixture.pedantic()    grade: A

Complexity:  Conditions 3
Size:        Total Lines 12
Duplication: Lines 0, Ratio 0 %

Metric   Value
cc       3
dl       0
loc      12
rs       9.4286
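
For context, `benchmark.pedantic()` is the mode of the `benchmark` fixture that gives the caller explicit control over rounds, warmup rounds, iterations and argument setup; its signature is visible in the listing below. A minimal usage sketch follows, assuming nothing beyond that signature (the test and the `parse_csv_line` function are made up for illustration):

def parse_csv_line(line):
    # Made-up target function; stands in for whatever you want to measure.
    return line.split(",")

def test_parse_csv_line(benchmark):
    # rounds / warmup_rounds / iterations map directly onto the keyword
    # arguments of BenchmarkFixture.pedantic() shown in the source below.
    result = benchmark.pedantic(
        parse_csv_line,
        args=("a,b,c",),
        rounds=10,          # 10 measured rounds
        warmup_rounds=2,    # 2 unmeasured warmup rounds
        iterations=5,       # each round times 5 back-to-back calls
    )
    assert result == ["a", "b", "c"]

The full source of the module containing the graded method follows.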
from __future__ import division
from __future__ import print_function

import argparse
import gc
import json
import operator
import os
import platform
import sys
import time
import traceback
from collections import defaultdict
from datetime import datetime
from math import ceil
from math import isinf

import py
import pytest

from . import __version__
from .compat import INT
from .compat import XRANGE
from .timers import compute_timer_precision
from .timers import default_timer
from .utils import NameWrapper
from .utils import SecondsDecimal
from .utils import cached_property
from .utils import first_or_value
from .utils import format_dict
from .utils import format_time
from .utils import get_commit_info
from .utils import get_current_time
from .utils import get_tag
from .utils import load_timer
from .utils import parse_compare_fail
from .utils import parse_rounds
from .utils import parse_save
from .utils import parse_seconds
from .utils import parse_sort
from .utils import parse_columns
from .utils import parse_timer
from .utils import parse_warmup
from .utils import report_progress
from .utils import time_unit

try:
    import statistics
except (ImportError, SyntaxError):
    statistics = False
    statistics_error = traceback.format_exc()
else:
    from .stats import Stats

NUMBER_FMT = "{0:,.4f}" if sys.version_info[:2] > (2, 6) else "{0:.4f}"
ALIGNED_NUMBER_FMT = "{0:>{1},.4f}{2:<{3}}" if sys.version_info[:2] > (2, 6) else "{0:>{1}.4f}{2:<{3}}"
HISTOGRAM_CURRENT = "now"


class PerformanceRegression(pytest.UsageError):
    pass


class FixtureAlreadyUsed(Exception):
    pass


def pytest_report_header(config):
    bs = config._benchmarksession

    return ("benchmark: %(version)s (defaults:"
            " timer=%(timer)s"
            " disable_gc=%(disable_gc)s"
            " min_rounds=%(min_rounds)s"
            " min_time=%(min_time)s"
            " max_time=%(max_time)s"
            " calibration_precision=%(calibration_precision)s"
            " warmup=%(warmup)s"
            " warmup_iterations=%(warmup_iterations)s"
            ")") % dict(
        bs.options,
        version=__version__,
        timer=bs.options.get("timer"),
    )


def pytest_addoption(parser):
    group = parser.getgroup("benchmark")
    group.addoption(
        "--benchmark-min-time",
        metavar="SECONDS", type=parse_seconds, default="0.000005",
        help="Minimum time per round in seconds. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-max-time",
        metavar="SECONDS", type=parse_seconds, default="1.0",
        help="Maximum run time per test - it will be repeated until this total time is reached. It may be "
             "exceeded if the test function is very slow or --benchmark-min-rounds is large (it takes precedence). "
             "Default: %(default)r"
    )
    group.addoption(
        "--benchmark-min-rounds",
        metavar="NUM", type=parse_rounds, default=5,
        help="Minimum rounds, even if total time would exceed `--max-time`. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-sort",
        metavar="COL", type=parse_sort, default="min",
        help="Column to sort on. Can be one of: 'min', 'max', 'mean', 'stddev', "
             "'name', 'fullname'. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-group-by",
        metavar="LABEL", default="group",
        help="How to group tests. Can be one of: 'group', 'name', 'fullname', 'func', 'fullfunc', "
             "'param' or 'param:NAME', where NAME is the name passed to @pytest.parametrize."
             " Default: %(default)r"
    )
    group.addoption(
        "--benchmark-columns",
        metavar="LABELS", type=parse_columns,
        default="min, max, mean, stddev, median, iqr, outliers, rounds, iterations",
        help='Comma-separated list of columns to show in the result table. Default: "%(default)s"')
    group.addoption(
        "--benchmark-timer",
        metavar="FUNC", type=parse_timer, default=str(NameWrapper(default_timer)),
        help="Timer to use when measuring time. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-calibration-precision",
        metavar="NUM", type=int, default=10,
        help="Precision to use when calibrating the number of iterations. A precision of 10 will make the timer look 10 times"
             " more accurate, at the cost of a less precise measure of deviations. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-warmup",
        metavar="KIND", nargs="?", default=parse_warmup("auto"), type=parse_warmup,
        help="Activates warmup. Will run the test function up to the specified number of times in the calibration phase. "
             "See `--benchmark-warmup-iterations`. Note: Even the warmup phase obeys --benchmark-max-time. "
             "Available KIND: 'auto', 'off', 'on'. Default: 'auto' (automatically activate on PyPy)."
    )
    group.addoption(
        "--benchmark-warmup-iterations",
        metavar="NUM", type=int, default=100000,
        help="Max number of iterations to run in the warmup phase. Default: %(default)r"
    )
    group.addoption(
        "--benchmark-verbose",
        action="store_true", default=False,
        help="Dump diagnostic and progress information."
    )
    group.addoption(
        "--benchmark-disable-gc",
        action="store_true", default=False,
        help="Disable GC during benchmarks."
    )
    group.addoption(
        "--benchmark-skip",
        action="store_true", default=False,
        help="Skip running any tests that contain benchmarks."
    )
    group.addoption(
        "--benchmark-disable",
        action="store_true", default=False,
        help="Disable benchmarks. Benchmarked functions are only run once and no stats are reported. Use this if you "
             "want to run the test but don't do any benchmarking."
    )
    group.addoption(
        "--benchmark-only",
        action="store_true", default=False,
        help="Only run benchmarks."
    )
    group.addoption(
        "--benchmark-save",
        metavar="NAME", type=parse_save,
        help="Save the current run into 'STORAGE-PATH/counter_NAME.json'."
    )
    tag = get_tag()
    group.addoption(
        "--benchmark-autosave",
        action='store_const', const=tag,
        help="Autosave the current run into 'STORAGE-PATH/counter_%s.json'" % tag,
    )
    group.addoption(
        "--benchmark-save-data",
        action="store_true",
        help="Use this to make --benchmark-save and --benchmark-autosave include all the timing data,"
             " not just the stats.",
    )
    group.addoption(
        "--benchmark-compare",
        metavar="NUM", nargs="?", default=[], const=True,
        help="Compare the current run against run NUM or the latest saved run if unspecified."
    )
    group.addoption(
        "--benchmark-compare-fail",
        metavar="EXPR", nargs="+", type=parse_compare_fail,
        help="Fail test if performance regresses according to given EXPR"
             " (e.g. min:5%% or mean:0.001 for a number of seconds). Can be used multiple times."
    )
    group.addoption(
        "--benchmark-storage",
        metavar="STORAGE-PATH", default="./.benchmarks/%s-%s-%s-%s" % (
            platform.system(),
            platform.python_implementation(),
            ".".join(platform.python_version_tuple()[:2]),
            platform.architecture()[0]
        ),
        help="Specify a different path to store the runs (when --benchmark-save or --benchmark-autosave are used). "
             "Default: %(default)r",
    )
    prefix = "benchmark_%s" % get_current_time()
    group.addoption(
        "--benchmark-histogram",
        action='append', metavar="FILENAME-PREFIX", nargs="?", default=[], const=prefix,
        help="Plot graphs of min/max/avg/stddev over time in FILENAME-PREFIX-test_name.svg. If FILENAME-PREFIX contains"
             " slashes ('/') then directories will be created. Default: %r" % prefix
    )
    group.addoption(
        "--benchmark-json",
        metavar="PATH", type=argparse.FileType('wb'),
        help="Dump a JSON report into PATH. "
             "Note that this will include the complete data (all the timings, not just the stats)."
    )


def pytest_addhooks(pluginmanager):
    from . import hookspec

    method = getattr(pluginmanager, "add_hookspecs", None)
    if method is None:
        method = pluginmanager.addhooks
    method(hookspec)


class BenchmarkStats(object):
    def __init__(self, fixture, iterations, options):
        self.name = fixture.name
        self.fullname = fixture.fullname
        self.group = fixture.group
        self.param = fixture.param
        self.params = fixture.params

        self.iterations = iterations
        self.stats = Stats()
        self.options = options
        self.fixture = fixture

    def __bool__(self):
        return bool(self.stats)

    def __nonzero__(self):
        return bool(self.stats)

    def get(self, key, default=None):
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key, default)

    def __getitem__(self, key):
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key)

    @property
    def has_error(self):
        return self.fixture.has_error

    def json(self, include_data=True):
        if include_data:
            return dict(self.stats.as_dict, data=self.stats.data)
        else:
            return self.stats.as_dict

    def update(self, duration):
        self.stats.update(duration / self.iterations)


class BenchmarkFixture(object):
    _precisions = {}

    @classmethod
    def _get_precision(cls, timer):
        if timer in cls._precisions:
            return cls._precisions[timer]
        else:
            return cls._precisions.setdefault(timer, compute_timer_precision(timer))

    def __init__(self, node, disable_gc, timer, min_rounds, min_time, max_time, warmup, warmup_iterations,
                 calibration_precision, add_stats, logger, warner, disable, group=None):
        self.name = node.name
        self.fullname = node._nodeid
        self.disable = disable
        if hasattr(node, 'callspec'):
            self.param = node.callspec.id
            self.params = node.callspec.params
        else:
            self.param = None
            self.params = None
        self.group = group
        self.has_error = False

        self._disable_gc = disable_gc
        self._timer = timer.target
        self._min_rounds = min_rounds
        self._max_time = float(max_time)
        self._min_time = float(min_time)
        self._add_stats = add_stats
        self._calibration_precision = calibration_precision
        self._warmup = warmup and warmup_iterations
        self._logger = logger
        self._warner = warner
        self._cleanup_callbacks = []
        self._mode = None

    def _make_runner(self, function_to_benchmark, args, kwargs):
        def runner(loops_range, timer=self._timer):
            gc_enabled = gc.isenabled()
            if self._disable_gc:
                gc.disable()
            tracer = sys.gettrace()
            sys.settrace(None)
            try:
                if loops_range:
                    start = timer()
                    for _ in loops_range:
                        function_to_benchmark(*args, **kwargs)
                    end = timer()
                    return end - start
                else:
                    start = timer()
                    result = function_to_benchmark(*args, **kwargs)
                    end = timer()
                    return end - start, result
            finally:
                sys.settrace(tracer)
                if gc_enabled:
                    gc.enable()

        return runner

    def _make_stats(self, iterations):
        stats = BenchmarkStats(self, iterations=iterations, options={
            "disable_gc": self._disable_gc,
            "timer": self._timer,
            "min_rounds": self._min_rounds,
            "max_time": self._max_time,
            "min_time": self._min_time,
            "warmup": self._warmup,
        })
        self._add_stats(stats)
        return stats

    def __call__(self, function_to_benchmark, *args, **kwargs):
        if self._mode:
            self.has_error = True
            raise FixtureAlreadyUsed(
                "Fixture can only be used once. Previously it was used in %s mode." % self._mode)
        try:
            self._mode = 'benchmark(...)'
            return self._raw(function_to_benchmark, *args, **kwargs)
        except Exception:
            self.has_error = True
            raise

    def pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1):
        if self._mode:
            self.has_error = True
            raise FixtureAlreadyUsed(
                "Fixture can only be used once. Previously it was used in %s mode." % self._mode)
        try:
            self._mode = 'benchmark.pedantic(...)'
            return self._raw_pedantic(target, args=args, kwargs=kwargs, setup=setup, rounds=rounds,
                                      warmup_rounds=warmup_rounds, iterations=iterations)
        except Exception:
            self.has_error = True
            raise

    def _raw(self, function_to_benchmark, *args, **kwargs):
        if not self.disable:
            runner = self._make_runner(function_to_benchmark, args, kwargs)

            duration, iterations, loops_range = self._calibrate_timer(runner)

            # Choose how many times we must repeat the test
            rounds = int(ceil(self._max_time / duration))
            rounds = max(rounds, self._min_rounds)
            rounds = min(rounds, sys.maxsize)

            stats = self._make_stats(iterations)

            self._logger.debug("  Running %s rounds x %s iterations ..." % (rounds, iterations), yellow=True, bold=True)
            run_start = time.time()
            if self._warmup:
                warmup_rounds = min(rounds, max(1, int(self._warmup / iterations)))
                self._logger.debug("  Warmup %s rounds x %s iterations ..." % (warmup_rounds, iterations))
                for _ in XRANGE(warmup_rounds):
                    runner(loops_range)
            for _ in XRANGE(rounds):
                stats.update(runner(loops_range))
            self._logger.debug("  Ran for %ss." % format_time(time.time() - run_start), yellow=True, bold=True)
        return function_to_benchmark(*args, **kwargs)

    def _raw_pedantic(self, target, args=(), kwargs=None, setup=None, rounds=1, warmup_rounds=0, iterations=1):
        if kwargs is None:
            kwargs = {}

        has_args = bool(args or kwargs)

        if not isinstance(iterations, INT) or iterations < 1:
            raise ValueError("Must have positive int for `iterations`.")

        if not isinstance(rounds, INT) or rounds < 1:
            raise ValueError("Must have positive int for `rounds`.")

        if not isinstance(warmup_rounds, INT) or warmup_rounds < 0:
            raise ValueError("Must have non-negative int for `warmup_rounds`.")

        if iterations > 1 and setup:
            raise ValueError("Can't use more than 1 `iterations` with a `setup` function.")

        def make_arguments(args=args, kwargs=kwargs):
            if setup:
                maybe_args = setup()
                if maybe_args:
                    if has_args:
                        raise TypeError("Can't use `args` or `kwargs` if `setup` returns the arguments.")
                    args, kwargs = maybe_args
            return args, kwargs

        if self.disable:
            args, kwargs = make_arguments()
            return target(*args, **kwargs)

        stats = self._make_stats(iterations)
        loops_range = XRANGE(iterations) if iterations > 1 else None
        for _ in XRANGE(warmup_rounds):
            args, kwargs = make_arguments()

            runner = self._make_runner(target, args, kwargs)
            runner(loops_range)

        for _ in XRANGE(rounds):
            args, kwargs = make_arguments()

            runner = self._make_runner(target, args, kwargs)
            if loops_range:
                duration = runner(loops_range)
            else:
                duration, result = runner(loops_range)
            stats.update(duration)

        if loops_range:
            args, kwargs = make_arguments()
            result = target(*args, **kwargs)
        return result

    def weave(self, target, **kwargs):
        try:
            import aspectlib
        except ImportError as exc:
            raise ImportError(exc.args, "Please install aspectlib or pytest-benchmark[aspect]")

        def aspect(function):
            def wrapper(*args, **kwargs):
                return self(function, *args, **kwargs)

            return wrapper

        self._cleanup_callbacks.append(aspectlib.weave(target, aspect, **kwargs).rollback)

    patch = weave

    def _cleanup(self):
        while self._cleanup_callbacks:
            callback = self._cleanup_callbacks.pop()
            callback()
        if not self._mode:
            self._logger.warn("BENCHMARK-U1", "Benchmark fixture was not used at all in this test!",
                              warner=self._warner, suspend=True)

    def _calibrate_timer(self, runner):
        timer_precision = self._get_precision(self._timer)
        min_time = max(self._min_time, timer_precision * self._calibration_precision)
        min_time_estimate = min_time * 5 / self._calibration_precision
        self._logger.debug("")
        self._logger.debug("  Timer precision: %ss" % format_time(timer_precision), yellow=True, bold=True)
        self._logger.debug("  Calibrating to target round %ss; will estimate when reaching %ss." % (
            format_time(min_time), format_time(min_time_estimate)), yellow=True, bold=True)

        loops = 1
        while True:
            loops_range = XRANGE(loops)
            duration = runner(loops_range)
            if self._warmup:
                warmup_start = time.time()
                warmup_iterations = 0
                warmup_rounds = 0
                while time.time() - warmup_start < self._max_time and warmup_iterations < self._warmup:
                    duration = min(duration, runner(loops_range))
                    warmup_rounds += 1
                    warmup_iterations += loops
                self._logger.debug("    Warmup: %ss (%s x %s iterations)." % (
                    format_time(time.time() - warmup_start),
                    warmup_rounds, loops
                ))

            self._logger.debug("    Measured %s iterations: %ss." % (loops, format_time(duration)), yellow=True)
            if duration >= min_time:
                break

            if duration >= min_time_estimate:
                # coarse estimation of the number of loops
                loops = int(ceil(min_time * loops / duration))
                self._logger.debug("    Estimating %s iterations." % loops, green=True)
                if loops == 1:
                    # If we got a single loop then bail early - nothing to calibrate if the
                    # test function is 100 times slower than the timer resolution.
                    loops_range = XRANGE(loops)
                    break
            else:
                loops *= 10
        return duration, loops, loops_range


class Logger(object):
    def __init__(self, verbose, config):
        self.verbose = verbose
        self.term = py.io.TerminalWriter(file=sys.stderr)
        self.capman = config.pluginmanager.getplugin("capturemanager")
        self.pytest_warn = config.warn
        try:
            self.pytest_warn_has_fslocation = 'fslocation' in config.warn.func_code.co_varnames
        except AttributeError:
            self.pytest_warn_has_fslocation = False

    def warn(self, code, text, warner=None, suspend=False, fslocation=None):
        if self.verbose:
            if suspend and self.capman:
                self.capman.suspendcapture(in_=True)
            self.term.line("")
            self.term.sep("-", red=True, bold=True)
            self.term.write(" WARNING: ", red=True, bold=True)
            self.term.line(text, red=True)
            self.term.sep("-", red=True, bold=True)
            if suspend and self.capman:
                self.capman.resumecapture()
        if warner is None:
            warner = self.pytest_warn
        if fslocation and self.pytest_warn_has_fslocation:
            warner(code=code, message=text, fslocation=fslocation)
        else:
            warner(code=code, message=text)

    def error(self, text):
        self.term.line("")
        self.term.sep("-", red=True, bold=True)
        self.term.line(text, red=True, bold=True)
        self.term.sep("-", red=True, bold=True)

    def info(self, text, **kwargs):
        if not kwargs or kwargs == {'bold': True}:
            kwargs['purple'] = True
        self.term.line(text, **kwargs)

    def debug(self, text, **kwargs):
        if self.verbose:
            if self.capman:
                self.capman.suspendcapture(in_=True)
            self.info(text, **kwargs)
            if self.capman:
                self.capman.resumecapture()


class BenchmarkSession(object):
    compare_mapping = None

    def __init__(self, config):
        self.verbose = config.getoption("benchmark_verbose")
        self.logger = Logger(self.verbose, config)
        self.config = config
        self.options = dict(
            min_time=SecondsDecimal(config.getoption("benchmark_min_time")),
            min_rounds=config.getoption("benchmark_min_rounds"),
            max_time=SecondsDecimal(config.getoption("benchmark_max_time")),
            timer=load_timer(config.getoption("benchmark_timer")),
            calibration_precision=config.getoption("benchmark_calibration_precision"),
            disable_gc=config.getoption("benchmark_disable_gc"),
            warmup=config.getoption("benchmark_warmup"),
            warmup_iterations=config.getoption("benchmark_warmup_iterations"),
        )
        self.skip = config.getoption("benchmark_skip")
        self.disable = config.getoption("benchmark_disable")

        if config.getoption("dist", "no") != "no" and not self.skip:
            self.logger.warn(
                "BENCHMARK-U2",
                "Benchmarks are automatically disabled because xdist plugin is active. "
                "Benchmarks cannot be performed reliably in a parallelized environment.",
                fslocation="::"
            )
            self.disable = True
        if hasattr(config, "slaveinput"):
            self.disable = True
        if not statistics:
            self.logger.warn(
                "BENCHMARK-U3",
                "Benchmarks are automatically disabled because we could not import `statistics`\n\n%s" %
                statistics_error,
                fslocation="::"
            )
            self.disable = True

        self.only = config.getoption("benchmark_only")
        self.sort = config.getoption("benchmark_sort")
        self.sort = "fullname"
        self.columns = config.getoption("benchmark_columns")
        if self.skip and self.only:
            raise pytest.UsageError("Can't have both --benchmark-only and --benchmark-skip options.")
        if self.disable and self.only:
            raise pytest.UsageError(
                "Can't have both --benchmark-only and --benchmark-disable options. Note that --benchmark-disable is "
                "automatically activated if xdist is on or you're missing the statistics dependency.")
        self._benchmarks = []
        self.group_by = config.getoption("benchmark_group_by")
        self.save = config.getoption("benchmark_save")
        self.autosave = config.getoption("benchmark_autosave")
        self.save_data = config.getoption("benchmark_save_data")
        self.json = config.getoption("benchmark_json")
        self.compare = config.getoption("benchmark_compare")
        self.compare_fail = config.getoption("benchmark_compare_fail")
        self.performance_regressions = []
        self.storage = py.path.local(config.getoption("benchmark_storage"))
        self.storage.ensure(dir=1)
        self.histogram = first_or_value(config.getoption("benchmark_histogram"), False)

    @property
    def benchmarks(self):
        return [bench for bench in self._benchmarks if bench]

    @property
    def storage_fslocation(self):
        return self.storage.relto(os.getcwd())

    @cached_property
    def compare_file(self):
        if self.compare:
            files = self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json", sort=True)
            if files:
                if self.compare is True:
                    files.sort()
                    return files[-1]
                else:
                    files = [f for f in files if str(f.basename).startswith(self.compare)]
                    if len(files) == 1:
                        return files[0]

                    if not files:
                        self.logger.warn("BENCHMARK-C1", "Can't compare. No benchmark files matched %r" % self.compare,
                                         fslocation=self.storage_fslocation)
                    elif len(files) > 1:
                        self.logger.warn(
                            "BENCHMARK-C2", "Can't compare. Too many benchmark files matched %r:\n - %s" % (
                                self.compare, '\n - '.join(map(str, files))
                            ),
                            fslocation=self.storage_fslocation)
            else:
                msg = "Can't compare. No benchmark files in %r. " \
                      "Expected files matching [0-9][0-9][0-9][0-9]_*.json." % str(self.storage)
                if self.compare is True:
                    msg += " Can't load the previous benchmark."
                    code = "BENCHMARK-C3"
                else:
                    msg += " Can't match anything to %r." % self.compare
                    code = "BENCHMARK-C4"
                self.logger.warn(code, msg, fslocation=self.storage_fslocation)
                return

    @property
    def next_num(self):
        files = self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json")
        files.sort(reverse=True)
        if not files:
            return "0001"
        for f in files:
            try:
                return "%04i" % (int(str(f.basename).split('_')[0]) + 1)
            except ValueError:
                raise

    def handle_saving(self):
        if self.json:
            output_json = self.config.hook.pytest_benchmark_generate_json(
                config=self.config,
                benchmarks=self.benchmarks,
                include_data=True
            )
            self.config.hook.pytest_benchmark_update_json(
                config=self.config,
                benchmarks=self.benchmarks,
                output_json=output_json
            )
            with self.json as fh:
                fh.write(json.dumps(output_json, ensure_ascii=True, indent=4).encode())
            self.logger.info("Wrote benchmark data in %s" % self.json, purple=True)

        save = self.save or self.autosave
        if save:
            output_json = self.config.hook.pytest_benchmark_generate_json(
                config=self.config,
                benchmarks=self.benchmarks,
                include_data=self.save_data
            )
            self.config.hook.pytest_benchmark_update_json(
                config=self.config,
                benchmarks=self.benchmarks,
                output_json=output_json
            )
            output_file = self.storage.join("%s_%s.json" % (self.next_num, save))
            assert not output_file.exists()

            with output_file.open('wb') as fh:
                fh.write(json.dumps(output_json, ensure_ascii=True, indent=4).encode())
            self.logger.info("Saved benchmark data in %s" % output_file)

    def handle_loading(self):
        if self.compare_file:
            self.compare_name = self.compare_file.basename.split('_')[0]
            with self.compare_file.open('rU') as fh:
                try:
                    compared_benchmark = json.load(fh)
                except Exception as exc:
                    self.logger.warn("BENCHMARK-C5", "Failed to load %s: %s" % (self.compare_file, exc),
                                     fslocation=self.storage_fslocation)
                    return

            machine_info = self.config.hook.pytest_benchmark_generate_machine_info(config=self.config)
            self.config.hook.pytest_benchmark_update_machine_info(config=self.config, machine_info=machine_info)
            self.config.hook.pytest_benchmark_compare_machine_info(config=self.config, benchmarksession=self,
                                                                   machine_info=machine_info,
                                                                   compared_benchmark=compared_benchmark)
            self.compare_mapping = dict((bench['fullname'], bench) for bench in compared_benchmark['benchmarks'])

            self.logger.info("Comparing against benchmark %s:" % self.compare_file.basename, bold=True)
            self.logger.info("| commit info: %s" % format_dict(compared_benchmark['commit_info']))
            self.logger.info("| saved at: %s" % compared_benchmark['datetime'])
            self.logger.info("| saved using pytest-benchmark %s:" % compared_benchmark['version'])

    def display(self, tr):
        if not self.benchmarks:
            return

        tr.ensure_newline()
        self.handle_saving()
        self.handle_loading()
        if self.benchmarks:
            self.display_results_table(tr)
            self.check_regressions()
            self.handle_histogram()

    def check_regressions(self):
        if self.compare_fail and not self.compare_file:
            raise pytest.UsageError("--benchmark-compare-fail requires valid --benchmark-compare.")

        if self.performance_regressions:
            self.logger.error("Performance has regressed:\n%s" % "\n".join(
                "\t%s - %s" % line for line in self.performance_regressions
            ))
            raise PerformanceRegression("Performance has regressed.")

    def handle_histogram(self):
        if self.histogram:
            from .histogram import make_plot

            history = {}
            for bench_file in self.storage.listdir("[0-9][0-9][0-9][0-9]_*.json"):
                with bench_file.open('rU') as fh:
                    fullname = bench_file.purebasename
                    if '_' in fullname:
                        id_, name = fullname.split('_', 1)
                    else:
                        id_, name = fullname, ''
                    data = history[id_] = json.load(fh)
                    data['name'] = name
                    data['mapping'] = dict((bench['fullname'], bench) for bench in data['benchmarks'])

            for bench in self.benchmarks:
                name = bench.fullname
                for c in "\/:*?<>|":
                    name = name.replace(c, '_').replace('__', '_')
                output_file = py.path.local("%s-%s.svg" % (self.histogram, name)).ensure()

                table = list(self.generate_histogram_table(bench, history, sorted(history)))

                plot = make_plot(
                    bench_name=bench.fullname,
                    table=table,
                    compare=self.compare_file,
                    annotations=history,
                    sort=self.sort,
                    current=HISTOGRAM_CURRENT,
                )
                plot.render_to_file(str(output_file))
                self.logger.info("Generated histogram %s" % output_file, bold=True)

    @staticmethod
    def generate_histogram_table(current, history, sequence):
        for name in sequence:
            trial = history[name]
            for bench in trial["benchmarks"]:
                if bench["fullname"] == current.fullname:
                    found = True
                else:
                    found = False

                if found:
                    yield "%s" % name, bench["stats"]
                    break

        yield HISTOGRAM_CURRENT, current.json()

    def apply_compare(self, benchmarks, compare_name, compare_mapping):
        result = []
        for bench in benchmarks:
            if bench.fullname in compare_mapping:
                stats = compare_mapping[bench.fullname]["stats"]
                result.extend([
                    dict(bench.json(include_data=False),
                         name="{0} ({1})".format(bench.name, "NOW"),
                         iterations=bench.iterations),
                    dict(stats, name="{0} ({1})".format(bench.name, compare_name)),
                ])
                if self.compare_fail:
                    for check in self.compare_fail:
                        fail = check.fails(bench, stats)
                        if fail:
                            self.performance_regressions.append((bench.fullname, fail))
            else:
                result.append(bench)
        return result

    def display_results_table(self, tr):
        tr.write_line("")
        tr.rewrite("Computing stats ...", black=True, bold=True)
        groups = self.config.hook.pytest_benchmark_group_stats(
            config=self.config,
            benchmarks=self.benchmarks,
            group_by=self.group_by
        )
        for line, (group, benchmarks) in report_progress(groups, tr, "Computing stats ... group {pos}/{total}"):
            if self.compare_file:
                benchmarks = self.apply_compare(benchmarks, self.compare_name, self.compare_mapping)
            benchmarks = sorted(benchmarks, key=operator.itemgetter(self.sort))

            worst = {}
            best = {}
            solo = len(benchmarks) == 1
            for line, prop in report_progress(("min", "max", "mean", "median", "iqr", "stddev"), tr, "{line}: {value}", line=line):
                worst[prop] = max(bench[prop] for _, bench in report_progress(
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
                best[prop] = min(bench[prop] for _, bench in report_progress(
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))
            for line, prop in report_progress(("outliers", "rounds", "iterations"), tr, "{line}: {value}", line=line):
                worst[prop] = max(benchmark[prop] for _, benchmark in report_progress(
                    benchmarks, tr, "{line} ({pos}/{total})", line=line))

            time_unit_key = self.sort
            if self.sort in ("name", "fullname"):
                time_unit_key = "min"
            unit, adjustment = time_unit(best.get(self.sort, benchmarks[0][time_unit_key]))
            labels = {
                "name": "Name (time in %ss)" % unit,
                "min": "Min",
                "max": "Max",
                "mean": "Mean",
                "stddev": "StdDev",
                "rounds": "Rounds",
                "iterations": "Iterations",
                "iqr": "IQR",
                "median": "Median",
                "outliers": "Outliers(*)",
            }
            widths = {
                "name": 3 + max(len(labels["name"]), max(len(benchmark["name"]) for benchmark in benchmarks)),
                "rounds": 2 + max(len(labels["rounds"]), len(str(worst["rounds"]))),
                "iterations": 2 + max(len(labels["iterations"]), len(str(worst["iterations"]))),
                "outliers": 2 + max(len(labels["outliers"]), len(str(worst["outliers"]))),
            }
            for prop in "min", "max", "mean", "stddev", "median", "iqr":
                widths[prop] = 2 + max(len(labels[prop]), max(
                    len(NUMBER_FMT.format(bench[prop] * adjustment))
                    for bench in benchmarks
                ))

            rpadding = 0 if solo else 10
            labels_line = labels["name"].ljust(widths["name"]) + "".join(
                labels[prop].rjust(widths[prop]) + (
                    " " * rpadding
                    if prop not in ["outliers", "rounds", "iterations"]
                    else ""
                )
                for prop in self.columns
            )
            tr.rewrite("")
            tr.write_line(
                (" benchmark%(name)s: %(count)s tests " % dict(
                    count=len(benchmarks),
                    name="" if group is None else " %r" % group,
                )).center(len(labels_line), "-"),
                yellow=True,
            )
            tr.write_line(labels_line)
            tr.write_line("-" * len(labels_line), yellow=True)

            for bench in benchmarks:
                has_error = bench.get("has_error")
                tr.write(bench["name"].ljust(widths["name"]), red=has_error, invert=has_error)
                for prop in self.columns:
                    if prop in ("min", "max", "mean", "stddev", "median", "iqr"):
                        tr.write(
                            ALIGNED_NUMBER_FMT.format(
                                bench[prop] * adjustment,
                                widths[prop],
                                self.compute_baseline_scale(best[prop], bench[prop], rpadding),
                                rpadding
                            ),
                            green=not solo and bench[prop] == best.get(prop),
                            red=not solo and bench[prop] == worst.get(prop),
                            bold=True,
                        )
                    else:
                        tr.write("{0:>{1}}".format(bench[prop], widths[prop]))
                tr.write("\n")
            tr.write_line("-" * len(labels_line), yellow=True)
            tr.write_line("")
        tr.write_line("(*) Outliers: 1 Standard Deviation from Mean; "
                      "1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.", bold=True, black=True)

    def compute_baseline_scale(self, baseline, value, width):
        if not width:
            return ""
        if value == baseline:
            return " (1.0)".ljust(width)

        scale = abs(value / baseline) if baseline else float("inf")
        if scale > 1000:
            if isinf(scale):
                return " (inf)".ljust(width)
            else:
                return " (>1000.0)".ljust(width)
        else:
            return " ({0:.2f})".format(scale).ljust(width)


def pytest_benchmark_compare_machine_info(config, benchmarksession, machine_info, compared_benchmark):
    if compared_benchmark["machine_info"] != machine_info:
        benchmarksession.logger.warn(
            "BENCHMARK-C6",
            "Benchmark machine_info is different. Current: %s VS saved: %s." % (
                format_dict(machine_info),
                format_dict(compared_benchmark["machine_info"]),
            ),
            fslocation=benchmarksession.storage_fslocation
        )

if hasattr(pytest, 'hookimpl'):
    _hookwrapper = pytest.hookimpl(hookwrapper=True)
else:
    _hookwrapper = pytest.mark.hookwrapper


@_hookwrapper
def pytest_runtest_call(item):
    bs = item.config._benchmarksession
    fixture = hasattr(item, "funcargs") and item.funcargs.get("benchmark")
    if isinstance(fixture, BenchmarkFixture):
        if bs.skip:
            pytest.skip("Skipping benchmark (--benchmark-skip active).")
        else:
            yield
    else:
        if bs.only:
            pytest.skip("Skipping non-benchmark (--benchmark-only active).")
        else:
            yield


def pytest_benchmark_group_stats(config, benchmarks, group_by):
    groups = defaultdict(list)
    for bench in benchmarks:
        if group_by == "group":
            groups[bench.group].append(bench)
        elif group_by == "name":
            groups[bench.name].append(bench)
        elif group_by == "func":
            groups[bench.name.split("[")[0]].append(bench)
        elif group_by == "fullfunc":
            groups[bench.fullname.split("[")[0]].append(bench)
        elif group_by == "fullname":
            groups[bench.fullname].append(bench)
        elif group_by == "param":
            groups[bench.param].append(bench)
        elif group_by.startswith("param:"):
            param_name = group_by[len("param:"):]
            param_value = bench.params[param_name]
            groups[param_value].append(bench)
        else:
            raise NotImplementedError("Unsupported grouping %r." % group_by)
    # Sort the benchmarks inside each group so the result table is stable.
    for grouped_benchmarks in groups.values():
        grouped_benchmarks.sort(key=operator.attrgetter("fullname" if "full" in group_by else "name"))
    return sorted(groups.items(), key=lambda pair: pair[0] or "")

def pytest_terminal_summary(terminalreporter):
    try:
        terminalreporter.config._benchmarksession.display(terminalreporter)
    except PerformanceRegression:
        raise
    except Exception:
        terminalreporter.config._benchmarksession.logger.error("\n%s" % traceback.format_exc())
        raise


def pytest_benchmark_generate_machine_info():
    python_implementation = platform.python_implementation()
    python_implementation_version = platform.python_version()
    if python_implementation == 'PyPy':
        python_implementation_version = '%d.%d.%d' % sys.pypy_version_info[:3]
        if sys.pypy_version_info.releaselevel != 'final':
            python_implementation_version += '-%s%d' % sys.pypy_version_info[3:]
    return {
        "node": platform.node(),
        "processor": platform.processor(),
        "machine": platform.machine(),
        "python_compiler": platform.python_compiler(),
        "python_implementation": python_implementation,
        "python_implementation_version": python_implementation_version,
        "python_version": platform.python_version(),
        "python_build": platform.python_build(),
        "release": platform.release(),
        "system": platform.system()
    }


def pytest_benchmark_generate_commit_info(config):
    return get_commit_info()


def pytest_benchmark_generate_json(config, benchmarks, include_data):
    machine_info = config.hook.pytest_benchmark_generate_machine_info(config=config)
    config.hook.pytest_benchmark_update_machine_info(config=config, machine_info=machine_info)

    commit_info = config.hook.pytest_benchmark_generate_commit_info(config=config)
    config.hook.pytest_benchmark_update_commit_info(config=config, commit_info=commit_info)

    benchmarks_json = []
    output_json = {
        "machine_info": machine_info,
        "commit_info": commit_info,
        "benchmarks": benchmarks_json,
        "datetime": datetime.utcnow().isoformat(),
        "version": __version__,
    }
    for bench in benchmarks:
        if not bench.has_error:
            benchmarks_json.append({
                "group": bench.group,
                "name": bench.name,
                "fullname": bench.fullname,
                "params": bench.params,
                "stats": dict(bench.json(include_data=include_data), iterations=bench.iterations),
                "options": dict(
                    (k, v.__name__ if callable(v) else v) for k, v in bench.options.items()
                )
            })
    return output_json


@pytest.fixture(scope="function")
def benchmark(request):
    bs = request.config._benchmarksession

    if bs.skip:
        pytest.skip("Benchmarks are skipped (--benchmark-skip was used).")
    else:
        node = request.node
        marker = node.get_marker("benchmark")
        options = marker.kwargs if marker else {}
        if "timer" in options:
            options["timer"] = NameWrapper(options["timer"])
        fixture = BenchmarkFixture(
            node,
            add_stats=bs._benchmarks.append,
            logger=bs.logger,
            warner=request.node.warn,
            disable=bs.disable,
            **dict(bs.options, **options)
        )
        request.addfinalizer(fixture._cleanup)
        return fixture


@pytest.fixture(scope="function")
def benchmark_weave(benchmark):
    return benchmark.weave


def pytest_runtest_setup(item):
    marker = item.get_marker("benchmark")
    if marker:
        if marker.args:
            raise ValueError("benchmark mark can't have positional arguments.")
        for name in marker.kwargs:
            if name not in (
                    "max_time", "min_rounds", "min_time", "timer", "group", "disable_gc", "warmup",
                    "warmup_iterations", "calibration_precision"):
                raise ValueError("benchmark mark can't have %r keyword argument." % name)


@pytest.mark.trylast  # force the other plugins to initialise, fixes issue with capture not being properly initialised
def pytest_configure(config):
    config.addinivalue_line("markers", "benchmark: mark a test with custom benchmark settings.")
    config._benchmarksession = BenchmarkSession(config)
    config.pluginmanager.register(config._benchmarksession, "pytest-benchmark")
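
A closing note on the graded method: `_raw_pedantic()` accepts a `setup` callable and, when that callable returns a value, treats it as an `(args, kwargs)` pair for the target (it rejects the combination with explicit `args`/`kwargs`, and `setup` cannot be combined with `iterations > 1`). A hedged sketch of that contract, with made-up names:

def build_payload():
    # Called before every round; must return (args, kwargs) for the target.
    return ("a,b,c",), {"sep": ","}

def split_text(text, sep):
    # Made-up target function for illustration.
    return text.split(sep)

def test_split_text(benchmark):
    benchmark.pedantic(split_text, setup=build_payload, rounds=10)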