1
|
|
|
from __future__ import division |
2
|
|
|
from __future__ import print_function |
3
|
|
|
|
4
|
|
|
import operator |
5
|
|
|
import statistics |
6
|
|
|
from bisect import bisect_left |
7
|
|
|
from bisect import bisect_right |
8
|
|
|
|
9
|
|
|
from .utils import PERCENTILE_COL_RX |
10
|
|
|
from .utils import cached_property |
11
|
|
|
from .utils import funcname |
12
|
|
|
from .utils import get_cprofile_functions |
13
|
|
|
|
14
|
|
|
|
15
|
|
|
class Stats(object):
    """
    Accumulates timing samples and lazily computes descriptive statistics.

    Samples are appended with :meth:`update`; every statistic is computed on
    demand and cached (via ``cached_property``), so all samples should be
    collected before any statistic is read.
    """
    # Fields serialized by as_dict() (besides any dynamic percentile columns).
    fields = (
        "min", "max", "mean", "stddev", "rounds", "median", "iqr", "q1", "q3", "iqr_outliers", "stddev_outliers",
        "outliers", "ld15iqr", "hd15iqr", "ops", "total"
    )

    def __init__(self):
        # Raw timing samples, in insertion order.
        self.data = []

    def __bool__(self):
        # Truthy once at least one sample has been recorded.
        return bool(self.data)

    def __nonzero__(self):
        # Python 2 alias for __bool__.
        return bool(self.data)

    def as_dict(self, extra_fields=None):
        """
        Return a mapping of field name -> value for every name in
        ``Stats.fields`` plus any ``extra_fields`` (e.g. percentile columns).
        """
        # Parentheses make the ternary/`+` precedence explicit.
        fields = (Stats.fields + tuple(extra_fields)) if extra_fields else Stats.fields
        return {field: getattr(self, field) for field in fields}

    def update(self, duration):
        """Record a single timing sample."""
        self.data.append(duration)

    @cached_property
    def sorted_data(self):
        # Sorted copy of the samples; several statistics below rely on it.
        return sorted(self.data)

    @cached_property
    def total(self):
        return sum(self.data)

    @cached_property
    def min(self):
        return min(self.data)

    @cached_property
    def max(self):
        return max(self.data)

    @cached_property
    def mean(self):
        return statistics.mean(self.data)

    @cached_property
    def stddev(self):
        # Sample standard deviation requires at least two data points.
        if len(self.data) > 1:
            return statistics.stdev(self.data)
        else:
            return 0

    @property
    def stddev_outliers(self):
        """
        Count of StdDev outliers: what's beyond (Mean - StdDev, Mean + StdDev)
        """
        q0 = self.mean - self.stddev
        q4 = self.mean + self.stddev
        return sum(1 for val in self.data if val < q0 or val > q4)

    @cached_property
    def rounds(self):
        # Number of recorded samples.
        return len(self.data)

    @cached_property
    def median(self):
        return statistics.median(self.data)

    @cached_property
    def ld15iqr(self):
        """
        Tukey-style Lowest Datum within 1.5 IQR under Q1.
        """
        if len(self.data) == 1:
            return self.data[0]
        else:
            # First sorted value >= the lower Tukey fence.
            return self.sorted_data[bisect_left(self.sorted_data, self.q1 - 1.5 * self.iqr)]

    @cached_property
    def hd15iqr(self):
        """
        Tukey-style Highest Datum within 1.5 IQR over Q3.
        """
        if len(self.data) == 1:
            return self.data[0]
        else:
            pos = bisect_right(self.sorted_data, self.q3 + 1.5 * self.iqr)
            if pos == len(self.data):
                # Every value is within the upper fence; take the maximum.
                return self.sorted_data[-1]
            else:
                return self.sorted_data[pos]

    @cached_property
    def q1(self):
        """First quartile of the samples."""
        rounds = self.rounds
        data = self.sorted_data

        # See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
        if rounds == 1:
            return data[0]
        elif rounds % 2:  # Method 3
            n, q = rounds // 4, rounds % 4
            if q == 1:
                return 0.25 * data[n - 1] + 0.75 * data[n]
            else:
                return 0.75 * data[n] + 0.25 * data[n + 1]
        else:  # Method 2
            return statistics.median(data[:rounds // 2])

    @cached_property
    def q3(self):
        """Third quartile of the samples."""
        rounds = self.rounds
        data = self.sorted_data

        # See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
        if rounds == 1:
            return data[0]
        elif rounds % 2:  # Method 3
            n, q = rounds // 4, rounds % 4
            if q == 1:
                return 0.75 * data[3 * n] + 0.25 * data[3 * n + 1]
            else:
                return 0.25 * data[3 * n + 1] + 0.75 * data[3 * n + 2]
        else:  # Method 2
            return statistics.median(data[rounds // 2:])

    @cached_property
    def iqr(self):
        # Interquartile range.
        return self.q3 - self.q1

    @property
    def iqr_outliers(self):
        """
        Count of Tukey outliers: what's beyond (Q1 - 1.5IQR, Q3 + 1.5IQR)
        """
        q0 = self.q1 - 1.5 * self.iqr
        q4 = self.q3 + 1.5 * self.iqr
        return sum(1 for val in self.data if val < q0 or val > q4)

    @cached_property
    def outliers(self):
        # Combined "stddev;iqr" outlier display string.
        return "%s;%s" % (self.stddev_outliers, self.iqr_outliers)

    @cached_property
    def ops(self):
        # Operations per second; 0 when no time was recorded, avoiding
        # a ZeroDivisionError for degenerate (all-zero) timings.
        if self.total:
            return self.rounds / self.total
        return 0

    def __getattr__(self, name):
        # Dynamic percentile columns: PERCENTILE_COL_RX (from .utils) is
        # presumably matching names like "q95" with the percent in group(1)
        # -- TODO confirm against .utils.
        m = PERCENTILE_COL_RX.match(name)
        if not m:
            raise AttributeError(name)

        p = float(m.group(1)) / 100.0
        return self.percentile(p)

    def percentile(self, percent):
        ''' Compute the interpolated percentile.

        This is the method recommended by NIST:
        http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm

        percent must be in the range [0.0, 1.0].
        '''
        if not (0.0 <= percent <= 1.0):
            raise ValueError('percent must be in the range [0.0, 1.0]')

        # Created lazily so instances that predate this attribute
        # (e.g. unpickled ones) still work.
        if not hasattr(self, '_percentile_cache'):
            self._percentile_cache = {}

        # Check the cache first
        # This isn't perfect with floats for the usual reasons, but is good enough
        cached = self._percentile_cache.get(percent)
        if cached is not None:
            return cached

        # percentiles require sorted data
        data = self.sorted_data
        N = len(data)
        if percent <= 1 / (N + 1):
            # Too small to interpolate, return min
            return self._percentile_cache.setdefault(percent, data[0])
        elif percent >= N / (N + 1):
            # Too big to interpolate, return max
            return self._percentile_cache.setdefault(percent, data[-1])
        else:
            # Rank r = integer part k plus fractional part d, used to
            # interpolate linearly between the two neighbouring samples.
            r = percent * (N + 1)
            k = r // 1
            d = r % 1

            n = int(k - 1)  # zero-indexed lists
            result = data[n] + d * (data[n + 1] - data[n])
            return self._percentile_cache.setdefault(percent, result)
218
|
|
|
|
219
|
|
|
|
220
|
|
|
class Metadata(object):
    """
    Bundles a benchmark fixture's identity, run options and its timing
    statistics (a fresh :class:`Stats` instance).
    """

    # Fixture attributes mirrored onto this object at construction time.
    _FIXTURE_ATTRS = ("name", "fullname", "group", "param", "params",
                      "extra_info", "cprofile_stats")

    def __init__(self, fixture, iterations, options):
        for attr in self._FIXTURE_ATTRS:
            setattr(self, attr, getattr(fixture, attr))

        self.iterations = iterations
        self.options = options
        self.fixture = fixture
        self.stats = Stats()

    def __bool__(self):
        # Truthy once the underlying stats hold at least one sample.
        return bool(self.stats)

    # Python 2 alias.
    __nonzero__ = __bool__

    def get(self, key, default=None):
        """Dict-like lookup: prefer a statistic, fall back to metadata, then default."""
        if hasattr(self.stats, key):
            return getattr(self.stats, key)
        return getattr(self, key, default)

    def __getitem__(self, key):
        """Subscript lookup: a statistic if available, otherwise a metadata attribute."""
        if hasattr(self.stats, key):
            return getattr(self.stats, key)
        return getattr(self, key)

    @property
    def has_error(self):
        # Delegated straight from the fixture.
        return self.fixture.has_error

    def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None, columns=None):
        """
        Serialize this benchmark to a plain dict.

        include_data -- embed the raw samples under "data".
        flat -- merge the stats into the top level instead of under "stats".
        stats -- include statistics at all.
        cprofile -- cprofile column to privilege when trimming profile rows.
        columns -- requested output columns; used to pick extra percentile fields.
        """
        result = {
            "group": self.group,
            "name": self.name,
            "fullname": self.fullname,
            "params": self.params,
            "param": self.param,
            "extra_info": self.extra_info,
            "options": {
                k: funcname(v) if callable(v) else v for k, v in self.options.items()
            }
        }
        if self.cprofile_stats:
            top = result["cprofile"] = []
            functions = get_cprofile_functions(self.cprofile_stats)
            columns_order = ["cumtime", "tottime", "ncalls", "ncalls_recursion",
                             "tottime_per", "cumtime_per", "function_name"]
            # move column first
            if cprofile is not None:
                columns_order.remove(cprofile)
                columns_order.insert(0, cprofile)
            for column in columns_order:
                functions.sort(key=operator.itemgetter(column), reverse=True)
                for entry in functions[:25]:
                    if entry not in top:
                        top.append(entry)
                # if we want only one column or we already have all available functions
                if cprofile is None or len(functions) == len(top):
                    break
        if stats:
            extra_fields = None
            if columns is not None:
                extra_fields = tuple(c for c in columns if c not in Stats.fields and PERCENTILE_COL_RX.match(c))

            stats_dict = self.stats.as_dict(extra_fields=extra_fields)
            if include_data:
                stats_dict["data"] = self.stats.data
            stats_dict["iterations"] = self.iterations
            if flat:
                result.update(stats_dict)
            else:
                result["stats"] = stats_dict
        return result

    def update(self, duration):
        # The timer measured `iterations` runs; store the per-iteration time.
        self.stats.update(duration / self.iterations)
304
|
|
|
|
305
|
|
|
|
306
|
|
|
def normalize_stats(stats):
    """
    Backfill fields missing from stats dicts saved by older versions.

    The 'ops' (operations per second) field was added in 3.1.0; when absent
    it is derived from the mean duration.  Mutates and returns ``stats``.
    """
    if 'ops' not in stats:
        # fill field added in 3.1.0; mirror Stats.ops by reporting 0 instead
        # of raising ZeroDivisionError when the mean duration is 0
        stats['ops'] = 1 / stats['mean'] if stats['mean'] else 0
    return stats
311
|
|
|
|