Completed
Pull Request — master (#93)
by
unknown
29s
created

Stats.__getattr__()   A

Complexity

Conditions 2

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 7
rs 9.4285
c 0
b 0
f 0
1
from __future__ import division
2
from __future__ import print_function
3
4
import operator
5
import statistics
6
from bisect import bisect_left
7
from bisect import bisect_right
8
9
from .utils import PERCENTILE_COL_RX
10
from .utils import cached_property
11
from .utils import funcname
12
from .utils import get_cprofile_functions
13
14
15
class Stats(object):
    """Accumulates raw timing samples and lazily computes summary statistics.

    Most statistics are ``cached_property`` values derived from ``self.data``,
    so ``update()`` should not be called after any statistic has been read
    (the cached values would go stale).  Percentile columns such as ``p99``
    are resolved dynamically through ``__getattr__``.
    """

    # Field names included in as_dict() output by default.
    fields = (
        "min", "max", "mean", "stddev", "rounds", "median", "iqr", "q1", "q3", "iqr_outliers", "stddev_outliers",
        "outliers", "ld15iqr", "hd15iqr", "ops", "total"
    )

    def __init__(self):
        self.data = []

    def __bool__(self):
        return bool(self.data)

    # Python 2 spelling of __bool__.
    def __nonzero__(self):
        return bool(self.data)

    def as_dict(self, extra_fields=None):
        """Return a mapping of field name -> computed statistic.

        :param extra_fields: optional iterable of extra attribute names to
                             include (e.g. percentile columns like ``"p99"``).
        """
        fields = Stats.fields + tuple(extra_fields) if extra_fields else Stats.fields
        return dict(
            (field, getattr(self, field))
            for field in fields
        )

    def update(self, duration):
        """Record one timing sample."""
        self.data.append(duration)

    @cached_property
    def sorted_data(self):
        return sorted(self.data)

    @cached_property
    def total(self):
        return sum(self.data)

    @cached_property
    def min(self):
        return min(self.data)

    @cached_property
    def max(self):
        return max(self.data)

    @cached_property
    def mean(self):
        return statistics.mean(self.data)

    @cached_property
    def stddev(self):
        # statistics.stdev() requires at least two samples; a single
        # sample has no spread, so report 0.
        if len(self.data) > 1:
            return statistics.stdev(self.data)
        else:
            return 0

    @property
    def stddev_outliers(self):
        """
        Count of StdDev outliers: what's beyond (Mean - StdDev, Mean + StdDev)
        """
        count = 0
        q0 = self.mean - self.stddev
        q4 = self.mean + self.stddev
        for val in self.data:
            if val < q0 or val > q4:
                count += 1
        return count

    @cached_property
    def rounds(self):
        return len(self.data)

    @cached_property
    def median(self):
        return statistics.median(self.data)

    @cached_property
    def ld15iqr(self):
        """
        Tukey-style Lowest Datum within 1.5 IQR under Q1.
        """
        if len(self.data) == 1:
            return self.data[0]
        else:
            return self.sorted_data[bisect_left(self.sorted_data, self.q1 - 1.5 * self.iqr)]

    @cached_property
    def hd15iqr(self):
        """
        Tukey-style Highest Datum within 1.5 IQR over Q3.
        """
        if len(self.data) == 1:
            return self.data[0]
        else:
            pos = bisect_right(self.sorted_data, self.q3 + 1.5 * self.iqr)
            if pos == len(self.data):
                # Everything is inside the fence; highest datum is the max.
                return self.sorted_data[-1]
            else:
                return self.sorted_data[pos]

    @cached_property
    def q1(self):
        """First quartile (25th percentile)."""
        return self._quartile(upper=False)

    @cached_property
    def q3(self):
        """Third quartile (75th percentile)."""
        return self._quartile(upper=True)

    def _quartile(self, upper):
        """Shared implementation for ``q1``/``q3`` (previously duplicated).

        :param upper: False computes Q1, True computes Q3.

        See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
        """
        rounds = self.rounds
        data = self.sorted_data

        if rounds == 1:
            return data[0]
        elif rounds % 2:  # odd sample count: Method 3
            n, q = rounds // 4, rounds % 4
            if upper:
                if q == 1:
                    return 0.75 * data[3 * n] + 0.25 * data[3 * n + 1]
                else:
                    return 0.25 * data[3 * n + 1] + 0.75 * data[3 * n + 2]
            else:
                if q == 1:
                    return 0.25 * data[n - 1] + 0.75 * data[n]
                else:
                    return 0.75 * data[n] + 0.25 * data[n + 1]
        else:  # even sample count: Method 2 (median of the relevant half)
            half = rounds // 2
            return statistics.median(data[half:] if upper else data[:half])

    @cached_property
    def iqr(self):
        return self.q3 - self.q1

    @property
    def iqr_outliers(self):
        """
        Count of Tukey outliers: what's beyond (Q1 - 1.5IQR, Q3 + 1.5IQR)
        """
        count = 0
        q0 = self.q1 - 1.5 * self.iqr
        q4 = self.q3 + 1.5 * self.iqr
        for val in self.data:
            if val < q0 or val > q4:
                count += 1
        return count

    @cached_property
    def outliers(self):
        return "%s;%s" % (self.stddev_outliers, self.iqr_outliers)

    @cached_property
    def ops(self):
        # Operations per second; 0 when no time was recorded, to avoid
        # a ZeroDivisionError on empty/zero-duration data.
        if self.total:
            return self.rounds / self.total
        return 0

    def __getattr__(self, name):
        # Resolve dynamic percentile columns such as "p99" or "p99.9".
        m = PERCENTILE_COL_RX.match(name)
        if not m:
            raise AttributeError(name)

        p = float(m.group(1)) / 100.0
        return self.percentile(p)

    def percentile(self, percent):
        ''' Compute the interpolated percentile.

        This is the method recommended by NIST:
        http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm

        percent must be in the range [0.0, 1.0].
        '''
        if not (0.0 <= percent <= 1.0):
            raise ValueError('percent must be in the range [0.0, 1.0]')

        if not hasattr(self, '_percentile_cache'):
            self._percentile_cache = {}

        # Check the cache first
        # This isn't perfect with floats for the usual reasons, but is good enough
        cached = self._percentile_cache.get(percent)
        if cached is not None:
            return cached

        # percentiles require sorted data
        data = self.sorted_data
        N = len(data)
        if percent <= 1/(N+1):
            # Too small, return min
            return self._percentile_cache.setdefault(percent, data[0])
        elif percent >= N/(N+1):
            # too big, return max
            return self._percentile_cache.setdefault(percent, data[-1])
        else:
            # Linear interpolation between the two neighboring ranks.
            r = percent * (N + 1)
            k = r // 1
            d = r % 1

            n = int(k - 1)  # zero-indexed lists
            result = data[n] + d * (data[n+1] - data[n])
            return self._percentile_cache.setdefault(percent, result)
218
219
220
class Metadata(object):
    """Couples a benchmark fixture's identity with its accumulated Stats.

    Supports mapping-style access (``meta["mean"]``, ``meta.get("ops")``)
    which resolves names on the stats object first and falls back to the
    metadata's own attributes.
    """

    def __init__(self, fixture, iterations, options):
        # Copy the identifying attributes verbatim from the fixture.
        for attr in ("name", "fullname", "group", "param", "params",
                     "extra_info", "cprofile_stats"):
            setattr(self, attr, getattr(fixture, attr))

        self.iterations = iterations
        self.options = options
        self.fixture = fixture
        self.stats = Stats()

    def __bool__(self):
        return bool(self.stats)

    # Python 2 spelling of __bool__.
    def __nonzero__(self):
        return bool(self.stats)

    def _resolve(self, key, *fallback):
        # Stats attributes win; metadata attributes are the fallback.
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key, *fallback)

    def get(self, key, default=None):
        """Dict-like get: stats attribute, then own attribute, then default."""
        return self._resolve(key, default)

    def __getitem__(self, key):
        return self._resolve(key)

    @property
    def has_error(self):
        """Proxy for the underlying fixture's error flag."""
        return self.fixture.has_error

    def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None, columns=None):
        """Serialize this benchmark's metadata (and optionally stats/cprofile).

        :param include_data: include the raw timing samples under "data".
        :param flat: merge the stats into the top-level dict instead of
                     nesting them under "stats".
        :param stats: include computed statistics at all.
        :param cprofile: cprofile column to prioritize when selecting the
                         top functions; None selects by every column.
        :param columns: requested output columns; percentile columns not in
                        Stats.fields are forwarded as extra fields.
        """
        result = {
            "group": self.group,
            "name": self.name,
            "fullname": self.fullname,
            "params": self.params,
            "param": self.param,
            "extra_info": self.extra_info,
            "options": dict(
                (key, funcname(value) if callable(value) else value)
                for key, value in self.options.items()
            )
        }

        if self.cprofile_stats:
            selected = result["cprofile"] = []
            functions = get_cprofile_functions(self.cprofile_stats)
            ordering = ["cumtime", "tottime", "ncalls", "ncalls_recursion",
                        "tottime_per", "cumtime_per", "function_name"]
            if cprofile is not None:
                # The explicitly requested column is ranked first.
                ordering.remove(cprofile)
                ordering.insert(0, cprofile)
            for col in ordering:
                # Collect the (unique) top 25 functions per ranking column.
                functions.sort(key=operator.itemgetter(col), reverse=True)
                for entry in functions[:25]:
                    if entry not in selected:
                        selected.append(entry)
                # Stop after one column when none was requested, or once
                # every available function has already been selected.
                if cprofile is None or len(functions) == len(selected):
                    break

        if stats:
            extra_fields = None
            if columns is not None:
                extra_fields = tuple(
                    c for c in columns
                    if c not in Stats.fields and PERCENTILE_COL_RX.match(c)
                )

            stats_dict = self.stats.as_dict(extra_fields=extra_fields)
            if include_data:
                stats_dict["data"] = self.stats.data
            stats_dict["iterations"] = self.iterations
            if flat:
                result.update(stats_dict)
            else:
                result["stats"] = stats_dict
        return result

    def update(self, duration):
        # Record the average duration of a single iteration.
        self.stats.update(duration / self.iterations)
304
305
306
def normalize_stats(stats):
    """Backfill statistics fields that older saved data may lack.

    The 'ops' (operations per second) field was introduced in 3.1.0, so
    data saved by earlier versions is missing it; derive it from the mean.
    Mutates *stats* in place and returns it.
    """
    if 'ops' in stats:
        return stats
    stats['ops'] = 1 / stats['mean']
    return stats
311