Stats - Code Metrics - Inspection of "Percentile Support" - ionelmc/pytest-benchmark - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#93)

unknown

created 2017-11-08 17:49 UTC

Stats B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	203
Duplicated Lines	15.76 %

Importance

Changes	2
Bugs	1	Features	0

Metric	Value
dl	32
loc	203
rs	8.5454
c	2
b	1
f	0
wmc	49

24 Methods

Rating	Name	Duplication	Size	Complexity
A	stddev()	0	6	2
A	ld15iqr()	0	9	2
A	outliers()	0	3	1
A	ops()	0	5	2
A	__nonzero__()	0	2	1
A	stddev_outliers()	0	12	4
A	median()	0	3	1
A	iqr_outliers()	0	12	4
B	percentile()	0	37	6
A	max()	0	3	1
A	sorted_data()	0	3	1
A	__bool__()	0	2	1
A	rounds()	0	3	1
A	total()	0	3	1
A	update()	0	2	1
A	min()	0	3	1
A	__getattr__()	0	7	2
A	iqr()	0	3	1
A	hd15iqr()	0	13	3
A	q1()	16	16	4
A	__init__()	0	2	1
A	q3()	16	16	4
A	mean()	0	3	1
A	as_dict()	0	5	3

How to fix Duplicated Code Complexity

from __future__ import division
from __future__ import print_function

import operator
import statistics
from bisect import bisect_left
from bisect import bisect_right

from .utils import PERCENTILE_COL_RX
from .utils import cached_property
from .utils import funcname
from .utils import get_cprofile_functions


class Stats(object):
    fields = (
        "min", "max", "mean", "stddev", "rounds", "median", "iqr", "q1", "q3", "iqr_outliers", "stddev_outliers",
        "outliers", "ld15iqr", "hd15iqr", "ops", "total"
    )

    def __init__(self):
        self.data = []

    def __bool__(self):
        return bool(self.data)

    def __nonzero__(self):
        return bool(self.data)

    def as_dict(self, extra_fields=None):
        fields = Stats.fields + tuple(extra_fields) if extra_fields else Stats.fields
        return dict(
            (field, getattr(self, field))
            for field in fields
        )

    def update(self, duration):
        self.data.append(duration)

    @cached_property
    def sorted_data(self):
        return sorted(self.data)

    @cached_property
    def total(self):
        return sum(self.data)

    @cached_property
    def min(self):
        return min(self.data)

    @cached_property
    def max(self):
        return max(self.data)

    @cached_property
    def mean(self):
        return statistics.mean(self.data)

    @cached_property
    def stddev(self):
        if len(self.data) > 1:
            return statistics.stdev(self.data)
        else:
            return 0

    @property
    def stddev_outliers(self):
        """
        Count of StdDev outliers: what's beyond (Mean - StdDev, Mean - StdDev)
        """
        count = 0
        q0 = self.mean - self.stddev
        q4 = self.mean + self.stddev
        for val in self.data:
            if val < q0 or val > q4:
                count += 1
        return count

    @cached_property
    def rounds(self):
        return len(self.data)

    @cached_property
    def median(self):
        return statistics.median(self.data)

    @cached_property
    def ld15iqr(self):
        """
        Tukey-style Lowest Datum within 1.5 IQR under Q1.
        """
        if len(self.data) == 1:
            return self.data[0]
        else:
            return self.sorted_data[bisect_left(self.sorted_data, self.q1 - 1.5 * self.iqr)]

    @cached_property
    def hd15iqr(self):
        """
        Tukey-style Highest Datum within 1.5 IQR over Q3.
        """
        if len(self.data) == 1:
            return self.data[0]
        else:
            pos = bisect_right(self.sorted_data, self.q3 + 1.5 * self.iqr)
            if pos == len(self.data):
                return self.sorted_data[-1]
            else:
                return self.sorted_data[pos]

    @cached_property

    def q1(self):
        rounds = self.rounds
        data = self.sorted_data

        # See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
        if rounds == 1:
            return data[0]
        elif rounds % 2:  # Method 3
            n, q = rounds // 4, rounds % 4
            if q == 1:
                return 0.25 * data[n - 1] + 0.75 * data[n]
            else:
                return 0.75 * data[n] + 0.25 * data[n + 1]
        else:  # Method 2
            return statistics.median(data[:rounds // 2])

    @cached_property

    def q3(self):
        rounds = self.rounds
        data = self.sorted_data

        # See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
        if rounds == 1:
            return data[0]
        elif rounds % 2:  # Method 3
            n, q = rounds // 4, rounds % 4
            if q == 1:
                return 0.75 * data[3 * n] + 0.25 * data[3 * n + 1]
            else:
                return 0.25 * data[3 * n + 1] + 0.75 * data[3 * n + 2]
        else:  # Method 2
            return statistics.median(data[rounds // 2:])

    @cached_property
    def iqr(self):
        return self.q3 - self.q1

    @property
    def iqr_outliers(self):
        """
        Count of Tukey outliers: what's beyond (Q1 - 1.5IQR, Q3 + 1.5IQR)
        """
        count = 0
        q0 = self.q1 - 1.5 * self.iqr
        q4 = self.q3 + 1.5 * self.iqr
        for val in self.data:
            if val < q0 or val > q4:
                count += 1
        return count

    @cached_property
    def outliers(self):
        return "%s;%s" % (self.stddev_outliers, self.iqr_outliers)

    @cached_property
    def ops(self):
        if self.total:
            return self.rounds / self.total
        return 0

    def __getattr__(self, name):
        m = PERCENTILE_COL_RX.match(name)
        if not m:
            raise AttributeError(name)

        p = float(m.group(1)) / 100.0
        return self.percentile(p)

    def percentile(self, percent):
        ''' Compute the interpolated percentile.

        This is the method recommmended by NIST:
        http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm

        percent must be in the range [0.0, 1.0].
        '''
        if not (0.0 <= percent <= 1.0):
            raise ValueError('percent must be in the range [0.0, 1.0]')

        if not hasattr(self, '_percentile_cache'):
            self._percentile_cache = {}

        # Check the cache first
        # This isn't perfect with floats for the usual reasons, but is good enough
        cached = self._percentile_cache.get(percent)
        if cached is not None:
            return cached

        # percentiles require sorted data
        data = self.sorted_data
        N = len(data)
        if percent <= 1/(N+1):
            # Too small, return min
            return self._percentile_cache.setdefault(percent, data[0])
        elif percent >= N/(N+1):
            # too big, return max
            return self._percentile_cache.setdefault(percent, data[-1])
        else:
            r = percent * (N + 1)
            k = r // 1
            d = r % 1

            n = int(k - 1)  # zero-indexed lists
            result = data[n] + d * (data[n+1] - data[n])
            return self._percentile_cache.setdefault(percent, result)


class Metadata(object):
    def __init__(self, fixture, iterations, options):
        self.name = fixture.name
        self.fullname = fixture.fullname
        self.group = fixture.group
        self.param = fixture.param
        self.params = fixture.params
        self.extra_info = fixture.extra_info
        self.cprofile_stats = fixture.cprofile_stats

        self.iterations = iterations
        self.options = options
        self.fixture = fixture
        self.stats = Stats()

    def __bool__(self):
        return bool(self.stats)

    def __nonzero__(self):
        return bool(self.stats)

    def get(self, key, default=None):
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key, default)

    def __getitem__(self, key):
        try:
            return getattr(self.stats, key)
        except AttributeError:
            return getattr(self, key)

    @property
    def has_error(self):
        return self.fixture.has_error

    def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None, columns=None):
        result = {
            "group": self.group,
            "name": self.name,
            "fullname": self.fullname,
            "params": self.params,
            "param": self.param,
            "extra_info": self.extra_info,
            "options": dict(
                (k, funcname(v) if callable(v) else v) for k, v in self.options.items()
            )
        }
        if self.cprofile_stats:
            cprofile_list = result["cprofile"] = []
            cprofile_functions = get_cprofile_functions(self.cprofile_stats)
            stats_columns = ["cumtime", "tottime", "ncalls", "ncalls_recursion",
                             "tottime_per", "cumtime_per", "function_name"]
            # move column first
            if cprofile is not None:
                stats_columns.remove(cprofile)
                stats_columns.insert(0, cprofile)
            for column in stats_columns:
                cprofile_functions.sort(key=operator.itemgetter(column), reverse=True)
                for cprofile_function in cprofile_functions[:25]:
                    if cprofile_function not in cprofile_list:
                        cprofile_list.append(cprofile_function)
                # if we want only one column or we already have all available functions
                if cprofile is None or len(cprofile_functions) == len(cprofile_list):
                    break
        if stats:
            if columns is not None:
                extra_fields = tuple(c for c in columns if c not in Stats.fields and PERCENTILE_COL_RX.match(c))
            else:
                extra_fields = None

            stats = self.stats.as_dict(extra_fields=extra_fields)
            if include_data:
                stats["data"] = self.stats.data
            stats["iterations"] = self.iterations
            if flat:
                result.update(stats)
            else:
                result["stats"] = stats
        return result

    def update(self, duration):
        self.stats.update(duration / self.iterations)


def normalize_stats(stats):
    if 'ops' not in stats:
        # fill field added in 3.1.0
        stats['ops'] = 1 / stats['mean']
    return stats


1		from __future__ import division
2		from __future__ import print_function
3
4		import operator
5		import statistics
6		from bisect import bisect_left
7		from bisect import bisect_right
8
9		from .utils import PERCENTILE_COL_RX
10		from .utils import cached_property
11		from .utils import funcname
12		from .utils import get_cprofile_functions
13
14
15		class Stats(object):
16		fields = (
17		"min", "max", "mean", "stddev", "rounds", "median", "iqr", "q1", "q3", "iqr_outliers", "stddev_outliers",
18		"outliers", "ld15iqr", "hd15iqr", "ops", "total"
19		)
20
21		def __init__(self):
22		self.data = []
23
24		def __bool__(self):
25		return bool(self.data)
26
27		def __nonzero__(self):
28		return bool(self.data)
29
30		def as_dict(self, extra_fields=None):
31		fields = Stats.fields + tuple(extra_fields) if extra_fields else Stats.fields
32		return dict(
33		(field, getattr(self, field))
34		for field in fields
35		)
36
37		def update(self, duration):
38		self.data.append(duration)
39
40		@cached_property
41		def sorted_data(self):
42		return sorted(self.data)
43
44		@cached_property
45		def total(self):
46		return sum(self.data)
47
48		@cached_property
49		def min(self):
50		return min(self.data)
51
52		@cached_property
53		def max(self):
54		return max(self.data)
55
56		@cached_property
57		def mean(self):
58		return statistics.mean(self.data)
59
60		@cached_property
61		def stddev(self):
62		if len(self.data) > 1:
63		return statistics.stdev(self.data)
64		else:
65		return 0
66
67		@property
68		def stddev_outliers(self):
69		"""
70		Count of StdDev outliers: what's beyond (Mean - StdDev, Mean - StdDev)
71		"""
72		count = 0
73		q0 = self.mean - self.stddev
74		q4 = self.mean + self.stddev
75		for val in self.data:
76		if val < q0 or val > q4:
77		count += 1
78		return count
79
80		@cached_property
81		def rounds(self):
82		return len(self.data)
83
84		@cached_property
85		def median(self):
86		return statistics.median(self.data)
87
88		@cached_property
89		def ld15iqr(self):
90		"""
91		Tukey-style Lowest Datum within 1.5 IQR under Q1.
92		"""
93		if len(self.data) == 1:
94		return self.data[0]
95		else:
96		return self.sorted_data[bisect_left(self.sorted_data, self.q1 - 1.5 * self.iqr)]
97
98		@cached_property
99		def hd15iqr(self):
100		"""
101		Tukey-style Highest Datum within 1.5 IQR over Q3.
102		"""
103		if len(self.data) == 1:
104		return self.data[0]
105		else:
106		pos = bisect_right(self.sorted_data, self.q3 + 1.5 * self.iqr)
107		if pos == len(self.data):
108		return self.sorted_data[-1]
109		else:
110		return self.sorted_data[pos]
111
112	View Code Duplication	@cached_property
		0 ignored issues – show Duplication introduced 2016-05-26 23:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
113		def q1(self):
114		rounds = self.rounds
115		data = self.sorted_data
116
117		# See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
118		if rounds == 1:
119		return data[0]
120		elif rounds % 2: # Method 3
121		n, q = rounds // 4, rounds % 4
122		if q == 1:
123		return 0.25 * data[n - 1] + 0.75 * data[n]
124		else:
125		return 0.75 * data[n] + 0.25 * data[n + 1]
126		else: # Method 2
127		return statistics.median(data[:rounds // 2])
128
129	View Code Duplication	@cached_property
		0 ignored issues – show Duplication introduced 2016-05-26 23:02 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
130		def q3(self):
131		rounds = self.rounds
132		data = self.sorted_data
133
134		# See: https://en.wikipedia.org/wiki/Quartile#Computing_methods
135		if rounds == 1:
136		return data[0]
137		elif rounds % 2: # Method 3
138		n, q = rounds // 4, rounds % 4
139		if q == 1:
140		return 0.75 * data[3 * n] + 0.25 * data[3 * n + 1]
141		else:
142		return 0.25 * data[3 * n + 1] + 0.75 * data[3 * n + 2]
143		else: # Method 2
144		return statistics.median(data[rounds // 2:])
145
146		@cached_property
147		def iqr(self):
148		return self.q3 - self.q1
149
150		@property
151		def iqr_outliers(self):
152		"""
153		Count of Tukey outliers: what's beyond (Q1 - 1.5IQR, Q3 + 1.5IQR)
154		"""
155		count = 0
156		q0 = self.q1 - 1.5 * self.iqr
157		q4 = self.q3 + 1.5 * self.iqr
158		for val in self.data:
159		if val < q0 or val > q4:
160		count += 1
161		return count
162
163		@cached_property
164		def outliers(self):
165		return "%s;%s" % (self.stddev_outliers, self.iqr_outliers)
166
167		@cached_property
168		def ops(self):
169		if self.total:
170		return self.rounds / self.total
171		return 0
172
173		def __getattr__(self, name):
174		m = PERCENTILE_COL_RX.match(name)
175		if not m:
176		raise AttributeError(name)
177
178		p = float(m.group(1)) / 100.0
179		return self.percentile(p)
180
181		def percentile(self, percent):
182		''' Compute the interpolated percentile.
183
184		This is the method recommmended by NIST:
185		http://www.itl.nist.gov/div898/handbook/prc/section2/prc262.htm
186
187		percent must be in the range [0.0, 1.0].
188		'''
189		if not (0.0 <= percent <= 1.0):
190		raise ValueError('percent must be in the range [0.0, 1.0]')
191
192		if not hasattr(self, '_percentile_cache'):
193		self._percentile_cache = {}
194
195		# Check the cache first
196		# This isn't perfect with floats for the usual reasons, but is good enough
197		cached = self._percentile_cache.get(percent)
198		if cached is not None:
199		return cached
200
201		# percentiles require sorted data
202		data = self.sorted_data
203		N = len(data)
204		if percent <= 1/(N+1):
205		# Too small, return min
206		return self._percentile_cache.setdefault(percent, data[0])
207		elif percent >= N/(N+1):
208		# too big, return max
209		return self._percentile_cache.setdefault(percent, data[-1])
210		else:
211		r = percent * (N + 1)
212		k = r // 1
213		d = r % 1
214
215		n = int(k - 1) # zero-indexed lists
216		result = data[n] + d * (data[n+1] - data[n])
217		return self._percentile_cache.setdefault(percent, result)
218
219
220		class Metadata(object):
221		def __init__(self, fixture, iterations, options):
222		self.name = fixture.name
223		self.fullname = fixture.fullname
224		self.group = fixture.group
225		self.param = fixture.param
226		self.params = fixture.params
227		self.extra_info = fixture.extra_info
228		self.cprofile_stats = fixture.cprofile_stats
229
230		self.iterations = iterations
231		self.options = options
232		self.fixture = fixture
233		self.stats = Stats()
234
235		def __bool__(self):
236		return bool(self.stats)
237
238		def __nonzero__(self):
239		return bool(self.stats)
240
241		def get(self, key, default=None):
242		try:
243		return getattr(self.stats, key)
244		except AttributeError:
245		return getattr(self, key, default)
246
247		def __getitem__(self, key):
248		try:
249		return getattr(self.stats, key)
250		except AttributeError:
251		return getattr(self, key)
252
253		@property
254		def has_error(self):
255		return self.fixture.has_error
256
257		def as_dict(self, include_data=True, flat=False, stats=True, cprofile=None, columns=None):
258		result = {
259		"group": self.group,
260		"name": self.name,
261		"fullname": self.fullname,
262		"params": self.params,
263		"param": self.param,
264		"extra_info": self.extra_info,
265		"options": dict(
266		(k, funcname(v) if callable(v) else v) for k, v in self.options.items()
267		)
268		}
269		if self.cprofile_stats:
270		cprofile_list = result["cprofile"] = []
271		cprofile_functions = get_cprofile_functions(self.cprofile_stats)
272		stats_columns = ["cumtime", "tottime", "ncalls", "ncalls_recursion",
273		"tottime_per", "cumtime_per", "function_name"]
274		# move column first
275		if cprofile is not None:
276		stats_columns.remove(cprofile)
277		stats_columns.insert(0, cprofile)
278		for column in stats_columns:
279		cprofile_functions.sort(key=operator.itemgetter(column), reverse=True)
280		for cprofile_function in cprofile_functions[:25]:
281		if cprofile_function not in cprofile_list:
282		cprofile_list.append(cprofile_function)
283		# if we want only one column or we already have all available functions
284		if cprofile is None or len(cprofile_functions) == len(cprofile_list):
285		break
286		if stats:
287		if columns is not None:
288		extra_fields = tuple(c for c in columns if c not in Stats.fields and PERCENTILE_COL_RX.match(c))
289		else:
290		extra_fields = None
291
292		stats = self.stats.as_dict(extra_fields=extra_fields)
293		if include_data:
294		stats["data"] = self.stats.data
295		stats["iterations"] = self.iterations
296		if flat:
297		result.update(stats)
298		else:
299		result["stats"] = stats
300		return result
301
302		def update(self, duration):
303		self.stats.update(duration / self.iterations)
304
305
306		def normalize_stats(stats):
307		if 'ops' not in stats:
308		# fill field added in 3.1.0
309		stats['ops'] = 1 / stats['mean']
310		return stats
311

ionelmc / pytest-benchmark

Pull Request — master (#93)

Stats B

Complexity

Size/Duplication

Importance

24 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complex Class

Duplication Side-by-Side

Filter issues like