BenchmarkJSONSerializer - Code Metrics - ionelmc/pytest-benchmark - Measure and Improve Code Quality continuously with Scrutinizer

BenchmarkJSONSerializer A
last analyzed 2018-06-06 15:27 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	10
Duplicated Lines	0 %

Importance

Changes

Metric	Value
c	0
b	0
f	0
dl	0
loc	10
rs	10
wmc	4

1 Method

Rating	Name	Duplication	Size	Complexity
A	default()	0	9	4

from __future__ import absolute_import

import re
import sys
import uuid
from datetime import date
from datetime import datetime
from decimal import Decimal
from functools import partial

from ..compat import reraise
from ..stats import normalize_stats

# The elasticsearch client is needed by everything below. When it cannot be
# imported, raise an ImportError that carries an installation hint plus the
# args of the original error.
try:
    import elasticsearch
    from elasticsearch.serializer import JSONSerializer
except ImportError as exc:
    # The third argument is the traceback of the failed import; presumably
    # ``reraise`` (from ..compat) attaches it to the new exception so the
    # original failure location is kept -- verify against ..compat.
    reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
            sys.exc_info()[2])


class BenchmarkJSONSerializer(JSONSerializer):
    """
    JSON serializer that never fails on values found in benchmark records.

    Dates, decimals and UUIDs are converted to JSON-friendly values; anything
    else is replaced by a placeholder string instead of raising.
    """

    def default(self, data):
        """
        Convert ``data`` into something the JSON encoder can emit.

        :param data: the object to convert.
        :return: an ISO-8601 string for ``date``/``datetime``, a ``float``
            for ``Decimal``, a ``str`` for ``uuid.UUID`` and the string
            ``"UNSERIALIZABLE[<repr>]"`` for every other object.
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        # Wrap the operand in a 1-tuple: with a bare ``% data`` a tuple value
        # would be unpacked as the format arguments and raise TypeError (or
        # format only its single element).
        return "UNSERIALIZABLE[%r]" % (data,)


def _mask_hosts(hosts):
    m = re.compile('^([^:]+)://[^@]+@')
    sub_fun = partial(m.sub, '\\1://***:***@')
    masked_hosts = list(map(sub_fun, hosts))
    return masked_hosts


class ElasticsearchStorage(object):
    """
    Benchmark storage that keeps one Elasticsearch document per benchmark.

    Every document carries the benchmark's own fields plus the run-level
    fields (machine info, commit info, datetime, version), so runs are
    rebuilt on load by grouping documents on commit id and datetime.
    """

    # Formats accepted for the stored "datetime" field, tried in order.
    # ``datetime.isoformat()`` leaves out the fractional part when the
    # microsecond is 0, so the variant without ``%f`` must be accepted too.
    _DATETIME_FORMATS = ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S")

    def __init__(self, hosts, index, doctype, project_name, logger,
                 default_machine_id=None):
        """
        Create the client and make sure the index exists.

        :param hosts: hosts passed as-is to ``elasticsearch.Elasticsearch``.
        :param index: name of the index used for every request.
        :param doctype: document type used for every request.
        :param project_name: value matched against ``commit_info.project``
            when searching.
        :param logger: object whose ``info`` method receives the save message.
        :param default_machine_id: optional prefix added to benchmark ids on
            save.
        """
        self._es_hosts = hosts
        self._es_index = index
        self._es_doctype = doctype
        self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
        self._project_name = project_name
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return str(self._es_hosts)

    @property
    def location(self):
        """String form of the configured hosts."""
        return str(self._es_hosts)

    def query(self):
        """
        Return the sorted ``benchmark_id`` values reported by a terms
        aggregation over the index.

        NOTE(review): the request has no project filter and sets no bucket
        ``size``, so the number of ids returned is whatever the server
        defaults to -- confirm this is intended.
        """
        body = {
            "size": 0,
            "aggs": {
                "benchmark_ids": {
                    "terms": {
                        "field": "benchmark_id"
                    }
                }
            }
        }
        result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
        buckets = result["aggregations"]["benchmark_ids"]["buckets"]
        return sorted(bucket["key"] for bucket in buckets)

    @classmethod
    def _parse_datetime(cls, value):
        """
        Parse a stored ISO-8601 ``datetime`` string, with or without the
        fractional seconds part.

        :raises ValueError: when ``value`` matches none of the known formats.
        """
        for fmt in cls._DATETIME_FORMATS:
            try:
                return datetime.strptime(value, fmt)
            except ValueError:
                continue
        raise ValueError("time data %r does not match any of the formats %r" % (value, cls._DATETIME_FORMATS))

    def load(self, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for the project, oldest run first.

        ``key`` is ``"<commit id>_<datetime>"`` and ``run`` holds the run-level
        fields plus a ``"benchmarks"`` list.  ``normalize_stats`` is applied
        to the stats of every benchmark before the run is yielded.

        :param id_prefix: optional document id prefix to restrict the search.
        """
        response = self._search(self._project_name, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            for bench in data["benchmarks"]:
                normalize_stats(bench["stats"])
            yield key, data

    def _search(self, project, id_prefix=None):
        """
        Search documents whose ``commit_info.project`` equals ``project``,
        newest first, requesting at most 1000 hits.

        :param id_prefix: when truthy, additionally require the document
            ``_id`` to start with this prefix.
        :return: the raw search response.
        """
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": {
                    "filter": {
                        "term": {
                            "commit_info.project": project
                        }
                    }
                }
            }
        }
        if id_prefix:
            body["query"]["bool"]["must"] = {
                "prefix": {
                    "_id": id_prefix
                }
            }

        return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Return a dict with only the per-benchmark fields of a document."""
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id")
        return {key: source_es_record[key] for key in benchmark_keys}

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Return a dict with only the run-level fields of a document."""
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return {key: source_es_record[key] for key in run_keys}

    def _group_by_commit_and_time(self, hits):
        """
        Group search hits into runs.

        Hits sharing the same commit id and datetime belong to one run; the
        run-level fields are taken from the first hit seen for that run.

        :param hits: list of search hits, each with a ``"_source"`` dict.
        :return: dict mapping ``"<commit id>_<datetime>"`` to the run dict.
        """
        result = {}
        for hit in hits:
            source_hit = hit["_source"]
            key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
            benchmark = self._benchmark_from_es_record(source_hit)
            run_info = result.get(key)
            if run_info is None:
                run_info = self._run_info_from_es_record(source_hit)
                run_info["benchmarks"] = []
                result[key] = run_info
            run_info["benchmarks"].append(benchmark)
        return result

    def load_benchmarks(self, *args):
        """
        Yield one flat dict per benchmark document of the project.

        The ``stats`` sub-dict is merged into the top level of each benchmark
        and ``source`` is set to the benchmark's ``benchmark_id``.

        :param args: optional; the first positional argument, if given, is
            used as the document id prefix for the search.
        """
        id_prefix = args[0] if args else None
        response = self._search(self._project_name, id_prefix)
        for hit in response["hits"]["hits"]:
            bench = self._benchmark_from_es_record(hit["_source"])
            bench.update(bench.pop("stats"))
            bench["source"] = bench["benchmark_id"]
            yield bench

    def save(self, output_json, save):
        """
        Index every benchmark of ``output_json`` as its own document.

        Note that ``output_json`` is modified: its ``"benchmarks"`` entry is
        removed, and each benchmark dict receives the remaining top-level
        fields plus a ``benchmark_id``.

        :param output_json: run dict with a ``"benchmarks"`` list.
        :param save: base benchmark id; prefixed with ``default_machine_id``
            and an underscore when that is set.
        """
        output_benchmarks = output_json.pop("benchmarks")
        for bench in output_benchmarks:
            # add top level info from output_json dict to each record
            bench.update(output_json)
            benchmark_id = save
            if self.default_machine_id:
                benchmark_id = self.default_machine_id + "_" + benchmark_id
            doc_id = benchmark_id + "_" + bench["fullname"]
            bench["benchmark_id"] = benchmark_id
            self._es.index(
                index=self._es_index,
                doc_type=self._es_doctype,
                body=bench,
                id=doc_id,
            )
        # hide user's credentials before logging
        masked_hosts = _mask_hosts(self._es_hosts)
        self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
            masked_hosts, self._es_index, self._es_doctype))

    def _create_index(self):
        """
        Create the index with an explicit mapping for the benchmark fields.

        HTTP 400 responses are ignored -- presumably so that an index which
        already exists is not treated as an error (confirm against the
        client documentation).
        """
        mapping = {
            "mappings": {
                # The mapping must be registered under the doctype that the
                # search/index calls use; a hard-coded type name would leave
                # documents of any other configured doctype without it.
                self._es_doctype: {
                    "properties": {
                        "commit_info": {
                            "properties": {
                                "dirty": {"type": "boolean"},
                                "id": {"type": "string", "index": "not_analyzed"},
                                "project": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "datetime": {
                            "type": "date",
                            "format": "strict_date_optional_time||epoch_millis",
                        },
                        "name": {"type": "string", "index": "not_analyzed"},
                        "fullname": {"type": "string", "index": "not_analyzed"},
                        "version": {"type": "string", "index": "not_analyzed"},
                        "benchmark_id": {"type": "string", "index": "not_analyzed"},
                        "machine_info": {
                            "properties": {
                                "machine": {"type": "string", "index": "not_analyzed"},
                                "node": {"type": "string", "index": "not_analyzed"},
                                "processor": {"type": "string", "index": "not_analyzed"},
                                "python_build": {"type": "string", "index": "not_analyzed"},
                                "python_compiler": {"type": "string", "index": "not_analyzed"},
                                "python_implementation": {"type": "string", "index": "not_analyzed"},
                                "python_implementation_version": {"type": "string", "index": "not_analyzed"},
                                "python_version": {"type": "string", "index": "not_analyzed"},
                                "release": {"type": "string", "index": "not_analyzed"},
                                "system": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "options": {
                            "properties": {
                                "disable_gc": {"type": "boolean"},
                                "max_time": {"type": "double"},
                                "min_rounds": {"type": "long"},
                                "min_time": {"type": "double"},
                                "timer": {"type": "string"},
                                "warmup": {"type": "boolean"},
                            }
                        },
                        "stats": {
                            "properties": {
                                "hd15iqr": {"type": "double"},
                                "iqr": {"type": "double"},
                                "iqr_outliers": {"type": "long"},
                                "iterations": {"type": "long"},
                                "ld15iqr": {"type": "double"},
                                "max": {"type": "double"},
                                "mean": {"type": "double"},
                                "median": {"type": "double"},
                                "min": {"type": "double"},
                                "outliers": {"type": "string"},
                                "q1": {"type": "double"},
                                "q3": {"type": "double"},
                                "rounds": {"type": "long"},
                                "stddev": {"type": "double"},
                                "stddev_outliers": {"type": "long"},
                                "ops": {"type": "double"},
                            }
                        },
                    }
                }
            }
        }
        self._es.indices.create(index=self._es_index, ignore=400, body=mapping)


1			from __future__ import absolute_import
2
3			import re
4			import sys
5			import uuid
6			from datetime import date
7			from datetime import datetime
8			from decimal import Decimal
9			from functools import partial
10
11			from ..compat import reraise
12			from ..stats import normalize_stats
13
14			try:
15			import elasticsearch
16			from elasticsearch.serializer import JSONSerializer
17			except ImportError as exc:
18			reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
19			sys.exc_info()[2])
20
21
22			class BenchmarkJSONSerializer(JSONSerializer):
23			def default(self, data):
24			if isinstance(data, (date, datetime)):
25			return data.isoformat()
26			elif isinstance(data, Decimal):
27			return float(data)
28			elif isinstance(data, uuid.UUID):
29			return str(data)
30			else:
31			return "UNSERIALIZABLE[%r]" % data
32
33
34			def _mask_hosts(hosts):
35			m = re.compile('^([^:]+)://[^@]+@')
36			sub_fun = partial(m.sub, '\\1://***:***@')
37			masked_hosts = list(map(sub_fun, hosts))
38			return masked_hosts
39
40
41			class ElasticsearchStorage(object):
42			def __init__(self, hosts, index, doctype, project_name, logger,
43			default_machine_id=None):
44			self._es_hosts = hosts
45			self._es_index = index
46			self._es_doctype = doctype
47			self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
48			self._project_name = project_name
49			self.default_machine_id = default_machine_id
50			self.logger = logger
51			self._cache = {}
52			self._create_index()
53
54			def __str__(self):
55			return str(self._es_hosts)
56
57			@property
58			def location(self):
59			return str(self._es_hosts)
60
61			def query(self):
62			"""
63			Returns sorted records names (ids) that corresponds with project.
64			"""
65			body = {
66			"size": 0,
67			"aggs": {
68			"benchmark_ids": {
69			"terms": {
70			"field": "benchmark_id"
71			}
72			}
73			}
74			}
75			result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
76			return sorted([record["key"] for record in result["aggregations"]["benchmark_ids"]["buckets"]])
77
78			def load(self, id_prefix=None):
79			"""
80			Yield key and content of records that corresponds with project name.
81			"""
82			r = self._search(self._project_name, id_prefix)
83			groupped_data = self._group_by_commit_and_time(r["hits"]["hits"])
84			result = [(key, value) for key, value in groupped_data.items()]
85			result.sort(key=lambda x: datetime.strptime(x[1]["datetime"], "%Y-%m-%dT%H:%M:%S.%f"))
86			for key, data in result:
87			for bench in data["benchmarks"]:
88			normalize_stats(bench["stats"])
89			yield key, data
90
91			def _search(self, project, id_prefix=None):
92			body = {
93			"size": 1000,
94			"sort": [
95			{
96			"datetime": {
97			"order": "desc"
98			}
99			}
100			],
101			"query": {
102			"bool": {
103			"filter": {
104			"term": {
105			"commit_info.project": project
106			}
107			}
108			}
109			}
110			}
111			if id_prefix:
112			body["query"]["bool"]["must"] = {
113			"prefix": {
114			"_id": id_prefix
115			}
116			}
117
118			return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
119
120			@staticmethod
121			def _benchmark_from_es_record(source_es_record):
122			result = {}
123			for benchmark_key in ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id"):
124			result[benchmark_key] = source_es_record[benchmark_key]
125			return result
126
127			@staticmethod
128			def _run_info_from_es_record(source_es_record):
129			result = {}
130			for run_key in ("machine_info", "commit_info", "datetime", "version"):
131			result[run_key] = source_es_record[run_key]
132			return result
133
134			def _group_by_commit_and_time(self, hits):
135			result = {}
136			for hit in hits:
137			source_hit = hit["_source"]
138			key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
139			benchmark = self._benchmark_from_es_record(source_hit)
140			if key in result:
141			result[key]["benchmarks"].append(benchmark)
142			else:
143			run_info = self._run_info_from_es_record(source_hit)
144			run_info["benchmarks"] = [benchmark]
145			result[key] = run_info
146			return result
147
148			def load_benchmarks(self, *args):
149			"""
150			Yield benchmarks that corresponds with project. Put path and
151			source (uncommon part of path) to benchmark dict.
152			"""
153			id_prefix = args[0] if args else None
154			r = self._search(self._project_name, id_prefix)
155			for hit in r["hits"]["hits"]:
156			bench = self._benchmark_from_es_record(hit["_source"])
157			bench.update(bench.pop("stats"))
158			bench["source"] = bench["benchmark_id"]
159			yield bench
160
161			def save(self, output_json, save):
162			output_benchmarks = output_json.pop("benchmarks")
163			for bench in output_benchmarks:
164			# add top level info from output_json dict to each record
165			bench.update(output_json)
166			benchmark_id = save
167			if self.default_machine_id:
168			benchmark_id = self.default_machine_id + "_" + benchmark_id
169			doc_id = benchmark_id + "_" + bench["fullname"]
170			bench["benchmark_id"] = benchmark_id
171			self._es.index(
172			index=self._es_index,
173			doc_type=self._es_doctype,
174			body=bench,
175			id=doc_id,
176			)
177			# hide user's credentials before logging
178			masked_hosts = _mask_hosts(self._es_hosts)
179			self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
180			masked_hosts, self._es_index, self._es_doctype))
181
182			def _create_index(self):
183			mapping = {
184			"mappings": {
185			"benchmark": {
186			"properties": {
187			"commit_info": {
188			"properties": {
189			"dirty": {
190			"type": "boolean"
191			},
192			"id": {
193			"type": "string",
194			"index": "not_analyzed"
195
196			},
197			"project": {
198			"type": "string",
199			"index": "not_analyzed"
200			}
201			}
202			},
203			"datetime": {
204			"type": "date",
205			"format": "strict_date_optional_time||epoch_millis"
206			},
207			"name": {
208			"type": "string",
209			"index": "not_analyzed"
210			},
211			"fullname": {
212			"type": "string",
213			"index": "not_analyzed"
214			},
215			"version": {
216			"type": "string",
217			"index": "not_analyzed"
218			},
219			"benchmark_id": {
220			"type": "string",
221			"index": "not_analyzed",
222			},
223			"machine_info": {
224			"properties": {
225			"machine": {
226			"type": "string",
227			"index": "not_analyzed"
228			},
229			"node": {
230			"type": "string",
231			"index": "not_analyzed"
232			},
233			"processor": {
234			"type": "string",
235			"index": "not_analyzed"
236			},
237			"python_build": {
238			"type": "string",
239			"index": "not_analyzed"
240			},
241			"python_compiler": {
242			"type": "string",
243			"index": "not_analyzed"
244			},
245			"python_implementation": {
246			"type": "string",
247			"index": "not_analyzed"
248			},
249			"python_implementation_version": {
250			"type": "string",
251			"index": "not_analyzed"
252			},
253			"python_version": {
254			"type": "string",
255			"index": "not_analyzed"
256			},
257			"release": {
258			"type": "string",
259			"index": "not_analyzed"
260			},
261			"system": {
262			"type": "string",
263			"index": "not_analyzed"
264			}
265			}
266			},
267			"options": {
268			"properties": {
269			"disable_gc": {
270			"type": "boolean"
271			},
272			"max_time": {
273			"type": "double"
274			},
275			"min_rounds": {
276			"type": "long"
277			},
278			"min_time": {
279			"type": "double"
280			},
281			"timer": {
282			"type": "string"
283			},
284			"warmup": {
285			"type": "boolean"
286			}
287			}
288			},
289			"stats": {
290			"properties": {
291			"hd15iqr": {
292			"type": "double"
293			},
294			"iqr": {
295			"type": "double"
296			},
297			"iqr_outliers": {
298			"type": "long"
299			},
300			"iterations": {
301			"type": "long"
302			},
303			"ld15iqr": {
304			"type": "double"
305			},
306			"max": {
307			"type": "double"
308			},
309			"mean": {
310			"type": "double"
311			},
312			"median": {
313			"type": "double"
314			},
315			"min": {
316			"type": "double"
317			},
318			"outliers": {
319			"type": "string"
320			},
321			"q1": {
322			"type": "double"
323			},
324			"q3": {
325			"type": "double"
326			},
327			"rounds": {
328			"type": "long"
329			},
330			"stddev": {
331			"type": "double"
332			},
333			"stddev_outliers": {
334			"type": "long"
335			},
336			"ops": {
337			"type": "double"
338			},
339			}
340			},
341			}
342			}
343			}
344			}
345			self._es.indices.create(index=self._es_index, ignore=400, body=mapping)
346

ionelmc / pytest-benchmark

BenchmarkJSONSerializer A last analyzed 2018-06-06 15:27 UTC

Complexity

Size/Duplication

Importance

1 Method

Duplication Side-by-Side

Filter issues like

BenchmarkJSONSerializer A
last analyzed 2018-06-06 15:27 UTC