ElasticsearchStorage._create_index() - Code Metrics - Inspection of "Elasticsearch report backend" - ionelmc/pytest-benchmark - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#58)

unknown

created 2016-10-11 11:14 UTC

ElasticsearchStorage._create_index() B

↳ Parent: ElasticsearchStorage

Complexity

Conditions

Size

Total Lines

161

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
cc	1
c	2
b	0
f	0
dl	0
loc	161
rs	8.2857

How to fix Long Method

from __future__ import absolute_import

import uuid
import sys
from datetime import date
from datetime import datetime
from decimal import Decimal

from ..compat import reraise

try:
    import elasticsearch
    from elasticsearch.serializer import JSONSerializer
except ImportError as exc:
    reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
            sys.exc_info()[2])


class BenchmarkJSONSerializer(JSONSerializer):
    """JSON serializer that never rejects a value found in benchmark data."""

    def default(self, data):
        """
        Convert a value the JSON encoder cannot handle natively.

        Dates and datetimes become ISO-8601 strings, ``Decimal`` becomes
        ``float`` and ``UUID`` becomes its string form. Anything else is
        replaced by an ``UNSERIALIZABLE[...]`` placeholder built from its
        ``repr``.
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        # Last resort: keep a readable trace of the value in the document.
        return "UNSERIALIZABLE[%r]" % data


class ElasticsearchStorage(object):
    """
    Benchmark storage backed by Elasticsearch, one document per benchmark.

    Each saved benchmark is indexed together with the run-level information
    (machine info, commit info, datetime, version) of the run it belongs to.
    Loading regroups the documents into runs keyed by commit id and datetime.
    """

    def __init__(self, hosts, index, doctype, project_name, logger,
                 default_machine_id=None):
        """
        Connect to Elasticsearch and create the index if it is missing.

        :param hosts: Hosts handed to ``elasticsearch.Elasticsearch``.
        :param index: Name of the index that holds the benchmark documents.
        :param doctype: Document type used for indexing, searching and mapping.
        :param project_name: Value matched against ``commit_info.project``
            when searching.
        :param logger: Object with an ``info(message)`` method.
        :param default_machine_id: Optional prefix added to benchmark ids.
        """
        self._es_hosts = hosts
        self._es_index = index
        self._es_doctype = doctype
        self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
        self._project_name = project_name
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return str(self._es_hosts)

    @property
    def location(self):
        """String form of the configured hosts."""
        return str(self._es_hosts)

    def query(self):
        """
        Return the sorted benchmark ids found in the index.

        The ids come from a terms aggregation on ``benchmark_id``; the search
        itself is not restricted to the configured project.
        """
        # NOTE(review): no "size" is given for the terms aggregation, so
        # Elasticsearch presumably returns only its default number of
        # buckets -- confirm whether all ids are expected here.
        body = {
            "size": 0,
            "aggs": {
                "benchmark_ids": {
                    "terms": {
                        "field": "benchmark_id"
                    }
                }
            }
        }
        result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
        buckets = result["aggregations"]["benchmark_ids"]["buckets"]
        return sorted(bucket["key"] for bucket in buckets)

    def load(self, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for the project, oldest run first.

        ``key`` is ``"<commit id>_<datetime>"`` and ``run`` holds the run-level
        information plus a ``"benchmarks"`` list.

        :param id_prefix: Optional document id prefix to restrict the search.
        :raises ValueError: If a stored datetime is not an ISO-8601 timestamp
            of the form ``YYYY-MM-DDTHH:MM:SS[.ffffff]``.
        """
        response = self._search(self._project_name, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            yield key, data

    @staticmethod
    def _parse_datetime(value):
        """Parse an ISO-8601 timestamp with or without fractional seconds."""
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # ``datetime.isoformat()`` drops the fraction when the microsecond
            # field is zero, so such timestamps need the shorter layout.
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def _search(self, project, id_prefix=None):
        """
        Return the raw search response for ``project``.

        At most 1000 hits are requested, sorted by ``datetime`` descending.
        When ``id_prefix`` is given, hits must also have an ``_id`` starting
        with it.
        """
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": {
                    "filter": {
                        "term": {
                            "commit_info.project": project
                        }
                    }
                }
            }
        }
        if id_prefix:
            body["query"]["bool"]["must"] = {
                "prefix": {
                    "_id": id_prefix
                }
            }

        return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Return a new dict with only the per-benchmark keys of a document."""
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id")
        return {key: source_es_record[key] for key in benchmark_keys}

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Return a new dict with only the run-level keys of a document."""
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return {key: source_es_record[key] for key in run_keys}

    def _group_by_commit_and_time(self, hits):
        """
        Group search hits into runs keyed by ``"<commit id>_<datetime>"``.

        Each run is the run-level information of its first hit plus a
        ``"benchmarks"`` list with one entry per hit of that run.
        """
        runs = {}
        for hit in hits:
            source = hit["_source"]
            key = "%s_%s" % (source["commit_info"]["id"], source["datetime"])
            benchmark = self._benchmark_from_es_record(source)
            if key not in runs:
                runs[key] = self._run_info_from_es_record(source)
                runs[key]["benchmarks"] = []
            runs[key]["benchmarks"].append(benchmark)
        return runs

    def load_benchmarks(self, *args):
        """
        Yield one flat dict per benchmark document of the project.

        The ``stats`` values are merged into the top level of each dict and
        ``source`` is set to the benchmark id. The first positional argument,
        if any, is used as a document id prefix.
        """
        id_prefix = args[0] if args else None
        response = self._search(self._project_name, id_prefix)
        for hit in response["hits"]["hits"]:
            bench = self._benchmark_from_es_record(hit["_source"])
            bench.update(bench.pop("stats"))
            bench["source"] = bench["benchmark_id"]
            yield bench

    def save(self, output_json, save):
        """
        Index every benchmark of ``output_json`` as its own document.

        Each document is the benchmark merged with the top-level (run-level)
        entries of ``output_json`` and a ``benchmark_id``. Neither
        ``output_json`` nor its benchmark dicts are modified.

        :param output_json: Run data with a ``"benchmarks"`` list.
        :param save: Id of the run; prefixed with ``default_machine_id`` if set.
        :raises KeyError: If ``output_json`` has no ``"benchmarks"`` entry.
        """
        benchmarks = output_json["benchmarks"]
        # Run-level info is copied onto each record; work on copies so the
        # caller's data stays intact.
        run_info = {key: value for key, value in output_json.items() if key != "benchmarks"}

        benchmark_id = save
        if self.default_machine_id:
            benchmark_id = self.default_machine_id + "_" + benchmark_id

        for bench in benchmarks:
            record = dict(bench)
            record.update(run_info)
            record["benchmark_id"] = benchmark_id
            self._es.index(
                index=self._es_index,
                doc_type=self._es_doctype,
                body=record,
                id=benchmark_id + "_" + record["fullname"],
            )
        self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
            self._es_hosts, self._es_index, self._es_doctype))

    @staticmethod
    def _fields(field_type, names, **extra):
        """Map every name to its own ``{"type": field_type}`` dict plus ``extra``."""
        return {name: dict(extra, type=field_type) for name in names}

    @classmethod
    def _index_mapping(cls, doctype="benchmark"):
        """
        Build the index creation body with the mapping for ``doctype``.

        Identifier-like strings are ``not_analyzed``; ``options.timer`` and
        ``stats.outliers`` are plain strings.
        """
        fields = cls._fields
        exact = {"index": "not_analyzed"}

        commit_info = fields("boolean", ["dirty"])
        commit_info.update(fields("string", ["id", "project"], **exact))

        machine_info = fields("string", [
            "machine", "node", "processor", "python_build", "python_compiler",
            "python_implementation", "python_implementation_version",
            "python_version", "release", "system",
        ], **exact)

        options = fields("boolean", ["disable_gc", "warmup"])
        options.update(fields("double", ["max_time", "min_time"]))
        options.update(fields("long", ["min_rounds"]))
        options.update(fields("string", ["timer"]))

        stats = fields("double", [
            "hd15iqr", "iqr", "ld15iqr", "max", "mean", "median", "min",
            "q1", "q3", "stddev",
        ])
        stats.update(fields("long", ["iqr_outliers", "iterations", "rounds", "stddev_outliers"]))
        stats.update(fields("string", ["outliers"]))

        properties = fields("string", ["name", "fullname", "version", "benchmark_id"], **exact)
        properties["datetime"] = {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis",
        }
        properties["commit_info"] = {"properties": commit_info}
        properties["machine_info"] = {"properties": machine_info}
        properties["options"] = {"properties": options}
        properties["stats"] = {"properties": stats}

        return {"mappings": {doctype: {"properties": properties}}}

    def _create_index(self):
        """
        Create the index with the benchmark mapping.

        The mapping is registered under the configured doctype, the same one
        ``save`` writes and the searches read. HTTP 400 responses are ignored
        via ``ignore=400``.
        """
        # NOTE(review): 400 is presumably what Elasticsearch answers when the
        # index already exists -- confirm no other 400 needs surfacing.
        mapping = self._index_mapping(self._es_doctype)
        self._es.indices.create(index=self._es_index, ignore=400, body=mapping)


1			from __future__ import absolute_import
2
3			import uuid
4			import sys
5			from datetime import date
6			from datetime import datetime
7			from decimal import Decimal
8
9			from ..compat import reraise
10
11			try:
12			import elasticsearch
13			from elasticsearch.serializer import JSONSerializer
14			except ImportError as exc:
15			reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
16			sys.exc_info()[2])
17
18
19			class BenchmarkJSONSerializer(JSONSerializer):
20			def default(self, data):
21			if isinstance(data, (date, datetime)):
22			return data.isoformat()
23			elif isinstance(data, Decimal):
24			return float(data)
25			elif isinstance(data, uuid.UUID):
26			return str(data)
27			else:
28			return "UNSERIALIZABLE[%r]" % data
29
30
31			class ElasticsearchStorage(object):
32			def __init__(self, hosts, index, doctype, project_name, logger,
33			default_machine_id=None):
34			self._es_hosts = hosts
35			self._es_index = index
36			self._es_doctype = doctype
37			self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
38			self._project_name = project_name
39			self.default_machine_id = default_machine_id
40			self.logger = logger
41			self._cache = {}
42			self._create_index()
43
44			def __str__(self):
45			return str(self._es_hosts)
46
47			@property
48			def location(self):
49			return str(self._es_hosts)
50
51			def query(self):
52			"""
53			Returns sorted records names (ids) that corresponds with project.
54			"""
55			body = {
56			"size": 0,
57			"aggs": {
58			"benchmark_ids": {
59			"terms": {
60			"field": "benchmark_id"
61			}
62			}
63			}
64			}
65			result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
66			return sorted([record["key"] for record in result["aggregations"]["benchmark_ids"]["buckets"]])
67
68			def load(self, id_prefix=None):
69			"""
70			Yield key and content of records that corresponds with project name.
71			"""
72			r = self._search(self._project_name, id_prefix)
73			groupped_data = self._group_by_commit_and_time(r["hits"]["hits"])
74			result = [(key, value) for key, value in groupped_data.items()]
75			result.sort(key=lambda x: datetime.strptime(x[1]["datetime"], "%Y-%m-%dT%H:%M:%S.%f"))
76			for key, data in result:
77			yield key, data
78
79			def _search(self, project, id_prefix=None):
80			body = {
81			"size": 1000,
82			"sort": [
83			{
84			"datetime": {
85			"order": "desc"
86			}
87			}
88			],
89			"query": {
90			"bool": {
91			"filter": {
92			"term": {
93			"commit_info.project": project
94			}
95			}
96			}
97			}
98			}
99			if id_prefix:
100			body["query"]["bool"]["must"] = {
101			"prefix": {
102			"_id": id_prefix
103			}
104			}
105
106			return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
107
108			@staticmethod
109			def _benchmark_from_es_record(source_es_record):
110			result = {}
111			for benchmark_key in ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id"):
112			result[benchmark_key] = source_es_record[benchmark_key]
113			return result
114
115			@staticmethod
116			def _run_info_from_es_record(source_es_record):
117			result = {}
118			for run_key in ("machine_info", "commit_info", "datetime", "version"):
119			result[run_key] = source_es_record[run_key]
120			return result
121
122			def _group_by_commit_and_time(self, hits):
123			result = {}
124			for hit in hits:
125			source_hit = hit["_source"]
126			key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
127			benchmark = self._benchmark_from_es_record(source_hit)
128			if key in result:
129			result[key]["benchmarks"].append(benchmark)
130			else:
131			run_info = self._run_info_from_es_record(source_hit)
132			run_info["benchmarks"] = [benchmark]
133			result[key] = run_info
134			return result
135
136			def load_benchmarks(self, *args):
137			"""
138			Yield benchmarks that corresponds with project. Put path and
139			source (uncommon part of path) to benchmark dict.
140			"""
141			id_prefix = args[0] if args else None
142			r = self._search(self._project_name, id_prefix)
143			for hit in r["hits"]["hits"]:
144			bench = self._benchmark_from_es_record(hit["_source"])
145			bench.update(bench.pop("stats"))
146			bench["source"] = bench["benchmark_id"]
147			yield bench
148
149			def save(self, output_json, save):
150			output_benchmarks = output_json.pop("benchmarks")
151			for bench in output_benchmarks:
152			# add top level info from output_json dict to each record
153			bench.update(output_json)
154			benchmark_id = save
155			if self.default_machine_id:
156			benchmark_id = self.default_machine_id + "_" + benchmark_id
157			doc_id = benchmark_id + "_" + bench["fullname"]
158			bench["benchmark_id"] = benchmark_id
159			self._es.index(
160			index=self._es_index,
161			doc_type=self._es_doctype,
162			body=bench,
163			id=doc_id,
164			)
165			self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
166			self._es_hosts, self._es_index, self._es_doctype))
167
168			def _create_index(self):
169			mapping = {
170			"mappings": {
171			"benchmark": {
172			"properties": {
173			"commit_info": {
174			"properties": {
175			"dirty": {
176			"type": "boolean"
177			},
178			"id": {
179			"type": "string",
180			"index": "not_analyzed"
181
182			},
183			"project": {
184			"type": "string",
185			"index": "not_analyzed"
186			}
187			}
188			},
189			"datetime": {
190			"type": "date",
191			"format": "strict_date_optional_time||epoch_millis"
192			},
193			"name": {
194			"type": "string",
195			"index": "not_analyzed"
196			},
197			"fullname": {
198			"type": "string",
199			"index": "not_analyzed"
200			},
201			"version": {
202			"type": "string",
203			"index": "not_analyzed"
204			},
205			"benchmark_id": {
206			"type": "string",
207			"index": "not_analyzed",
208			},
209			"machine_info": {
210			"properties": {
211			"machine": {
212			"type": "string",
213			"index": "not_analyzed"
214			},
215			"node": {
216			"type": "string",
217			"index": "not_analyzed"
218			},
219			"processor": {
220			"type": "string",
221			"index": "not_analyzed"
222			},
223			"python_build": {
224			"type": "string",
225			"index": "not_analyzed"
226			},
227			"python_compiler": {
228			"type": "string",
229			"index": "not_analyzed"
230			},
231			"python_implementation": {
232			"type": "string",
233			"index": "not_analyzed"
234			},
235			"python_implementation_version": {
236			"type": "string",
237			"index": "not_analyzed"
238			},
239			"python_version": {
240			"type": "string",
241			"index": "not_analyzed"
242			},
243			"release": {
244			"type": "string",
245			"index": "not_analyzed"
246			},
247			"system": {
248			"type": "string",
249			"index": "not_analyzed"
250			}
251			}
252			},
253			"options": {
254			"properties": {
255			"disable_gc": {
256			"type": "boolean"
257			},
258			"max_time": {
259			"type": "double"
260			},
261			"min_rounds": {
262			"type": "long"
263			},
264			"min_time": {
265			"type": "double"
266			},
267			"timer": {
268			"type": "string"
269			},
270			"warmup": {
271			"type": "boolean"
272			}
273			}
274			},
275			"stats": {
276			"properties": {
277			"hd15iqr": {
278			"type": "double"
279			},
280			"iqr": {
281			"type": "double"
282			},
283			"iqr_outliers": {
284			"type": "long"
285			},
286			"iterations": {
287			"type": "long"
288			},
289			"ld15iqr": {
290			"type": "double"
291			},
292			"max": {
293			"type": "double"
294			},
295			"mean": {
296			"type": "double"
297			},
298			"median": {
299			"type": "double"
300			},
301			"min": {
302			"type": "double"
303			},
304			"outliers": {
305			"type": "string"
306			},
307			"q1": {
308			"type": "double"
309			},
310			"q3": {
311			"type": "double"
312			},
313			"rounds": {
314			"type": "long"
315			},
316			"stddev": {
317			"type": "double"
318			},
319			"stddev_outliers": {
320			"type": "long"
321			}
322			}
323			},
324			}
325			}
326			}
327			}
328			self._es.indices.create(index=self._es_index, ignore=400, body=mapping)
329

ionelmc / pytest-benchmark

Pull Request — master (#58)

ElasticsearchStorage._create_index() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like