ElasticsearchStorage - Code Metrics - Inspection of "Elasticsearch report backend" - ionelmc/pytest-benchmark - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#58)

unknown

created 2016-09-09 08:49 UTC

ElasticsearchStorage A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	258
Duplicated Lines	0 %

Importance

Changes	4
Bugs	0	Features	0

Metric	Value
c	4
b	0
f	0
dl	0
loc	258
rs	10
wmc	22

12 Methods

Rating	Name	Size	Complexity
A	load_benchmarks()	8	2
A	__init__()	13	2
A	_benchmark_from_es_record()	3	2
A	_search()	22	1
A	__str__()	2	1
A	_run_info_from_es_record()	3	2
B	_create_index()	157	1
A	location()	3	1
A	load()	10	4
A	query()	5	2
A	_group_by_commit_and_time()	13	3
A	save()	6	1

import datetime


class ElasticsearchStorage(object):
    """
    Benchmark storage that reads and writes records through Elasticsearch.

    Every benchmark is stored as one document. Documents that share the same
    commit id and ``datetime`` value are regrouped into a single run when
    they are loaded back.
    """

    def __init__(self, elasticsearch_host, elasticsearch_index, elasticsearch_doctype, logger, default_machine_id=None):
        """
        Connect to ``elasticsearch_host`` and create the index (with its
        mapping) if the create request is accepted.

        Raises ImportError when the ``elasticsearch`` package is missing.
        """
        # Imported lazily so the dependency is only required when this
        # storage is actually instantiated.
        try:
            import elasticsearch
        except ImportError as exc:
            raise ImportError(exc.args, "Please install elasticsearch or pytest-benchmark[elasticsearch]")
        self._elasticsearch_host = elasticsearch_host
        self._elasticsearch_index = elasticsearch_index
        self._elasticsearch_doctype = elasticsearch_doctype
        self._elasticsearch = elasticsearch.Elasticsearch(self._elasticsearch_host)
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return self.location

    @property
    def location(self):
        """String form of the configured Elasticsearch host."""
        return str(self._elasticsearch_host)

    def query(self, project):
        """
        Return the run ids ("<commit id>_<datetime>") stored for ``project``,
        ordered from the oldest run to the newest.
        """
        return [commit_and_time for commit_and_time, _ in self.load(project)]

    def load(self, project):
        """
        Yield ``(run id, run data)`` pairs for ``project``, oldest run first.

        Run data holds the machine_info, commit_info, datetime and version
        of the run plus a "benchmarks" list with all of its benchmarks.
        """
        response = self._search(project)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            yield key, data

    @staticmethod
    def _parse_datetime(value):
        """
        Parse a stored ISO-8601 timestamp, with or without fractional seconds.

        ``datetime.isoformat()`` leaves out the fractional part when the
        microseconds are zero, so both layouts have to be accepted.
        """
        try:
            return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def _search(self, project):
        """
        Run the search for documents whose ``commit_info.project`` equals
        ``project`` and return the raw response.

        The request asks for at most 1000 hits, sorted by datetime descending.
        """
        body = {
            "size": 1000,
            "sort": [{"datetime": {"order": "desc"}}],
            "query": {
                "bool": {
                    "filter": {
                        "term": {"commit_info.project": project}
                    }
                }
            },
        }
        return self._elasticsearch.search(
            index=self._elasticsearch_index,
            doc_type=self._elasticsearch_doctype,
            body=body,
        )

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Pick the benchmark-level keys out of a stored document."""
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname")
        return {benchmark_key: source_es_record[benchmark_key] for benchmark_key in benchmark_keys}

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Pick the run-level keys out of a stored document."""
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return {run_key: source_es_record[run_key] for run_key in run_keys}

    def _group_by_commit_and_time(self, hits):
        """
        Group search hits into runs keyed by "<commit id>_<datetime>".

        The first hit of a run supplies the run-level information; every hit
        contributes one entry to the run's "benchmarks" list.
        """
        result = {}
        for hit in hits:
            source_hit = hit["_source"]
            key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
            benchmark = self._benchmark_from_es_record(source_hit)
            run_info = result.get(key)
            if run_info is None:
                run_info = self._run_info_from_es_record(source_hit)
                run_info["benchmarks"] = []
                result[key] = run_info
            run_info["benchmarks"].append(benchmark)
        return result

    def load_benchmarks(self, project):
        """
        Yield every benchmark stored for ``project`` in the order the search
        returns them, without grouping them into runs.
        """
        response = self._search(project)
        for hit in response["hits"]["hits"]:
            yield self._benchmark_from_es_record(hit["_source"])

    def save(self, document, document_id):
        """Index ``document`` under ``document_id`` in the configured index and doctype."""
        self._elasticsearch.index(
            index=self._elasticsearch_index,
            doc_type=self._elasticsearch_doctype,
            body=document,
            id=document_id,
        )

    def _create_index(self):
        """
        Send the index-creation request together with the benchmark mapping.

        The mapping is registered for the configured doctype, the same one
        that ``_search`` and ``save`` use, so that the ``not_analyzed``
        fields apply to the documents that are actually stored.
        """
        def exact_string():
            # Fresh dict per field: a string stored as-is (not analyzed).
            return {"type": "string", "index": "not_analyzed"}

        mapping = {
            "mappings": {
                self._elasticsearch_doctype: {
                    "properties": {
                        "commit_info": {
                            "properties": {
                                "dirty": {"type": "boolean"},
                                "id": exact_string(),
                                "project": exact_string(),
                            }
                        },
                        "datetime": {
                            "type": "date",
                            "format": "strict_date_optional_time||epoch_millis",
                        },
                        "name": exact_string(),
                        "fullname": exact_string(),
                        "version": exact_string(),
                        "machine_info": {
                            "properties": {
                                "machine": exact_string(),
                                "node": exact_string(),
                                "processor": exact_string(),
                                "python_build": exact_string(),
                                "python_compiler": exact_string(),
                                "python_implementation": exact_string(),
                                "python_implementation_version": exact_string(),
                                "python_version": exact_string(),
                                "release": exact_string(),
                                "system": exact_string(),
                            }
                        },
                        "options": {
                            "properties": {
                                "disable_gc": {"type": "boolean"},
                                "max_time": {"type": "double"},
                                "min_rounds": {"type": "long"},
                                "min_time": {"type": "double"},
                                "timer": {"type": "string"},
                                "warmup": {"type": "boolean"},
                            }
                        },
                        "stats": {
                            "properties": {
                                "hd15iqr": {"type": "double"},
                                "iqr": {"type": "double"},
                                "iqr_outliers": {"type": "long"},
                                "iterations": {"type": "long"},
                                "ld15iqr": {"type": "double"},
                                "max": {"type": "double"},
                                "mean": {"type": "double"},
                                "median": {"type": "double"},
                                "min": {"type": "double"},
                                "outliers": {"type": "string"},
                                "q1": {"type": "double"},
                                "q3": {"type": "double"},
                                "rounds": {"type": "long"},
                                "stddev": {"type": "double"},
                                "stddev_outliers": {"type": "long"},
                            }
                        },
                    }
                }
            }
        }
        # ignore=400 is passed through unchanged from the original call
        # (presumably so an "index already exists" reply is not an error).
        self._elasticsearch.indices.create(index=self._elasticsearch_index, ignore=400, body=mapping)



1			import datetime
2
3
4			class ElasticsearchStorage(object):
5			def __init__(self, elasticsearch_host, elasticsearch_index, elasticsearch_doctype, logger, default_machine_id=None):
6			try:
7			import elasticsearch
8			except ImportError as exc:
9			raise ImportError(exc.args, "Please install elasticsearch or pytest-benchmark[elasticsearch]")
10			self._elasticsearch_host = elasticsearch_host
11			self._elasticsearch_index = elasticsearch_index
12			self._elasticsearch_doctype = elasticsearch_doctype
13			self._elasticsearch = elasticsearch.Elasticsearch(self._elasticsearch_host)
14			self.default_machine_id = default_machine_id
15			self.logger = logger
16			self._cache = {}
17			self._create_index()
18
19			def __str__(self):
20			return str(self._elasticsearch_host)
21
22			@property
23			def location(self):
24			return str(self._elasticsearch_host)
25
26			def query(self, project):
27			"""
28			Returns sorted records names (ids) that corresponds with globs_or_files.
29			"""
30			return [commit_and_time for commit_and_time, _ in self.load(project)]
31
32			def load(self, project):
33			"""
34			Yield path and content of records that corresponds with globs_or_files
35			"""
36			r = self._search(project)
37			groupped_data = self._group_by_commit_and_time(r["hits"]["hits"])
38			result = [(key, value) for key, value in groupped_data.items()]
39			result.sort(key=lambda x: datetime.datetime.strptime(x[1]["datetime"], "%Y-%m-%dT%H:%M:%S.%f"))
40			for key, data in result:
41			yield key, data
42
43			def _search(self, project):
44			body = {
45			"size": 1000,
46			"sort": [
47			{
48			"datetime": {
49			"order": "desc"
50			}
51			}
52			],
53			"query": {
54			"bool": {
55			"filter": {
56			"term": {
57			"commit_info.project": project
58			}
59			}
60			}
61			}
62			}
63
64			return self._elasticsearch.search(index=self._elasticsearch_index, doc_type=self._elasticsearch_doctype, body=body)
65
66			@staticmethod
67			def _benchmark_from_es_record(source_es_record):
68			return {benchmark_key: source_es_record[benchmark_key] for benchmark_key in ("group", "stats", "options", "param", "name", "params", "fullname")}
69
70			@staticmethod
71			def _run_info_from_es_record(source_es_record):
72			return {run_key: source_es_record[run_key] for run_key in ("machine_info", "commit_info", "datetime", "version")}
73
74			def _group_by_commit_and_time(self, hits):
75			result = {}
76			for hit in hits:
77			source_hit = hit["_source"]
78			key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
79			benchmark = self._benchmark_from_es_record(source_hit)
80			if key in result:
81			result[key]["benchmarks"].append(benchmark)
82			else:
83			run_info = self._run_info_from_es_record(source_hit)
84			run_info["benchmarks"] = [benchmark]
85			result[key] = run_info
86			return result
87
88			def load_benchmarks(self, project):
89			"""
90			Yield benchmarks that corresponds with glob_or_files. Put path and
91			source (uncommon part of path) to benchmark dict.
92			"""
93			r = self._search(project)
94			for hit in r["hits"]["hits"]:
95			yield self._benchmark_from_es_record(hit["_source"])
96
97			def save(self, document, document_id):
98			self._elasticsearch.index(
99			index=self._elasticsearch_index,
100			doc_type=self._elasticsearch_doctype,
101			body=document,
102			id=document_id,
103			)
104
105			def _create_index(self):
106			mapping = {
107			"mappings": {
108			"benchmark": {
109			"properties": {
110			"commit_info": {
111			"properties": {
112			"dirty": {
113			"type": "boolean"
114			},
115			"id": {
116			"type": "string",
117			"index": "not_analyzed"
118
119			},
120			"project": {
121			"type": "string",
122			"index": "not_analyzed"
123			}
124			}
125			},
126			"datetime": {
127			"type": "date",
128			"format": "strict_date_optional_time||epoch_millis"
129			},
130			"name": {
131			"type": "string",
132			"index": "not_analyzed"
133			},
134			"fullname": {
135			"type": "string",
136			"index": "not_analyzed"
137			},
138			"version": {
139			"type": "string",
140			"index": "not_analyzed"
141			},
142			"machine_info": {
143			"properties": {
144			"machine": {
145			"type": "string",
146			"index": "not_analyzed"
147			},
148			"node": {
149			"type": "string",
150			"index": "not_analyzed"
151			},
152			"processor": {
153			"type": "string",
154			"index": "not_analyzed"
155			},
156			"python_build": {
157			"type": "string",
158			"index": "not_analyzed"
159			},
160			"python_compiler": {
161			"type": "string",
162			"index": "not_analyzed"
163			},
164			"python_implementation": {
165			"type": "string",
166			"index": "not_analyzed"
167			},
168			"python_implementation_version": {
169			"type": "string",
170			"index": "not_analyzed"
171			},
172			"python_version": {
173			"type": "string",
174			"index": "not_analyzed"
175			},
176			"release": {
177			"type": "string",
178			"index": "not_analyzed"
179			},
180			"system": {
181			"type": "string",
182			"index": "not_analyzed"
183			}
184			}
185			},
186			"options": {
187			"properties": {
188			"disable_gc": {
189			"type": "boolean"
190			},
191			"max_time": {
192			"type": "double"
193			},
194			"min_rounds": {
195			"type": "long"
196			},
197			"min_time": {
198			"type": "double"
199			},
200			"timer": {
201			"type": "string"
202			},
203			"warmup": {
204			"type": "boolean"
205			}
206			}
207			},
208			"stats": {
209			"properties": {
210			"hd15iqr": {
211			"type": "double"
212			},
213			"iqr": {
214			"type": "double"
215			},
216			"iqr_outliers": {
217			"type": "long"
218			},
219			"iterations": {
220			"type": "long"
221			},
222			"ld15iqr": {
223			"type": "double"
224			},
225			"max": {
226			"type": "double"
227			},
228			"mean": {
229			"type": "double"
230			},
231			"median": {
232			"type": "double"
233			},
234			"min": {
235			"type": "double"
236			},
237			"outliers": {
238			"type": "string"
239			},
240			"q1": {
241			"type": "double"
242			},
243			"q3": {
244			"type": "double"
245			},
246			"rounds": {
247			"type": "long"
248			},
249			"stddev": {
250			"type": "double"
251			},
252			"stddev_outliers": {
253			"type": "long"
254			}
255			}
256			},
257			}
258			}
259			}
260			}
261			self._elasticsearch.indices.create(index=self._elasticsearch_index, ignore=400, body=mapping)
262
263

ionelmc / pytest-benchmark

Pull Request — master (#58)

ElasticsearchStorage A

Complexity

Size/Duplication

Importance

12 Methods

Duplication Side-by-Side

Filter issues like