ElasticsearchStorage.load() - Code Metrics - Inspection of "Elasticsearch report backend" - ionelmc/pytest-benchmark - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#58)

unknown

created 2016-10-06 07:44 UTC

ElasticsearchStorage.load() A

↳ Parent: ElasticsearchStorage

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	4
c	1
b	0
f	0
dl	0
loc	10
rs	9.2

import datetime

import uuid
# Only ``date`` is imported by name on purpose: binding the ``datetime`` class
# here would rebind the module-level name ``datetime`` (imported as a module at
# the top of this file) and break ``datetime.datetime.strptime`` further down.
from datetime import date
from decimal import Decimal

try:
    import elasticsearch.serializer
except ImportError:
    # elasticsearch is optional; ElasticsearchStorage.__init__ raises a
    # descriptive ImportError when the package is missing.
    SaveElasticsearchJSONSerializer = None
else:
    class SaveElasticsearchJSONSerializer(elasticsearch.serializer.JSONSerializer):
        """JSON serializer whose ``default`` hook never raises.

        Values that have no dedicated conversion below are replaced by an
        ``UNSERIALIZABLE[...]`` placeholder string instead of an error.
        """

        def default(self, data):
            """Return a JSON-friendly replacement for ``data``.

            Presumably called by the base serializer for objects the JSON
            encoder cannot handle natively -- confirm against elasticsearch-py.

            :param data: the object to convert.
            :return: ISO-8601 string for dates/datetimes, ``float`` for
                ``Decimal``, ``str`` for ``uuid.UUID``, otherwise the
                placeholder ``"UNSERIALIZABLE[<repr>]"``.
            """
            # ``datetime.datetime`` subclasses ``date``, so this single check
            # covers both plain dates and datetimes.
            if isinstance(data, date):
                return data.isoformat()
            if isinstance(data, Decimal):
                return float(data)
            if isinstance(data, uuid.UUID):
                return str(data)
            # Wrap in a 1-tuple so tuple-like values are formatted as one
            # argument rather than unpacked by the ``%`` operator.
            return "UNSERIALIZABLE[%r]" % (data,)


class ElasticsearchStorage(object):
    """Benchmark storage backend that reads and writes Elasticsearch documents.

    Each stored document is expected to hold one benchmark together with the
    run-level information (machine, commit, datetime, version) it belongs to.
    """

    def __init__(self, elasticsearch_hosts, elasticsearch_index, elasticsearch_doctype, logger,
                 default_machine_id=None):
        """Connect to Elasticsearch and make sure the index exists.

        :param elasticsearch_hosts: hosts passed to ``elasticsearch.Elasticsearch``.
        :param elasticsearch_index: name of the index to read from / write to.
        :param elasticsearch_doctype: document type used for searches and saves.
        :param logger: logger object, stored on the instance.
        :param default_machine_id: optional machine id, stored on the instance.
        :raises ImportError: if the ``elasticsearch`` package is not installed.
        """
        try:
            import elasticsearch
        except ImportError as exc:
            raise ImportError(exc.args, "Please install elasticsearch or pytest-benchmark[elasticsearch]")
        self._elasticsearch_hosts = elasticsearch_hosts
        self._elasticsearch_index = elasticsearch_index
        self._elasticsearch_doctype = elasticsearch_doctype
        self._elasticsearch = elasticsearch.Elasticsearch(self._elasticsearch_hosts, serializer=SaveElasticsearchJSONSerializer())
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return str(self._elasticsearch_hosts)

    @property
    def location(self):
        """String form of the configured Elasticsearch hosts."""
        return str(self._elasticsearch_hosts)

    def query(self, project):
        """
        Return the record keys (``<commit id>_<datetime>``) stored for
        ``project``, ordered from oldest to newest run.
        """
        return [commit_and_time for commit_and_time, _ in self.load(project)]

    def load(self, project, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for ``project``, oldest run first.

        ``key`` is ``<commit id>_<datetime>`` and ``run`` is the run-level
        information with the matching benchmarks under ``"benchmarks"``.

        :param project: value matched against ``commit_info.project``.
        :param id_prefix: optional prefix the document ``_id`` must start with.
        """
        response = self._search(project, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            yield key, data

    @staticmethod
    def _parse_datetime(value):
        """Parse an ISO-8601 timestamp, with or without fractional seconds."""
        # Local import under a private alias: this does not depend on what the
        # module-level name ``datetime`` is bound to (module or class).
        from datetime import datetime as datetime_cls
        try:
            return datetime_cls.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            # ``isoformat()`` drops the fraction when microseconds are zero.
            return datetime_cls.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def _search(self, project, id_prefix=None):
        """Run the search for ``project`` and return the raw response.

        At most 1000 hits are requested, newest ``datetime`` first. When
        ``id_prefix`` is truthy the document ``_id`` must start with it.
        """
        bool_query = {
            "filter": {
                "term": {
                    "commit_info.project": project
                }
            }
        }
        if id_prefix:
            bool_query["must"] = {
                "prefix": {
                    "_id": id_prefix
                }
            }
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": bool_query
            }
        }

        return self._elasticsearch.search(index=self._elasticsearch_index,
                                          doc_type=self._elasticsearch_doctype,
                                          body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Return the benchmark-level fields of a document ``_source``.

        :raises KeyError: if any of the expected fields is missing.
        """
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname")
        return dict((key, source_es_record[key]) for key in benchmark_keys)

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Return the run-level fields of a document ``_source``.

        :raises KeyError: if any of the expected fields is missing.
        """
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return dict((key, source_es_record[key]) for key in run_keys)

    def _group_by_commit_and_time(self, hits):
        """Group search hits into runs keyed by ``<commit id>_<datetime>``.

        The run-level information is taken from the first hit seen for a key;
        every hit contributes one entry to that run's ``"benchmarks"`` list.
        """
        runs = {}
        for hit in hits:
            source = hit["_source"]
            key = "%s_%s" % (source["commit_info"]["id"], source["datetime"])
            benchmark = self._benchmark_from_es_record(source)
            if key not in runs:
                run_info = self._run_info_from_es_record(source)
                run_info["benchmarks"] = []
                runs[key] = run_info
            runs[key]["benchmarks"].append(benchmark)
        return runs

    def load_benchmarks(self, project):
        """
        Yield the benchmark-level fields of every document found for
        ``project``, in the order the search returns them.
        """
        response = self._search(project)
        for hit in response["hits"]["hits"]:
            yield self._benchmark_from_es_record(hit["_source"])

    def save(self, document, document_id):
        """Index ``document`` under ``document_id`` in the configured index."""
        self._elasticsearch.index(
            index=self._elasticsearch_index,
            doc_type=self._elasticsearch_doctype,
            body=document,
            id=document_id,
        )

    @staticmethod
    def _index_mapping():
        """Build the index-creation body holding the field mappings.

        NOTE(review): the mapping is registered under the literal type name
        ``"benchmark"`` rather than the configured doctype -- confirm intended.
        """
        def exact_strings(*names):
            # A fresh dict per field so no mapping entries share state.
            return dict((name, {"type": "string", "index": "not_analyzed"}) for name in names)

        def typed(type_name, *names):
            return dict((name, {"type": type_name}) for name in names)

        commit_info = exact_strings("id", "project")
        commit_info.update(typed("boolean", "dirty"))

        machine_info = exact_strings(
            "machine", "node", "processor", "python_build", "python_compiler",
            "python_implementation", "python_implementation_version",
            "python_version", "release", "system",
        )

        options = typed("boolean", "disable_gc", "warmup")
        options.update(typed("double", "max_time", "min_time"))
        options.update(typed("long", "min_rounds"))
        options.update(typed("string", "timer"))

        stats = typed(
            "double",
            "hd15iqr", "iqr", "ld15iqr", "max", "mean", "median", "min",
            "q1", "q3", "stddev",
        )
        stats.update(typed("long", "iqr_outliers", "iterations", "rounds", "stddev_outliers"))
        stats.update(typed("string", "outliers"))

        properties = exact_strings("name", "fullname", "version")
        properties.update({
            "commit_info": {"properties": commit_info},
            "datetime": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis"
            },
            "machine_info": {"properties": machine_info},
            "options": {"properties": options},
            "stats": {"properties": stats},
        })
        return {
            "mappings": {
                "benchmark": {
                    "properties": properties
                }
            }
        }

    def _create_index(self):
        """Create the index with its mapping.

        ``ignore=400`` is passed to the client; presumably this tolerates the
        "index already exists" response -- confirm against elasticsearch-py.
        """
        self._elasticsearch.indices.create(index=self._elasticsearch_index, ignore=400,
                                           body=self._index_mapping())



1			import datetime
2
3			try:
4			import elasticsearch.serializer
5
6			import uuid
7			from datetime import date, datetime
8			from decimal import Decimal
9
10			class SaveElasticsearchJSONSerializer(elasticsearch.serializer.JSONSerializer):
11			def default(self, data):
12			if isinstance(data, (date, datetime)):
13			return data.isoformat()
14			elif isinstance(data, Decimal):
15			return float(data)
16			elif isinstance(data, uuid.UUID):
17			return str(data)
18			else:
19			return "UNSERIALIZABLE[%r]" % data
20
21			except ImportError as exc:
22			SaveElasticsearchJSONSerializer = None
23
24
25			class ElasticsearchStorage(object):
26			def __init__(self, elasticsearch_hosts, elasticsearch_index, elasticsearch_doctype, logger,
27			default_machine_id=None):
28			try:
29			import elasticsearch
30			except ImportError as exc:
31			raise ImportError(exc.args, "Please install elasticsearch or pytest-benchmark[elasticsearch]")
32			self._elasticsearch_hosts = elasticsearch_hosts
33			self._elasticsearch_index = elasticsearch_index
34			self._elasticsearch_doctype = elasticsearch_doctype
35			self._elasticsearch = elasticsearch.Elasticsearch(self._elasticsearch_hosts, serializer=SaveElasticsearchJSONSerializer())
36			self.default_machine_id = default_machine_id
37			self.logger = logger
38			self._cache = {}
39			self._create_index()
40
41			def __str__(self):
42			return str(self._elasticsearch_hosts)
43
44			@property
45			def location(self):
46			return str(self._elasticsearch_hosts)
47
48			def query(self, project):
49			"""
50			Returns sorted records names (ids) that corresponds with project.
51			"""
52			return [commit_and_time for commit_and_time, _ in self.load(project)]
53
54			def load(self, project, id_prefix=None):
55			"""
56			Yield key and content of records that corresponds with project name.
57			"""
58			r = self._search(project, id_prefix)
59			groupped_data = self._group_by_commit_and_time(r["hits"]["hits"])
60			result = [(key, value) for key, value in groupped_data.items()]
61			result.sort(key=lambda x: datetime.datetime.strptime(x[1]["datetime"], "%Y-%m-%dT%H:%M:%S.%f"))
62			for key, data in result:
63			yield key, data
64
65			def _search(self, project, id_prefix=None):
66			body = {
67			"size": 1000,
68			"sort": [
69			{
70			"datetime": {
71			"order": "desc"
72			}
73			}
74			],
75			"query": {
76			"bool": {
77			"filter": {
78			"term": {
79			"commit_info.project": project
80			}
81			}
82			}
83			}
84			}
85			if id_prefix:
86			body["query"]["bool"]["must"] = {
87			"prefix": {
88			"_id": id_prefix
89			}
90			}
91
92			return self._elasticsearch.search(index=self._elasticsearch_index,
93			doc_type=self._elasticsearch_doctype,
94			body=body)
95
96			@staticmethod
97			def _benchmark_from_es_record(source_es_record):
98			result = {}
99			for benchmark_key in ("group", "stats", "options", "param", "name", "params", "fullname"):
100			result[benchmark_key] = source_es_record[benchmark_key]
101			return result
102
103			@staticmethod
104			def _run_info_from_es_record(source_es_record):
105			result = {}
106			for run_key in ("machine_info", "commit_info", "datetime", "version"):
107			result[run_key] = source_es_record[run_key]
108			return result
109
110			def _group_by_commit_and_time(self, hits):
111			result = {}
112			for hit in hits:
113			source_hit = hit["_source"]
114			key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
115			benchmark = self._benchmark_from_es_record(source_hit)
116			if key in result:
117			result[key]["benchmarks"].append(benchmark)
118			else:
119			run_info = self._run_info_from_es_record(source_hit)
120			run_info["benchmarks"] = [benchmark]
121			result[key] = run_info
122			return result
123
124			def load_benchmarks(self, project):
125			"""
126			Yield benchmarks that corresponds with project. Put path and
127			source (uncommon part of path) to benchmark dict.
128			"""
129			r = self._search(project)
130			for hit in r["hits"]["hits"]:
131			yield self._benchmark_from_es_record(hit["_source"])
132
133			def save(self, document, document_id):
134			self._elasticsearch.index(
135			index=self._elasticsearch_index,
136			doc_type=self._elasticsearch_doctype,
137			body=document,
138			id=document_id,
139			)
140
141			def _create_index(self):
142			mapping = {
143			"mappings": {
144			"benchmark": {
145			"properties": {
146			"commit_info": {
147			"properties": {
148			"dirty": {
149			"type": "boolean"
150			},
151			"id": {
152			"type": "string",
153			"index": "not_analyzed"
154
155			},
156			"project": {
157			"type": "string",
158			"index": "not_analyzed"
159			}
160			}
161			},
162			"datetime": {
163			"type": "date",
164			"format": "strict_date_optional_time||epoch_millis"
165			},
166			"name": {
167			"type": "string",
168			"index": "not_analyzed"
169			},
170			"fullname": {
171			"type": "string",
172			"index": "not_analyzed"
173			},
174			"version": {
175			"type": "string",
176			"index": "not_analyzed"
177			},
178			"machine_info": {
179			"properties": {
180			"machine": {
181			"type": "string",
182			"index": "not_analyzed"
183			},
184			"node": {
185			"type": "string",
186			"index": "not_analyzed"
187			},
188			"processor": {
189			"type": "string",
190			"index": "not_analyzed"
191			},
192			"python_build": {
193			"type": "string",
194			"index": "not_analyzed"
195			},
196			"python_compiler": {
197			"type": "string",
198			"index": "not_analyzed"
199			},
200			"python_implementation": {
201			"type": "string",
202			"index": "not_analyzed"
203			},
204			"python_implementation_version": {
205			"type": "string",
206			"index": "not_analyzed"
207			},
208			"python_version": {
209			"type": "string",
210			"index": "not_analyzed"
211			},
212			"release": {
213			"type": "string",
214			"index": "not_analyzed"
215			},
216			"system": {
217			"type": "string",
218			"index": "not_analyzed"
219			}
220			}
221			},
222			"options": {
223			"properties": {
224			"disable_gc": {
225			"type": "boolean"
226			},
227			"max_time": {
228			"type": "double"
229			},
230			"min_rounds": {
231			"type": "long"
232			},
233			"min_time": {
234			"type": "double"
235			},
236			"timer": {
237			"type": "string"
238			},
239			"warmup": {
240			"type": "boolean"
241			}
242			}
243			},
244			"stats": {
245			"properties": {
246			"hd15iqr": {
247			"type": "double"
248			},
249			"iqr": {
250			"type": "double"
251			},
252			"iqr_outliers": {
253			"type": "long"
254			},
255			"iterations": {
256			"type": "long"
257			},
258			"ld15iqr": {
259			"type": "double"
260			},
261			"max": {
262			"type": "double"
263			},
264			"mean": {
265			"type": "double"
266			},
267			"median": {
268			"type": "double"
269			},
270			"min": {
271			"type": "double"
272			},
273			"outliers": {
274			"type": "string"
275			},
276			"q1": {
277			"type": "double"
278			},
279			"q3": {
280			"type": "double"
281			},
282			"rounds": {
283			"type": "long"
284			},
285			"stddev": {
286			"type": "double"
287			},
288			"stddev_outliers": {
289			"type": "long"
290			}
291			}
292			},
293			}
294			}
295			}
296			}
297			self._elasticsearch.indices.create(index=self._elasticsearch_index, ignore=400, body=mapping)
298
299

ionelmc / pytest-benchmark

Pull Request — master (#58)

ElasticsearchStorage.load() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like