Completed
Pull Request — master (#58)
by
unknown
01:19
created

ElasticsearchStorage._create_index()   B

Complexity

Conditions 1

Size

Total Lines 161

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
c 2
b 0
f 0
dl 0
loc 161
rs 8.2857

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
from __future__ import absolute_import
2
3
import uuid
4
import sys
5
from datetime import date
6
from datetime import datetime
7
from decimal import Decimal
8
9
from ..compat import reraise
10
11
try:
12
    import elasticsearch
13
    from elasticsearch.serializer import JSONSerializer
14
except ImportError as exc:
15
    reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
16
            sys.exc_info()[2])
17
18
19
class BenchmarkJSONSerializer(JSONSerializer):
    """JSON serializer whose ``default`` hook always returns a value.

    Dates, decimals and UUIDs are converted to plain JSON-friendly values;
    anything else is replaced by a marker string instead of raising.
    """

    def default(self, data):
        """Convert ``data`` to a JSON-friendly value.

        :param data: the object to convert.
        :return: an ISO-8601 string for ``date``/``datetime``, a ``float`` for
            ``Decimal``, a ``str`` for ``uuid.UUID`` and the string
            ``"UNSERIALIZABLE[<repr>]"`` for every other type.
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        # Wrap in a 1-tuple: a bare tuple-like ``data`` would otherwise be
        # unpacked as the format arguments and raise TypeError (or lose data).
        return "UNSERIALIZABLE[%r]" % (data,)
29
30
31
class ElasticsearchStorage(object):
    """Benchmark storage that reads and writes records through Elasticsearch.

    Every benchmark is stored as one document in ``index``/``doctype``; the
    run-level information (machine, commit, datetime, version) is copied onto
    each document by :meth:`save` and regrouped by :meth:`load`.
    """

    def __init__(self, hosts, index, doctype, project_name, logger,
                 default_machine_id=None):
        """Create the client and make sure the index exists.

        :param hosts: passed as-is to ``elasticsearch.Elasticsearch``.
        :param index: name of the index used for every request.
        :param doctype: document type used for searching and indexing.
        :param project_name: value matched against ``commit_info.project``.
        :param logger: object whose ``info`` method is called after saving.
        :param default_machine_id: optional prefix for saved benchmark ids.
        """
        self._es_hosts = hosts
        self._es_index = index
        self._es_doctype = doctype
        self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
        self._project_name = project_name
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return str(self._es_hosts)

    @property
    def location(self):
        """String form of the configured hosts."""
        return str(self._es_hosts)

    def query(self):
        """
        Return the sorted ``benchmark_id`` values found in the index.

        The request body contains no project filter, so ids of every project
        stored in the index/doctype are returned.
        """
        # NOTE(review): no "size" is given for the terms aggregation, so the
        # number of buckets is whatever the server defaults to -- confirm that
        # this is enough for indices holding many saved runs.
        body = {
            "size": 0,
            "aggs": {
                "benchmark_ids": {
                    "terms": {
                        "field": "benchmark_id"
                    }
                }
            }
        }
        result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
        buckets = result["aggregations"]["benchmark_ids"]["buckets"]
        return sorted(bucket["key"] for bucket in buckets)

    def load(self, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for the configured project, oldest run first.

        ``key`` is ``"<commit id>_<datetime>"`` and ``run`` is the run info
        dict with its benchmarks collected under ``"benchmarks"``.

        :param id_prefix: optional document id prefix used to narrow the search.
        """
        response = self._search(self._project_name, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            yield key, data

    @staticmethod
    def _parse_datetime(value):
        """Parse an ISO-8601 timestamp with or without fractional seconds.

        ``datetime.isoformat()`` omits the fractional part when the
        microsecond field is 0, so both spellings must be accepted.

        :raises ValueError: if ``value`` matches neither format.
        """
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def _search(self, project, id_prefix=None):
        """Search documents of ``project``, newest first, at most 1000 hits.

        :param project: value that ``commit_info.project`` must equal.
        :param id_prefix: when truthy, also require ``_id`` to start with it.
        :return: the raw response of the client's ``search`` call.
        """
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": {
                    "filter": {
                        "term": {
                            "commit_info.project": project
                        }
                    }
                }
            }
        }
        if id_prefix:
            body["query"]["bool"]["must"] = {
                "prefix": {
                    "_id": id_prefix
                }
            }

        return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Pick the per-benchmark keys out of a stored document.

        :raises KeyError: if the document lacks one of the keys.
        """
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id")
        return dict((key, source_es_record[key]) for key in benchmark_keys)

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Pick the per-run keys out of a stored document.

        :raises KeyError: if the document lacks one of the keys.
        """
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return dict((key, source_es_record[key]) for key in run_keys)

    def _group_by_commit_and_time(self, hits):
        """Group search hits into runs keyed by ``"<commit id>_<datetime>"``.

        :param hits: iterable of hits, each carrying the document in ``"_source"``.
        :return: dict mapping the run key to the run info with a
            ``"benchmarks"`` list, in hit order.
        """
        runs = {}
        for hit in hits:
            source = hit["_source"]
            key = "%s_%s" % (source["commit_info"]["id"], source["datetime"])
            benchmark = self._benchmark_from_es_record(source)
            if key not in runs:
                # First hit of this run: it also provides the run-level info.
                runs[key] = self._run_info_from_es_record(source)
                runs[key]["benchmarks"] = []
            runs[key]["benchmarks"].append(benchmark)
        return runs

    def load_benchmarks(self, *args):
        """
        Yield flat benchmark dicts for the configured project.

        The ``stats`` sub-dict is merged into the top level and ``"source"``
        is set to the benchmark id.

        :param args: optional; the first positional value is used as the
            document id prefix.
        """
        id_prefix = args[0] if args else None
        response = self._search(self._project_name, id_prefix)
        for hit in response["hits"]["hits"]:
            bench = self._benchmark_from_es_record(hit["_source"])
            bench.update(bench.pop("stats"))
            bench["source"] = bench["benchmark_id"]
            yield bench

    def save(self, output_json, save):
        """Index every benchmark of ``output_json`` as its own document.

        ``output_json`` is modified in place: its ``"benchmarks"`` list is
        popped, and each benchmark dict receives the remaining top-level keys
        plus ``"benchmark_id"``.

        :param output_json: report dict containing a ``"benchmarks"`` list.
        :param save: benchmark id; prefixed with ``default_machine_id`` and
            ``"_"`` when a default machine id is set.
        """
        output_benchmarks = output_json.pop("benchmarks")
        for bench in output_benchmarks:
            # add top level info from output_json dict to each record
            bench.update(output_json)
            benchmark_id = save
            if self.default_machine_id:
                benchmark_id = self.default_machine_id + "_" + benchmark_id
            # The document id is deterministic, so indexing the same id and
            # fullname again targets the same document id.
            doc_id = benchmark_id + "_" + bench["fullname"]
            bench["benchmark_id"] = benchmark_id
            self._es.index(
                index=self._es_index,
                doc_type=self._es_doctype,
                body=bench,
                id=doc_id,
            )
        # NOTE(review): the hosts value is logged verbatim -- confirm it can
        # never embed credentials.
        self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
            self._es_hosts, self._es_index, self._es_doctype))

    @staticmethod
    def _properties(*groups):
        """Build a ``{"properties": {...}}`` mapping fragment.

        :param groups: ``(definition, names)`` pairs; every name in ``names``
            gets its own copy of ``definition``.
        """
        properties = {}
        for definition, names in groups:
            for name in names:
                properties[name] = dict(definition)
        return {"properties": properties}

    @classmethod
    def _index_mapping(cls):
        """Return the index creation body describing the benchmark documents."""
        keyword = {"type": "string", "index": "not_analyzed"}
        text = {"type": "string"}
        boolean = {"type": "boolean"}
        double = {"type": "double"}
        integer = {"type": "long"}

        fields = cls._properties(
            (keyword, ("name", "fullname", "version", "benchmark_id")),
        )["properties"]
        fields["datetime"] = {
            "type": "date",
            "format": "strict_date_optional_time||epoch_millis",
        }
        fields["commit_info"] = cls._properties(
            (boolean, ("dirty",)),
            (keyword, ("id", "project")),
        )
        fields["machine_info"] = cls._properties(
            (keyword, (
                "machine", "node", "processor", "python_build", "python_compiler",
                "python_implementation", "python_implementation_version",
                "python_version", "release", "system",
            )),
        )
        fields["options"] = cls._properties(
            (boolean, ("disable_gc", "warmup")),
            (double, ("max_time", "min_time")),
            (integer, ("min_rounds",)),
            (text, ("timer",)),
        )
        fields["stats"] = cls._properties(
            (double, ("hd15iqr", "iqr", "ld15iqr", "max", "mean", "median", "min", "q1", "q3", "stddev")),
            (integer, ("iqr_outliers", "iterations", "rounds", "stddev_outliers")),
            (text, ("outliers",)),
        )
        # NOTE(review): the mapping is registered under the literal type name
        # "benchmark" while requests use the configurable doctype -- confirm
        # that other doctypes are meant to go without this mapping.
        return {"mappings": {"benchmark": {"properties": fields}}}

    def _create_index(self):
        """Create the index with the benchmark mapping.

        ``ignore=400`` is passed so that a 400 response from the server does
        not raise.
        """
        self._es.indices.create(index=self._es_index, ignore=400, body=self._index_mapping())
329