Completed
Pull Request — master (#58)
by
unknown
01:15
created

ElasticsearchStorage._create_index()   B

Complexity

Conditions 1

Size

Total Lines 157

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
c 1
b 0
f 0
dl 0
loc 157
rs 8.2857

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

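Commonly applied refactorings include Extract Method and, if many parameters or temporary variables are present, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.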

1
import datetime
2
3
try:
    import elasticsearch.serializer
except ImportError:
    # elasticsearch is an optional dependency. Leave a placeholder so the
    # module still imports; ElasticsearchStorage.__init__ raises an explicit
    # ImportError before the serializer would ever be instantiated.
    SaveElasticsearchJSONSerializer = None
else:
    import uuid
    # NOTE: only ``date`` is imported on purpose. ``datetime.datetime`` is a
    # subclass of ``date`` (so the isinstance check below still covers it),
    # and importing the ``datetime`` class here would rebind the module-level
    # ``datetime`` module that ``ElasticsearchStorage.load`` relies on.
    from datetime import date
    from decimal import Decimal

    class SaveElasticsearchJSONSerializer(elasticsearch.serializer.JSONSerializer):
        """JSON serializer that never fails on an unknown value.

        Dates/datetimes become ISO-8601 strings, ``Decimal`` becomes ``float``,
        ``UUID`` becomes ``str``; anything else is replaced by an
        ``UNSERIALIZABLE[<repr>]`` marker string instead of raising.
        """

        def default(self, data):
            """Return a JSON-compatible replacement for ``data``."""
            if isinstance(data, date):
                return data.isoformat()
            if isinstance(data, Decimal):
                return float(data)
            if isinstance(data, uuid.UUID):
                return str(data)
            # Wrap in a 1-tuple so that a tuple-valued ``data`` is formatted
            # as a single object instead of being unpacked by ``%``.
            return "UNSERIALIZABLE[%r]" % (data,)
23
24
25
class ElasticsearchStorage(object):
    """Store and retrieve benchmark records in an Elasticsearch index.

    Each saved document is one benchmark; documents that share a commit id
    and a run datetime are grouped back into a single run when loading.
    """

    # Timestamp layouts accepted when ordering runs. ``isoformat()`` drops the
    # fractional part when microseconds are zero, so both layouts can occur.
    _DATETIME_FORMATS = ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S")
    # Keys copied from a stored document into a benchmark dict.
    _BENCHMARK_KEYS = ("group", "stats", "options", "param", "name", "params", "fullname")
    # Keys copied from a stored document into the run-level dict.
    _RUN_INFO_KEYS = ("machine_info", "commit_info", "datetime", "version")

    def __init__(self, elasticsearch_hosts, elasticsearch_index, elasticsearch_doctype, logger,
                 default_machine_id=None):
        """Connect to Elasticsearch and make sure the index exists.

        :param elasticsearch_hosts: hosts passed to ``elasticsearch.Elasticsearch``.
        :param elasticsearch_index: name of the index to read from / write to.
        :param elasticsearch_doctype: document type used for every request.
        :param logger: logger object kept on ``self.logger``.
        :param default_machine_id: kept on ``self.default_machine_id``.
        :raises ImportError: if the ``elasticsearch`` package is not installed.
        """
        try:
            import elasticsearch
        except ImportError as exc:
            raise ImportError(exc.args, "Please install elasticsearch or pytest-benchmark[elasticsearch]")
        self._elasticsearch_hosts = elasticsearch_hosts
        self._elasticsearch_index = elasticsearch_index
        self._elasticsearch_doctype = elasticsearch_doctype
        self._elasticsearch = elasticsearch.Elasticsearch(
            self._elasticsearch_hosts,
            serializer=SaveElasticsearchJSONSerializer(),
        )
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return self.location

    @property
    def location(self):
        """String form of the configured Elasticsearch hosts."""
        return str(self._elasticsearch_hosts)

    def query(self, project):
        """
        Return the record names (ids) that belong to ``project``, ordered by
        run datetime (oldest first).
        """
        return [commit_and_time for commit_and_time, _ in self.load(project)]

    def load(self, project, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for ``project``, ordered by run datetime
        (oldest first).

        ``key`` is ``"<commit id>_<datetime>"`` and ``run`` is the run-level
        info plus a ``"benchmarks"`` list. When ``id_prefix`` is given, only
        documents whose ``_id`` starts with it are considered.

        :raises ValueError: if a stored datetime matches none of the
            supported layouts.
        """
        response = self._search(project, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            yield key, data

    @classmethod
    def _parse_datetime(cls, value):
        """Parse a stored ISO-8601 timestamp, with or without microseconds."""
        # Imported locally under a private name: the module-level name
        # ``datetime`` may be bound to either the module or the class,
        # depending on whether the optional elasticsearch import succeeded.
        from datetime import datetime as datetime_type

        last_error = None
        for fmt in cls._DATETIME_FORMATS:
            try:
                return datetime_type.strptime(value, fmt)
            except ValueError as exc:
                last_error = exc
        raise last_error

    def _search(self, project, id_prefix=None):
        """Run the search for ``project`` (optionally narrowed by ``_id`` prefix).

        At most 1000 hits are requested, newest first. Returns the raw
        response of the client's ``search`` call.
        """
        bool_query = {"filter": {"term": {"commit_info.project": project}}}
        if id_prefix:
            bool_query["must"] = {"prefix": {"_id": id_prefix}}
        body = {
            "size": 1000,
            "sort": [{"datetime": {"order": "desc"}}],
            "query": {"bool": bool_query},
        }
        return self._elasticsearch.search(index=self._elasticsearch_index,
                                          doc_type=self._elasticsearch_doctype,
                                          body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Extract the benchmark-level keys from a stored document."""
        return dict((key, source_es_record[key]) for key in ElasticsearchStorage._BENCHMARK_KEYS)

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Extract the run-level keys from a stored document."""
        return dict((key, source_es_record[key]) for key in ElasticsearchStorage._RUN_INFO_KEYS)

    def _group_by_commit_and_time(self, hits):
        """Group search hits into runs keyed by ``"<commit id>_<datetime>"``."""
        result = {}
        for hit in hits:
            source_hit = hit["_source"]
            key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
            benchmark = self._benchmark_from_es_record(source_hit)
            if key not in result:
                # First benchmark of this run: create the run-level entry.
                run_info = self._run_info_from_es_record(source_hit)
                run_info["benchmarks"] = []
                result[key] = run_info
            result[key]["benchmarks"].append(benchmark)
        return result

    def load_benchmarks(self, project):
        """
        Yield one benchmark dict per stored document of ``project``, in the
        order returned by the search (newest first).
        """
        response = self._search(project)
        for hit in response["hits"]["hits"]:
            yield self._benchmark_from_es_record(hit["_source"])

    def save(self, document, document_id):
        """Index ``document`` under ``document_id`` in the configured index."""
        self._elasticsearch.index(
            index=self._elasticsearch_index,
            doc_type=self._elasticsearch_doctype,
            body=document,
            id=document_id,
        )

    @staticmethod
    def _fields(field_type, names, **extra):
        """Return ``{name: {"type": field_type, **extra}}`` for every name."""
        return dict((name, dict(extra, type=field_type)) for name in names)

    def _index_mapping(self):
        """Build the index-creation body describing the benchmark document."""
        fields = self._fields

        commit_info = fields("boolean", ("dirty",))
        commit_info.update(fields("string", ("id", "project"), index="not_analyzed"))

        machine_info = fields(
            "string",
            (
                "machine",
                "node",
                "processor",
                "python_build",
                "python_compiler",
                "python_implementation",
                "python_implementation_version",
                "python_version",
                "release",
                "system",
            ),
            index="not_analyzed",
        )

        options = fields("boolean", ("disable_gc", "warmup"))
        options.update(fields("double", ("max_time", "min_time")))
        options.update(fields("long", ("min_rounds",)))
        options.update(fields("string", ("timer",)))

        stats = fields(
            "double",
            ("hd15iqr", "iqr", "ld15iqr", "max", "mean", "median", "min", "q1", "q3", "stddev"),
        )
        stats.update(fields("long", ("iqr_outliers", "iterations", "rounds", "stddev_outliers")))
        stats.update(fields("string", ("outliers",)))

        properties = fields("string", ("name", "fullname", "version"), index="not_analyzed")
        properties.update({
            "commit_info": {"properties": commit_info},
            "datetime": {
                "type": "date",
                "format": "strict_date_optional_time||epoch_millis",
            },
            "machine_info": {"properties": machine_info},
            "options": {"properties": options},
            "stats": {"properties": stats},
        })
        # The mapping is registered under the configured doctype so that it
        # applies to the documents written by ``save`` and read by ``_search``.
        return {"mappings": {self._elasticsearch_doctype: {"properties": properties}}}

    def _create_index(self):
        """Create the index with its mapping.

        ``ignore=400`` is passed so that a 400 response from the create call
        (presumably "index already exists" -- confirm against the client
        docs) does not raise.
        """
        self._elasticsearch.indices.create(
            index=self._elasticsearch_index,
            ignore=400,
            body=self._index_mapping(),
        )
298
299