BenchmarkJSONSerializer   A
last analyzed

Complexity

Total Complexity 4

Size/Duplication

Total Lines 10
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 10
rs 10
wmc 4

1 Method

Rating   Name   Duplication   Size   Complexity  
A default() 0 9 4
1
from __future__ import absolute_import
2
3
import re
4
import sys
5
import uuid
6
from datetime import date
7
from datetime import datetime
8
from decimal import Decimal
9
from functools import partial
10
11
from ..compat import reraise
12
from ..stats import normalize_stats
13
14
try:
15
    import elasticsearch
16
    from elasticsearch.serializer import JSONSerializer
17
except ImportError as exc:
18
    reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
19
            sys.exc_info()[2])
20
21
22
class BenchmarkJSONSerializer(JSONSerializer):
    """
    JSON serializer with replacements for a few non-JSON value types.
    """

    def default(self, data):
        """
        Return a JSON-friendly replacement for ``data``.

        ``date``/``datetime`` values become their ISO 8601 string,
        ``Decimal`` becomes a ``float`` and ``uuid.UUID`` becomes its string
        form. Any other value is replaced by the placeholder string
        ``UNSERIALIZABLE[<repr of data>]``; this method never raises for an
        unknown type.
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        # Wrap the value in a 1-tuple: with a bare ``% data`` a tuple would be
        # unpacked as the format arguments and raise TypeError.
        return "UNSERIALIZABLE[%r]" % (data,)
32
33
34
# Matches ``scheme://userinfo@`` at the start of a URL. The userinfo part is
# not allowed to reach into the path, query or fragment, and the greedy
# quantifier runs up to the last ``@`` of the authority so that an ``@``
# inside the password is masked too.
_CREDENTIALS_RE = re.compile(r'^([^:]+)://[^/?#]+@')


def _mask_hosts(hosts):
    """
    Return a list with the credentials of every host URL replaced by ``***:***``.

    Hosts that carry no ``userinfo@`` part in front of the host name are
    returned unchanged. The input iterable is not modified.
    """
    return [_CREDENTIALS_RE.sub(r'\1://***:***@', host) for host in hosts]
39
40
41
class ElasticsearchStorage(object):
    """
    Benchmark storage that keeps its records in an Elasticsearch index.

    Each benchmark is indexed as one document which also carries the
    run-level keys (``machine_info``, ``commit_info``, ``datetime``,
    ``version``) of the run it was saved with.
    """

    def __init__(self, hosts, index, doctype, project_name, logger,
                 default_machine_id=None):
        """
        Create the Elasticsearch client and issue the index creation request.

        :param hosts: hosts handed to ``elasticsearch.Elasticsearch``.
        :param index: name of the index used for every request.
        :param doctype: document type used for search and index requests.
        :param project_name: value matched against ``commit_info.project``
            when loading records.
        :param logger: object whose ``info`` method receives the save message.
        :param default_machine_id: optional prefix for saved benchmark ids.
        """
        self._es_hosts = hosts
        self._es_index = index
        self._es_doctype = doctype
        self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
        self._project_name = project_name
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    def __str__(self):
        return str(self._es_hosts)

    @property
    def location(self):
        """
        String form of the configured hosts (credentials are NOT masked).
        """
        return str(self._es_hosts)

    def query(self):
        """
        Return the sorted ``benchmark_id`` values reported by the index.

        The ids come from a terms aggregation on the ``benchmark_id`` field.
        NOTE(review): the request carries no project filter and no explicit
        bucket ``size``, so the server's default bucket limit applies --
        confirm this is intended.
        """
        body = {
            "size": 0,
            "aggs": {
                "benchmark_ids": {
                    "terms": {
                        "field": "benchmark_id"
                    }
                }
            }
        }
        response = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
        buckets = response["aggregations"]["benchmark_ids"]["buckets"]
        return sorted(bucket["key"] for bucket in buckets)

    def load(self, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for the project, oldest run first.

        Records are grouped into runs by commit id and datetime; every
        benchmark's ``stats`` is passed through ``normalize_stats`` before its
        run is yielded.

        :param id_prefix: optional prefix the document ids must start with.
        """
        response = self._search(self._project_name, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            for bench in data["benchmarks"]:
                normalize_stats(bench["stats"])
            yield key, data

    @staticmethod
    def _parse_datetime(value):
        """
        Parse an ISO 8601 ``YYYY-MM-DDTHH:MM:SS[.ffffff]`` string.

        ``datetime.isoformat()`` leaves out the fractional part when the
        microsecond is zero, so the layout without it is accepted as well.

        :raises ValueError: if ``value`` matches neither layout.
        """
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def _search(self, project, id_prefix=None):
        """
        Run the search for records of ``project`` and return the raw response.

        At most 1000 hits are requested, newest ``datetime`` first. When
        ``id_prefix`` is truthy the document ``_id`` must start with it.
        """
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": {
                    "filter": {
                        "term": {
                            "commit_info.project": project
                        }
                    }
                }
            }
        }
        if id_prefix:
            body["query"]["bool"]["must"] = {"prefix": {"_id": id_prefix}}

        return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """
        Return a new dict with only the per-benchmark keys of a stored record.

        :raises KeyError: if the record lacks one of the keys.
        """
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id")
        return {key: source_es_record[key] for key in benchmark_keys}

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """
        Return a new dict with only the run-level keys of a stored record.

        :raises KeyError: if the record lacks one of the keys.
        """
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return {key: source_es_record[key] for key in run_keys}

    def _group_by_commit_and_time(self, hits):
        """
        Group search hits into runs keyed by ``<commit id>_<datetime>``.

        Each run is the run-level info of the first hit seen for that key plus
        a ``benchmarks`` list holding the benchmarks in hit order.
        """
        runs = {}
        for hit in hits:
            source = hit["_source"]
            key = "%s_%s" % (source["commit_info"]["id"], source["datetime"])
            benchmark = self._benchmark_from_es_record(source)
            if key not in runs:
                runs[key] = self._run_info_from_es_record(source)
                runs[key]["benchmarks"] = []
            runs[key]["benchmarks"].append(benchmark)
        return runs

    def load_benchmarks(self, *args):
        """
        Yield one flat dict per stored benchmark of the project.

        The content of ``stats`` is merged into the top level of each dict and
        ``source`` is set to the record's ``benchmark_id``. The first
        positional argument, if given, is used as the document id prefix.
        """
        id_prefix = args[0] if args else None
        response = self._search(self._project_name, id_prefix)
        for hit in response["hits"]["hits"]:
            bench = self._benchmark_from_es_record(hit["_source"])
            bench.update(bench.pop("stats"))
            bench["source"] = bench["benchmark_id"]
            yield bench

    def save(self, output_json, save):
        """
        Index every benchmark of ``output_json`` as its own document.

        Each document is a copy of the benchmark extended with the top-level
        keys of ``output_json`` (everything except ``benchmarks``) and a
        ``benchmark_id``. Neither ``output_json`` nor its benchmark dicts are
        modified.

        :param output_json: dict with a ``benchmarks`` list plus run-level keys.
        :param save: name of the run; prefixed with ``default_machine_id``
            when that is set.
        :raises KeyError: if ``output_json`` has no ``benchmarks`` key.
        """
        benchmark_id = save
        if self.default_machine_id:
            benchmark_id = self.default_machine_id + "_" + benchmark_id
        run_info = {key: value for key, value in output_json.items() if key != "benchmarks"}
        for bench in output_json["benchmarks"]:
            # add top level info from output_json dict to each record
            record = dict(bench)
            record.update(run_info)
            record["benchmark_id"] = benchmark_id
            self._es.index(
                index=self._es_index,
                doc_type=self._es_doctype,
                body=record,
                id=benchmark_id + "_" + record["fullname"],
            )
        # hide user's credentials before logging
        masked_hosts = _mask_hosts(self._es_hosts)
        self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
            masked_hosts, self._es_index, self._es_doctype))

    @staticmethod
    def _index_mapping():
        """
        Return the request body used to create the index.

        NOTE(review): the mapping is declared for the literal ``benchmark``
        type, independent of the ``doctype`` given to the constructor.
        """
        return {
            "mappings": {
                "benchmark": {
                    "properties": {
                        "commit_info": {
                            "properties": {
                                "dirty": {"type": "boolean"},
                                "id": {"type": "string", "index": "not_analyzed"},
                                "project": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "datetime": {
                            "type": "date",
                            "format": "strict_date_optional_time||epoch_millis"
                        },
                        "name": {"type": "string", "index": "not_analyzed"},
                        "fullname": {"type": "string", "index": "not_analyzed"},
                        "version": {"type": "string", "index": "not_analyzed"},
                        "benchmark_id": {"type": "string", "index": "not_analyzed"},
                        "machine_info": {
                            "properties": {
                                "machine": {"type": "string", "index": "not_analyzed"},
                                "node": {"type": "string", "index": "not_analyzed"},
                                "processor": {"type": "string", "index": "not_analyzed"},
                                "python_build": {"type": "string", "index": "not_analyzed"},
                                "python_compiler": {"type": "string", "index": "not_analyzed"},
                                "python_implementation": {"type": "string", "index": "not_analyzed"},
                                "python_implementation_version": {"type": "string", "index": "not_analyzed"},
                                "python_version": {"type": "string", "index": "not_analyzed"},
                                "release": {"type": "string", "index": "not_analyzed"},
                                "system": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "options": {
                            "properties": {
                                "disable_gc": {"type": "boolean"},
                                "max_time": {"type": "double"},
                                "min_rounds": {"type": "long"},
                                "min_time": {"type": "double"},
                                "timer": {"type": "string"},
                                "warmup": {"type": "boolean"},
                            }
                        },
                        "stats": {
                            "properties": {
                                "hd15iqr": {"type": "double"},
                                "iqr": {"type": "double"},
                                "iqr_outliers": {"type": "long"},
                                "iterations": {"type": "long"},
                                "ld15iqr": {"type": "double"},
                                "max": {"type": "double"},
                                "mean": {"type": "double"},
                                "median": {"type": "double"},
                                "min": {"type": "double"},
                                "outliers": {"type": "string"},
                                "q1": {"type": "double"},
                                "q3": {"type": "double"},
                                "rounds": {"type": "long"},
                                "stddev": {"type": "double"},
                                "stddev_outliers": {"type": "long"},
                                "ops": {"type": "double"},
                            }
                        },
                    }
                }
            }
        }

    def _create_index(self):
        """
        Ask Elasticsearch to create the index with the benchmark mapping.

        HTTP 400 responses are ignored by the client (``ignore=400``);
        presumably this covers the "index already exists" answer -- see the
        elasticsearch-py documentation on ignoring status codes.
        """
        self._es.indices.create(index=self._es_index, ignore=400, body=self._index_mapping())
346