Completed
Push — master ( b996cf...bbb940 )
by Ionel Cristian
9s
created

_mask_hosts()   A

Complexity

Conditions 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 5
rs 9.4285
c 0
b 0
f 0
1
from __future__ import absolute_import
2
3
import re
4
import sys
5
import uuid
6
from datetime import date
7
from datetime import datetime
8
from decimal import Decimal
9
from functools import partial
10
11
from ..compat import reraise
12
13
try:
14
    import elasticsearch
15
    from elasticsearch.serializer import JSONSerializer
16
except ImportError as exc:
17
    reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
18
            sys.exc_info()[2])
19
20
21
class BenchmarkJSONSerializer(JSONSerializer):
    """
    JSON serializer with fallbacks for the non-JSON types found in benchmark data.
    """

    def default(self, data):
        """
        Convert ``data`` into a value the JSON encoder can emit.

        * ``date`` / ``datetime`` -> ISO 8601 string (``isoformat()``)
        * ``Decimal`` -> ``float``
        * ``uuid.UUID`` -> ``str``
        * anything else -> the marker string ``"UNSERIALIZABLE[<repr>]"``

        Unknown objects are never rejected with an exception; they are
        replaced by the marker string instead.
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        # Wrap in a 1-tuple: with a bare ``% data`` a tuple-valued ``data``
        # would be unpacked as the format arguments (TypeError for more than
        # one element, wrong output for exactly one).
        return "UNSERIALIZABLE[%r]" % (data,)
31
32
33
def _mask_hosts(hosts):
34
    m = re.compile('^([^:]+)://[^@]+@')
35
    sub_fun = partial(m.sub, '\\1://***:***@')
36
    masked_hosts = list(map(sub_fun, hosts))
37
    return masked_hosts
38
39
40
class ElasticsearchStorage(object):
    """
    Benchmark storage backed by an Elasticsearch index.

    ``save`` indexes every benchmark of a run as its own document; ``load``
    regroups documents that share a commit id and run datetime back into
    one run.
    """

    def __init__(self, hosts, index, doctype, project_name, logger,
                 default_machine_id=None):
        """
        Create the client and make sure the index exists.

        :param hosts: passed as-is to ``elasticsearch.Elasticsearch``.
        :param index: index name used for every search/index call.
        :param doctype: document type used for every search/index call.
        :param project_name: value matched against ``commit_info.project``
            when searching.
        :param logger: object exposing ``info(message)``.
        :param default_machine_id: when truthy, prefixed (with ``_``) to the
            benchmark id in ``save``.
        """
        self._es_hosts = hosts
        self._es_index = index
        self._es_doctype = doctype
        self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
        self._project_name = project_name
        self.default_machine_id = default_machine_id
        self.logger = logger
        self._cache = {}
        self._create_index()

    # NOTE(review): ``__str__`` and ``location`` expose the hosts unmasked,
    # unlike the log message in ``save`` -- confirm no caller prints them
    # where credentials must stay hidden.
    def __str__(self):
        return str(self._es_hosts)

    @property
    def location(self):
        """String form of the configured hosts."""
        return str(self._es_hosts)

    def query(self):
        """
        Return the sorted ``benchmark_id`` values reported by a terms
        aggregation over the index/doctype.
        """
        # NOTE(review): the aggregation sets no explicit "size", so the
        # server-side default bucket limit applies -- confirm that is intended.
        body = {
            "size": 0,
            "aggs": {
                "benchmark_ids": {
                    "terms": {
                        "field": "benchmark_id"
                    }
                }
            }
        }
        result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
        return sorted(record["key"] for record in result["aggregations"]["benchmark_ids"]["buckets"])

    @staticmethod
    def _parse_datetime(value):
        """
        Parse a stored run datetime string into a ``datetime``.

        ``datetime.isoformat()`` drops the fractional part when microseconds
        are zero, so a value without ``.%f`` is accepted as well.

        :raises ValueError: if ``value`` matches neither layout.
        """
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def load(self, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for the project, oldest run first.

        ``key`` is ``"<commit id>_<datetime>"`` and ``run`` is the run info
        dict with its grouped ``"benchmarks"`` list.

        :param id_prefix: optional document id prefix to restrict the search.
        """
        response = self._search(self._project_name, id_prefix)
        runs = self._group_by_commit_and_time(response["hits"]["hits"])
        ordered = sorted(runs.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in ordered:
            yield key, data

    def _search(self, project, id_prefix=None):
        """
        Run the search for documents of ``project`` (at most 1000, newest
        first) and return the raw response; ``id_prefix``, when truthy, adds
        a prefix clause on ``_id``.
        """
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": {
                    "filter": {
                        "term": {
                            "commit_info.project": project
                        }
                    }
                }
            }
        }
        if id_prefix:
            body["query"]["bool"]["must"] = {
                "prefix": {
                    "_id": id_prefix
                }
            }

        return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """Return a new dict holding only the per-benchmark keys of a record."""
        benchmark_keys = ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id")
        return dict((key, source_es_record[key]) for key in benchmark_keys)

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """Return a new dict holding only the per-run keys of a record."""
        run_keys = ("machine_info", "commit_info", "datetime", "version")
        return dict((key, source_es_record[key]) for key in run_keys)

    def _group_by_commit_and_time(self, hits):
        """
        Group search hits into runs keyed by ``"<commit id>_<datetime>"``.

        The run info is taken from the first hit seen for a key; every hit
        contributes one entry to that run's ``"benchmarks"`` list.
        """
        result = {}
        for hit in hits:
            source_hit = hit["_source"]
            key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
            benchmark = self._benchmark_from_es_record(source_hit)
            if key in result:
                result[key]["benchmarks"].append(benchmark)
            else:
                run_info = self._run_info_from_es_record(source_hit)
                run_info["benchmarks"] = [benchmark]
                result[key] = run_info
        return result

    def load_benchmarks(self, *args):
        """
        Yield one flat dict per stored benchmark of the project.

        The ``"stats"`` sub-dict is merged into the top level and
        ``"source"`` is set to the benchmark id.  The first positional
        argument, if given, is used as the document id prefix.
        """
        id_prefix = args[0] if args else None
        r = self._search(self._project_name, id_prefix)
        for hit in r["hits"]["hits"]:
            bench = self._benchmark_from_es_record(hit["_source"])
            bench.update(bench.pop("stats"))
            bench["source"] = bench["benchmark_id"]
            yield bench

    def save(self, output_json, save):
        """
        Index every benchmark of ``output_json`` as its own document.

        ``output_json`` is modified: its ``"benchmarks"`` list is popped and
        the remaining top-level keys are copied into each benchmark dict.

        :param output_json: run dict containing a ``"benchmarks"`` list.
        :param save: base benchmark id; prefixed with
            ``default_machine_id`` when that is set.
        """
        output_benchmarks = output_json.pop("benchmarks")
        for bench in output_benchmarks:
            # add top level info from output_json dict to each record
            bench.update(output_json)
            benchmark_id = save
            if self.default_machine_id:
                benchmark_id = self.default_machine_id + "_" + benchmark_id
            doc_id = benchmark_id + "_" + bench["fullname"]
            bench["benchmark_id"] = benchmark_id
            self._es.index(
                index=self._es_index,
                doc_type=self._es_doctype,
                body=bench,
                id=doc_id,
            )
        # hide user's credentials before logging
        masked_hosts = _mask_hosts(self._es_hosts)
        self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
            masked_hosts, self._es_index, self._es_doctype))

    def _create_index(self):
        """
        Create the index with explicit field mappings.

        The call passes ``ignore=400`` to the client, so a 400 response is
        not raised as an error (presumably the "index already exists"
        case -- confirm against the client documentation).
        """
        # NOTE(review): the mapping is registered under the literal type
        # "benchmark" regardless of ``self._es_doctype`` -- confirm intended.
        mapping = {
            "mappings": {
                "benchmark": {
                    "properties": {
                        "commit_info": {
                            "properties": {
                                "dirty": {"type": "boolean"},
                                "id": {"type": "string", "index": "not_analyzed"},
                                "project": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "datetime": {
                            "type": "date",
                            "format": "strict_date_optional_time||epoch_millis"
                        },
                        "name": {"type": "string", "index": "not_analyzed"},
                        "fullname": {"type": "string", "index": "not_analyzed"},
                        "version": {"type": "string", "index": "not_analyzed"},
                        "benchmark_id": {"type": "string", "index": "not_analyzed"},
                        "machine_info": {
                            "properties": {
                                "machine": {"type": "string", "index": "not_analyzed"},
                                "node": {"type": "string", "index": "not_analyzed"},
                                "processor": {"type": "string", "index": "not_analyzed"},
                                "python_build": {"type": "string", "index": "not_analyzed"},
                                "python_compiler": {"type": "string", "index": "not_analyzed"},
                                "python_implementation": {"type": "string", "index": "not_analyzed"},
                                "python_implementation_version": {"type": "string", "index": "not_analyzed"},
                                "python_version": {"type": "string", "index": "not_analyzed"},
                                "release": {"type": "string", "index": "not_analyzed"},
                                "system": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "options": {
                            "properties": {
                                "disable_gc": {"type": "boolean"},
                                "max_time": {"type": "double"},
                                "min_rounds": {"type": "long"},
                                "min_time": {"type": "double"},
                                "timer": {"type": "string"},
                                "warmup": {"type": "boolean"},
                            }
                        },
                        "stats": {
                            "properties": {
                                "hd15iqr": {"type": "double"},
                                "iqr": {"type": "double"},
                                "iqr_outliers": {"type": "long"},
                                "iterations": {"type": "long"},
                                "ld15iqr": {"type": "double"},
                                "max": {"type": "double"},
                                "mean": {"type": "double"},
                                "median": {"type": "double"},
                                "min": {"type": "double"},
                                "outliers": {"type": "string"},
                                "q1": {"type": "double"},
                                "q3": {"type": "double"},
                                "rounds": {"type": "long"},
                                "stddev": {"type": "double"},
                                "stddev_outliers": {"type": "long"},
                            }
                        },
                    }
                }
            }
        }
        self._es.indices.create(index=self._es_index, ignore=400, body=mapping)
340