_mask_hosts() - Code Metrics - Inspection of "Fix update hooks specs" - ionelmc/pytest-benchmark - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#74)

unknown

created 2017-04-04 18:44 UTC

_mask_hosts() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
dl	0
loc	5
rs	9.4285
c	0
b	0
f	0

from __future__ import absolute_import

import re
import sys
import uuid
from datetime import date
from datetime import datetime
from decimal import Decimal
from functools import partial

from ..compat import reraise

try:
    import elasticsearch
    from elasticsearch.serializer import JSONSerializer
except ImportError as exc:
    reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
            sys.exc_info()[2])


class BenchmarkJSONSerializer(JSONSerializer):
    """
    JSON serializer that never fails on values found in benchmark data.
    """

    def default(self, data):
        """
        Convert a value the JSON encoder cannot handle natively.

        Dates and datetimes become ISO 8601 strings, ``Decimal`` becomes
        ``float`` and ``UUID`` becomes its string form. Anything else is
        replaced by a placeholder string embedding the value's ``repr``
        rather than raising.
        """
        if isinstance(data, (date, datetime)):
            return data.isoformat()
        if isinstance(data, Decimal):
            return float(data)
        if isinstance(data, uuid.UUID):
            return str(data)
        # Last resort: keep the document indexable and make the problem visible.
        return "UNSERIALIZABLE[%r]" % data


def _mask_hosts(hosts):
    m = re.compile('^([^:]+)://[^@]+@')
    sub_fun = partial(m.sub, '\\1://***:***@')
    masked_hosts = list(map(sub_fun, hosts))
    return masked_hosts


class ElasticsearchStorage(object):
    """
    Storage backend that saves and loads benchmark runs through Elasticsearch.

    Every benchmark of a run is indexed as its own document (the run level
    information is copied into each of them). When loading, documents are
    regrouped into runs by commit id and timestamp.
    """

    def __init__(self, hosts, index, doctype, project_name, logger,
                 default_machine_id=None):
        """
        Create the Elasticsearch client and make sure the index exists.

        :param hosts: hosts passed as-is to ``elasticsearch.Elasticsearch``.
        :param index: name of the index holding the benchmark documents.
        :param doctype: document type used when searching and indexing.
        :param project_name: value matched against ``commit_info.project``
            when searching.
        :param logger: object whose ``info`` method receives the save message.
        :param default_machine_id: optional prefix for saved benchmark ids.
        """
        self._es_hosts = hosts
        self._es_index = index
        self._es_doctype = doctype
        self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
        self._project_name = project_name
        self.default_machine_id = default_machine_id
        self.logger = logger
        # Not read by any method of this class.
        self._cache = {}
        self._create_index()

    def __str__(self):
        # NOTE(review): unlike the message logged by save(), the hosts are not
        # masked here, so credentials embedded in the URLs are included.
        return str(self._es_hosts)

    @property
    def location(self):
        """
        String form of the configured hosts (credentials are NOT masked).
        """
        return str(self._es_hosts)

    def query(self):
        """
        Return the sorted benchmark ids found in the index.

        The ids are the bucket keys of a ``terms`` aggregation on the
        ``benchmark_id`` field; the request itself carries no project filter.
        """
        body = {
            "size": 0,
            "aggs": {
                "benchmark_ids": {
                    "terms": {
                        "field": "benchmark_id"
                    }
                }
            }
        }
        result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
        return sorted([record["key"] for record in result["aggregations"]["benchmark_ids"]["buckets"]])

    @staticmethod
    def _parse_datetime(value):
        """
        Parse an ISO 8601 timestamp, with or without fractional seconds.

        ``datetime.isoformat()`` leaves out the fractional part when the
        microsecond is 0, so both layouts can be present in stored records.

        :raises ValueError: when ``value`` matches neither layout.
        """
        try:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f")
        except ValueError:
            return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

    def load(self, id_prefix=None):
        """
        Yield ``(key, run)`` pairs for the project, oldest run first.

        ``key`` is ``"<commit id>_<datetime>"`` and ``run`` holds the run level
        information plus the list of its benchmarks under ``"benchmarks"``.

        :param id_prefix: optional prefix the document ids must start with.
        """
        r = self._search(self._project_name, id_prefix)
        grouped_data = self._group_by_commit_and_time(r["hits"]["hits"])
        runs = sorted(grouped_data.items(), key=lambda item: self._parse_datetime(item[1]["datetime"]))
        for key, data in runs:
            yield key, data

    def _search(self, project, id_prefix=None):
        """
        Search the documents of ``project`` and return the raw response.

        At most 1000 hits are requested, sorted by ``datetime`` descending.
        When ``id_prefix`` is given, only documents whose ``_id`` starts with
        it are matched.
        """
        body = {
            "size": 1000,
            "sort": [
                {
                    "datetime": {
                        "order": "desc"
                    }
                }
            ],
            "query": {
                "bool": {
                    "filter": {
                        "term": {
                            "commit_info.project": project
                        }
                    }
                }
            }
        }
        if id_prefix:
            body["query"]["bool"]["must"] = {
                "prefix": {
                    "_id": id_prefix
                }
            }

        return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)

    @staticmethod
    def _benchmark_from_es_record(source_es_record):
        """
        Extract the benchmark level fields of a document ``_source``.

        :raises KeyError: when one of the expected fields is missing.
        """
        result = {}
        for benchmark_key in ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id"):
            result[benchmark_key] = source_es_record[benchmark_key]
        return result

    @staticmethod
    def _run_info_from_es_record(source_es_record):
        """
        Extract the run level fields of a document ``_source``.

        :raises KeyError: when one of the expected fields is missing.
        """
        result = {}
        for run_key in ("machine_info", "commit_info", "datetime", "version"):
            result[run_key] = source_es_record[run_key]
        return result

    def _group_by_commit_and_time(self, hits):
        """
        Regroup search hits into runs keyed by ``"<commit id>_<datetime>"``.

        The run level information is taken from the first hit seen for a key;
        the benchmark of every hit is appended to that run's ``"benchmarks"``.
        """
        result = {}
        for hit in hits:
            source_hit = hit["_source"]
            key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
            benchmark = self._benchmark_from_es_record(source_hit)
            if key in result:
                result[key]["benchmarks"].append(benchmark)
            else:
                run_info = self._run_info_from_es_record(source_hit)
                run_info["benchmarks"] = [benchmark]
                result[key] = run_info
        return result

    def load_benchmarks(self, *args):
        """
        Yield the benchmarks of the project as flat dicts.

        The content of ``"stats"`` is merged into the top level of each
        benchmark and ``"source"`` is set to its ``benchmark_id``.

        :param args: optional; the first positional argument is used as the
            document id prefix, any further arguments are ignored.
        """
        id_prefix = args[0] if args else None
        r = self._search(self._project_name, id_prefix)
        for hit in r["hits"]["hits"]:
            bench = self._benchmark_from_es_record(hit["_source"])
            bench.update(bench.pop("stats"))
            bench["source"] = bench["benchmark_id"]
            yield bench

    def save(self, output_json, save):
        """
        Index every benchmark of ``output_json`` as a separate document.

        Each document is the benchmark dict updated with the top level (run)
        information and a ``benchmark_id``. The document id is
        ``"<benchmark_id>_<fullname>"``, where ``benchmark_id`` is ``save``
        prefixed with ``default_machine_id`` when one is configured.

        :param output_json: dict with a ``"benchmarks"`` list and the run
            level information; it is left unmodified.
        :param save: name under which the run is saved.
        :raises KeyError: when ``output_json`` has no ``"benchmarks"`` key.
        """
        # Work on shallow copies so that neither the caller's dict nor its
        # benchmark entries are altered by saving.
        run_info = dict(output_json)
        output_benchmarks = run_info.pop("benchmarks")

        benchmark_id = save
        if self.default_machine_id:
            benchmark_id = self.default_machine_id + "_" + benchmark_id

        for bench in output_benchmarks:
            # add top level info from output_json dict to each record
            record = dict(bench)
            record.update(run_info)
            doc_id = benchmark_id + "_" + record["fullname"]
            record["benchmark_id"] = benchmark_id
            self._es.index(
                index=self._es_index,
                doc_type=self._es_doctype,
                body=record,
                id=doc_id,
            )
        # hide user's credentials before logging
        masked_hosts = _mask_hosts(self._es_hosts)
        self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
            masked_hosts, self._es_index, self._es_doctype))

    def _create_index(self):
        """
        Create the index with the mapping of the benchmark documents.

        The request is sent with ``ignore=400`` (presumably so that an already
        existing index is not reported as an error -- confirm against the
        client documentation).
        """
        mapping = {
            "mappings": {
                "benchmark": {
                    "properties": {
                        "commit_info": {
                            "properties": {
                                "dirty": {"type": "boolean"},
                                "id": {"type": "string", "index": "not_analyzed"},
                                "project": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "datetime": {
                            "type": "date",
                            "format": "strict_date_optional_time||epoch_millis"
                        },
                        "name": {"type": "string", "index": "not_analyzed"},
                        "fullname": {"type": "string", "index": "not_analyzed"},
                        "version": {"type": "string", "index": "not_analyzed"},
                        "benchmark_id": {"type": "string", "index": "not_analyzed"},
                        "machine_info": {
                            "properties": {
                                "machine": {"type": "string", "index": "not_analyzed"},
                                "node": {"type": "string", "index": "not_analyzed"},
                                "processor": {"type": "string", "index": "not_analyzed"},
                                "python_build": {"type": "string", "index": "not_analyzed"},
                                "python_compiler": {"type": "string", "index": "not_analyzed"},
                                "python_implementation": {"type": "string", "index": "not_analyzed"},
                                "python_implementation_version": {"type": "string", "index": "not_analyzed"},
                                "python_version": {"type": "string", "index": "not_analyzed"},
                                "release": {"type": "string", "index": "not_analyzed"},
                                "system": {"type": "string", "index": "not_analyzed"},
                            }
                        },
                        "options": {
                            "properties": {
                                "disable_gc": {"type": "boolean"},
                                "max_time": {"type": "double"},
                                "min_rounds": {"type": "long"},
                                "min_time": {"type": "double"},
                                "timer": {"type": "string"},
                                "warmup": {"type": "boolean"},
                            }
                        },
                        "stats": {
                            "properties": {
                                "hd15iqr": {"type": "double"},
                                "iqr": {"type": "double"},
                                "iqr_outliers": {"type": "long"},
                                "iterations": {"type": "long"},
                                "ld15iqr": {"type": "double"},
                                "max": {"type": "double"},
                                "mean": {"type": "double"},
                                "median": {"type": "double"},
                                "min": {"type": "double"},
                                "outliers": {"type": "string"},
                                "q1": {"type": "double"},
                                "q3": {"type": "double"},
                                "rounds": {"type": "long"},
                                "stddev": {"type": "double"},
                                "stddev_outliers": {"type": "long"},
                            }
                        },
                    }
                }
            }
        }
        self._es.indices.create(index=self._es_index, ignore=400, body=mapping)


1			from __future__ import absolute_import
2
3			import re
4			import sys
5			import uuid
6			from datetime import date
7			from datetime import datetime
8			from decimal import Decimal
9			from functools import partial
10
11			from ..compat import reraise
12
13			try:
14			import elasticsearch
15			from elasticsearch.serializer import JSONSerializer
16			except ImportError as exc:
17			reraise(ImportError, ImportError("Please install elasticsearch or pytest-benchmark[elasticsearch]", exc.args),
18			sys.exc_info()[2])
19
20
21			class BenchmarkJSONSerializer(JSONSerializer):
22			def default(self, data):
23			if isinstance(data, (date, datetime)):
24			return data.isoformat()
25			elif isinstance(data, Decimal):
26			return float(data)
27			elif isinstance(data, uuid.UUID):
28			return str(data)
29			else:
30			return "UNSERIALIZABLE[%r]" % data
31
32
33			def _mask_hosts(hosts):
34			m = re.compile('^([^:]+)://[^@]+@')
35			sub_fun = partial(m.sub, '\\1://***:***@')
36			masked_hosts = list(map(sub_fun, hosts))
37			return masked_hosts
38
39
40			class ElasticsearchStorage(object):
41			def __init__(self, hosts, index, doctype, project_name, logger,
42			default_machine_id=None):
43			self._es_hosts = hosts
44			self._es_index = index
45			self._es_doctype = doctype
46			self._es = elasticsearch.Elasticsearch(self._es_hosts, serializer=BenchmarkJSONSerializer())
47			self._project_name = project_name
48			self.default_machine_id = default_machine_id
49			self.logger = logger
50			self._cache = {}
51			self._create_index()
52
53			def __str__(self):
54			return str(self._es_hosts)
55
56			@property
57			def location(self):
58			return str(self._es_hosts)
59
60			def query(self):
61			"""
62			Returns sorted records names (ids) that corresponds with project.
63			"""
64			body = {
65			"size": 0,
66			"aggs": {
67			"benchmark_ids": {
68			"terms": {
69			"field": "benchmark_id"
70			}
71			}
72			}
73			}
74			result = self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
75			return sorted([record["key"] for record in result["aggregations"]["benchmark_ids"]["buckets"]])
76
77			def load(self, id_prefix=None):
78			"""
79			Yield key and content of records that corresponds with project name.
80			"""
81			r = self._search(self._project_name, id_prefix)
82			groupped_data = self._group_by_commit_and_time(r["hits"]["hits"])
83			result = [(key, value) for key, value in groupped_data.items()]
84			result.sort(key=lambda x: datetime.strptime(x[1]["datetime"], "%Y-%m-%dT%H:%M:%S.%f"))
85			for key, data in result:
86			yield key, data
87
88			def _search(self, project, id_prefix=None):
89			body = {
90			"size": 1000,
91			"sort": [
92			{
93			"datetime": {
94			"order": "desc"
95			}
96			}
97			],
98			"query": {
99			"bool": {
100			"filter": {
101			"term": {
102			"commit_info.project": project
103			}
104			}
105			}
106			}
107			}
108			if id_prefix:
109			body["query"]["bool"]["must"] = {
110			"prefix": {
111			"_id": id_prefix
112			}
113			}
114
115			return self._es.search(index=self._es_index, doc_type=self._es_doctype, body=body)
116
117			@staticmethod
118			def _benchmark_from_es_record(source_es_record):
119			result = {}
120			for benchmark_key in ("group", "stats", "options", "param", "name", "params", "fullname", "benchmark_id"):
121			result[benchmark_key] = source_es_record[benchmark_key]
122			return result
123
124			@staticmethod
125			def _run_info_from_es_record(source_es_record):
126			result = {}
127			for run_key in ("machine_info", "commit_info", "datetime", "version"):
128			result[run_key] = source_es_record[run_key]
129			return result
130
131			def _group_by_commit_and_time(self, hits):
132			result = {}
133			for hit in hits:
134			source_hit = hit["_source"]
135			key = "%s_%s" % (source_hit["commit_info"]["id"], source_hit["datetime"])
136			benchmark = self._benchmark_from_es_record(source_hit)
137			if key in result:
138			result[key]["benchmarks"].append(benchmark)
139			else:
140			run_info = self._run_info_from_es_record(source_hit)
141			run_info["benchmarks"] = [benchmark]
142			result[key] = run_info
143			return result
144
145			def load_benchmarks(self, *args):
146			"""
147			Yield benchmarks that corresponds with project. Put path and
148			source (uncommon part of path) to benchmark dict.
149			"""
150			id_prefix = args[0] if args else None
151			r = self._search(self._project_name, id_prefix)
152			for hit in r["hits"]["hits"]:
153			bench = self._benchmark_from_es_record(hit["_source"])
154			bench.update(bench.pop("stats"))
155			bench["source"] = bench["benchmark_id"]
156			yield bench
157
158			def save(self, output_json, save):
159			output_benchmarks = output_json.pop("benchmarks")
160			for bench in output_benchmarks:
161			# add top level info from output_json dict to each record
162			bench.update(output_json)
163			benchmark_id = save
164			if self.default_machine_id:
165			benchmark_id = self.default_machine_id + "_" + benchmark_id
166			doc_id = benchmark_id + "_" + bench["fullname"]
167			bench["benchmark_id"] = benchmark_id
168			self._es.index(
169			index=self._es_index,
170			doc_type=self._es_doctype,
171			body=bench,
172			id=doc_id,
173			)
174			# hide user's credentials before logging
175			masked_hosts = _mask_hosts(self._es_hosts)
176			self.logger.info("Saved benchmark data to %s to index %s as doctype %s" % (
177			masked_hosts, self._es_index, self._es_doctype))
178
179			def _create_index(self):
180			mapping = {
181			"mappings": {
182			"benchmark": {
183			"properties": {
184			"commit_info": {
185			"properties": {
186			"dirty": {
187			"type": "boolean"
188			},
189			"id": {
190			"type": "string",
191			"index": "not_analyzed"
192
193			},
194			"project": {
195			"type": "string",
196			"index": "not_analyzed"
197			}
198			}
199			},
200			"datetime": {
201			"type": "date",
202			"format": "strict_date_optional_time||epoch_millis"
203			},
204			"name": {
205			"type": "string",
206			"index": "not_analyzed"
207			},
208			"fullname": {
209			"type": "string",
210			"index": "not_analyzed"
211			},
212			"version": {
213			"type": "string",
214			"index": "not_analyzed"
215			},
216			"benchmark_id": {
217			"type": "string",
218			"index": "not_analyzed",
219			},
220			"machine_info": {
221			"properties": {
222			"machine": {
223			"type": "string",
224			"index": "not_analyzed"
225			},
226			"node": {
227			"type": "string",
228			"index": "not_analyzed"
229			},
230			"processor": {
231			"type": "string",
232			"index": "not_analyzed"
233			},
234			"python_build": {
235			"type": "string",
236			"index": "not_analyzed"
237			},
238			"python_compiler": {
239			"type": "string",
240			"index": "not_analyzed"
241			},
242			"python_implementation": {
243			"type": "string",
244			"index": "not_analyzed"
245			},
246			"python_implementation_version": {
247			"type": "string",
248			"index": "not_analyzed"
249			},
250			"python_version": {
251			"type": "string",
252			"index": "not_analyzed"
253			},
254			"release": {
255			"type": "string",
256			"index": "not_analyzed"
257			},
258			"system": {
259			"type": "string",
260			"index": "not_analyzed"
261			}
262			}
263			},
264			"options": {
265			"properties": {
266			"disable_gc": {
267			"type": "boolean"
268			},
269			"max_time": {
270			"type": "double"
271			},
272			"min_rounds": {
273			"type": "long"
274			},
275			"min_time": {
276			"type": "double"
277			},
278			"timer": {
279			"type": "string"
280			},
281			"warmup": {
282			"type": "boolean"
283			}
284			}
285			},
286			"stats": {
287			"properties": {
288			"hd15iqr": {
289			"type": "double"
290			},
291			"iqr": {
292			"type": "double"
293			},
294			"iqr_outliers": {
295			"type": "long"
296			},
297			"iterations": {
298			"type": "long"
299			},
300			"ld15iqr": {
301			"type": "double"
302			},
303			"max": {
304			"type": "double"
305			},
306			"mean": {
307			"type": "double"
308			},
309			"median": {
310			"type": "double"
311			},
312			"min": {
313			"type": "double"
314			},
315			"outliers": {
316			"type": "string"
317			},
318			"q1": {
319			"type": "double"
320			},
321			"q3": {
322			"type": "double"
323			},
324			"rounds": {
325			"type": "long"
326			},
327			"stddev": {
328			"type": "double"
329			},
330			"stddev_outliers": {
331			"type": "long"
332			}
333			}
334			},
335			}
336			}
337			}
338			}
339			self._es.indices.create(index=self._es_index, ignore=400, body=mapping)
340

ionelmc / pytest-benchmark

Pull Request — master (#74)

_mask_hosts() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like