|
1
|
|
|
# Author: Simon Blanke |
|
2
|
|
|
# Email: [email protected] |
|
3
|
|
|
# License: MIT License |
|
4
|
|
|
|
|
5
|
|
|
import os |
|
6
|
|
|
import sys |
|
7
|
|
|
import json |
|
8
|
|
|
import dill |
|
9
|
|
|
import shutil |
|
10
|
|
|
import pathlib |
|
11
|
|
|
from fnmatch import fnmatch |
|
12
|
|
|
|
|
13
|
|
|
import numpy as np |
|
14
|
|
|
import pandas as pd |
|
15
|
|
|
|
|
16
|
|
|
from .memory_load import MemoryLoad |
|
17
|
|
|
from .memory_dump import MemoryDump |
|
18
|
|
|
|
|
19
|
|
|
from .utils import ( |
|
20
|
|
|
_connect_key2value, |
|
21
|
|
|
_split_key_value, |
|
22
|
|
|
_reset_memory, |
|
23
|
|
|
_query_yes_no, |
|
24
|
|
|
object_hash, |
|
25
|
|
|
model_id, |
|
26
|
|
|
meta_data_name, |
|
27
|
|
|
) |
|
28
|
|
|
|
|
29
|
|
|
from .paths import _paths_ |
|
30
|
|
|
|
|
31
|
|
|
|
|
32
|
|
|
class Hypermemory(MemoryDump):
    """High-level interface to the on-disk long-term memory.

    Wraps :class:`MemoryLoad` / :class:`MemoryDump` to load and persist
    search data, look up the best stored model for a dataset, and perform
    maintenance (reset, delete, connect/split model IDs) on the meta-data
    directory rooted at ``_paths_["default"]``.
    """

    def __init__(self, *args, **kwargs):
        # Nothing is loaded yet; load() populates these attributes.
        self.memory_dict = None
        self.meta_data_found = False
        self.n_dims = None

        # Root directory where all meta data is stored.
        self.meta_path = _paths_["default"]

    def load(self, X=None, y=None, model=None, search_space=None):
        """Load previously stored search data for the given context.

        BUG FIX: ``X``, ``y``, ``model`` and ``search_space`` were free
        (undefined) names in the original body, so calling ``load()``
        always raised ``NameError``. They are now explicit parameters
        with ``None`` defaults (backward-compatible).

        Returns the loaded memory dictionary.
        """
        self._load_ = MemoryLoad(X, y, model, search_space)

        self.memory_dict = self._load_.hyperactive_memory_load()
        self.meta_data_found = self._load_.meta_data_found

        self.score_best = self._load_.score_best
        self.pos_best = self._load_.pos_best

        return self.memory_dict

    def dump(self, memory, X=None, y=None, model=None, search_space=None):
        """Persist ``memory`` to disk for the given context.

        BUG FIX: as in :meth:`load`, the four context arguments were
        undefined free names before; they are now parameters.
        """
        self._dump_ = MemoryDump(X, y, model, search_space)
        self._dump_.hyperactive_memory_dump(memory)

    def _get_para(self):
        """Return (parameter values, scores-as-column-vector) arrays.

        Requires :meth:`load` and :meth:`dump` to have been called first;
        prints an error and returns ``None`` otherwise.
        """
        if self.memory_dict is None:
            print("Error")
            return

        para_pd, metrics_pd = self._dump_._get_opt_meta_data(self.memory_dict)

        # Scores as a column vector so they align row-wise with parameters.
        return para_pd.values, np.expand_dims(metrics_pd["score"].values, axis=1)

    def get_best_model(self, X, y):
        """Find the best stored model for dataset ``(X, y)``.

        Walks the meta-data store for csv files matching this dataset,
        picks the one with the highest ``_score_`` and unpickles the
        associated objective function and search space.

        Returns:
            (best_score, {obj_func: search_space}, {obj_func: best_para})

        Raises:
            FileNotFoundError: if no matching meta data exists.
        """
        meta_data_paths = []
        pattern = meta_data_name(X, y)

        # Collect every meta-data csv belonging to this dataset.
        for path, subdirs, files in os.walk(self.meta_path):
            for name in files:
                if fnmatch(name, pattern):
                    meta_data_paths.append(pathlib.PurePath(path, name))

        score_best = -np.inf
        # BUG FIX: these were referenced after the loop without ever being
        # initialized, raising NameError whenever no meta data was found.
        obj_func = None
        search_space = None
        best_para = None

        for path in meta_data_paths:
            path = str(path)
            meta_data = pd.read_csv(path)
            scores = meta_data["_score_"].values

            score_max = scores.max()

            if score_max > score_best:
                score_best = score_max

                # The pickled objective function and search space live in
                # the model directory above the "dataset_id:" subdirectory.
                model_path = path.rsplit("dataset_id:", 1)[0]

                obj_func_path = model_path + "objective_function.pkl"
                search_space_path = model_path + "search_space.pkl"

                # NOTE(review): dill.load executes arbitrary code; only
                # safe because these files are written by this package.
                with open(obj_func_path, "rb") as fp:
                    obj_func = dill.load(fp)

                with open(search_space_path, "rb") as fp:
                    search_space = dill.load(fp)

                para_names = list(search_space.keys())

                # Row(s) that achieved the best score; take the first one.
                best_para = meta_data[meta_data["_score_"] == score_max]
                best_para = best_para[para_names].iloc[0]

                best_para = best_para.to_dict()

        if obj_func is None:
            raise FileNotFoundError(
                "No meta data found for this dataset in " + str(self.meta_path)
            )

        return (score_best, {obj_func: search_space}, {obj_func: best_para})

    def reset_memory(self, force_true=False):
        """Delete all stored meta data, asking for confirmation unless forced."""
        if force_true:
            _reset_memory(self.meta_path)
        elif _query_yes_no():
            _reset_memory(self.meta_path)

    def delete_model(self, model):
        """Remove every stored dataset of ``model`` from the memory."""
        model_hash = model_id(model)
        path = self.meta_path + "model_id:" + str(model_hash)

        # isdir() already implies existence; the extra exists() check
        # in the original was redundant.
        if os.path.isdir(path):
            shutil.rmtree(path)
            print("Model data successfully removed")
        else:
            print("Model data not found in memory")

    def delete_model_dataset(self, model, X, y):
        """Remove the stored data of ``model`` for the dataset ``(X, y)``."""
        csv_file = self._get_file_path(model, X, y)

        if os.path.exists(csv_file):
            os.remove(csv_file)
            print("Model data successfully removed")
        else:
            print("Model data not found in memory")

    def connect_model_IDs(self, model1, model2):
        """Link two model IDs so they share memory (bidirectional).

        TODO: check that both search spaces have the same dimensions.
        """
        with open(self.meta_path + "model_connections.json") as f:
            data = json.load(f)

        model1_hash = model_id(model1)
        model2_hash = model_id(model2)

        if model1_hash in data:
            data = _connect_key2value(data, model1_hash, model2_hash)
        else:
            data[model1_hash] = [model2_hash]
            print("IDs successfully connected")

        if model2_hash in data:
            data = _connect_key2value(data, model2_hash, model1_hash)
        else:
            data[model2_hash] = [model1_hash]
            print("IDs successfully connected")

        with open(self.meta_path + "model_connections.json", "w") as f:
            json.dump(data, f, indent=4)

    def split_model_IDs(self, model1, model2):
        """Remove the bidirectional link between two model IDs.

        TODO: check that both search spaces have the same dimensions.
        """
        with open(self.meta_path + "model_connections.json") as f:
            data = json.load(f)

        model1_hash = model_id(model1)
        model2_hash = model_id(model2)

        if model1_hash in data:
            data = _split_key_value(data, model1_hash, model2_hash)
        else:
            print("IDs of models are not connected")

        if model2_hash in data:
            data = _split_key_value(data, model2_hash, model1_hash)
        else:
            print("IDs of models are not connected")

        with open(self.meta_path + "model_connections.json", "w") as f:
            json.dump(data, f, indent=4)

    def _get_file_path(self, model, X, y):
        """Return the csv path for ``model`` on dataset ``(X, y)``.

        NOTE(review): assumes ``self.meta_path`` ends with a path
        separator, matching the rest of this class — confirm in _paths_.
        """
        func_path_ = "model_id:" + model_id(model) + "/"
        func_path = self.meta_path + func_path_

        feature_hash = object_hash(X)
        label_hash = object_hash(y)

        return func_path + (feature_hash + "_" + label_hash + "_.csv")
|
194
|
|
|
|