|
1
|
|
|
# Author: Simon Blanke |
|
2
|
|
|
# Email: [email protected] |
|
3
|
|
|
# License: MIT License |
|
4
|
|
|
|
|
5
|
|
|
import os |
|
6
|
|
|
import sys |
|
7
|
|
|
import json |
|
8
|
|
|
import dill |
|
9
|
|
|
import shutil |
|
10
|
|
|
import pathlib |
|
11
|
|
|
from fnmatch import fnmatch |
|
12
|
|
|
|
|
13
|
|
|
import numpy as np |
|
14
|
|
|
import pandas as pd |
|
15
|
|
|
|
|
16
|
|
|
from .memory_load import MemoryLoad |
|
17
|
|
|
from .memory_dump import MemoryDump |
|
18
|
|
|
|
|
19
|
|
|
from .utils import ( |
|
20
|
|
|
_connect_key2value, |
|
21
|
|
|
_split_key_value, |
|
22
|
|
|
_reset_memory, |
|
23
|
|
|
_query_yes_no, |
|
24
|
|
|
object_hash, |
|
25
|
|
|
model_id, |
|
26
|
|
|
meta_data_name, |
|
27
|
|
|
) |
|
28
|
|
|
|
|
29
|
|
|
from .paths import _paths_ |
|
30
|
|
|
|
|
31
|
|
|
|
|
32
|
|
|
class Hypermemory(MemoryDump):
    """High-level interface to the on-disk long-term memory.

    Wraps :class:`MemoryLoad` / :class:`MemoryDump` to load and persist
    search data, look up the best stored model for a dataset, and perform
    maintenance (reset, delete, connect/split model IDs) on the meta-data
    directory rooted at ``_paths_["default"]``.
    """

    def __init__(self, *args, **kwargs):
        # Nothing is loaded yet; load() populates these attributes.
        self.memory_dict = None
        self.meta_data_found = False
        self.n_dims = None

        # Root directory where all meta data is stored.
        self.meta_path = _paths_["default"]

    def load(self, X=None, y=None, model=None, search_space=None):
        """Load previously stored search data for the given context.

        BUG FIX: ``X``, ``y``, ``model`` and ``search_space`` were free
        (undefined) names in the original body, so calling ``load()``
        always raised ``NameError``. They are now explicit parameters
        with ``None`` defaults (backward-compatible).

        Returns the loaded memory dictionary.
        """
        self._load_ = MemoryLoad(X, y, model, search_space)

        self.memory_dict = self._load_.hyperactive_memory_load()
        self.meta_data_found = self._load_.meta_data_found

        self.score_best = self._load_.score_best
        self.pos_best = self._load_.pos_best

        return self.memory_dict

    def dump(self, memory, X=None, y=None, model=None, search_space=None):
        """Persist ``memory`` to disk for the given context.

        BUG FIX: as in :meth:`load`, the four context arguments were
        undefined free names before; they are now parameters.
        """
        self._dump_ = MemoryDump(X, y, model, search_space)
        self._dump_.hyperactive_memory_dump(memory)

    def _get_para(self):
        """Return (parameter values, scores-as-column-vector) arrays.

        Requires :meth:`load` and :meth:`dump` to have been called first;
        prints an error and returns ``None`` otherwise.
        """
        if self.memory_dict is None:
            print("Error")
            return

        para_pd, metrics_pd = self._dump_._get_opt_meta_data(self.memory_dict)

        # Scores as a column vector so they align row-wise with parameters.
        return para_pd.values, np.expand_dims(metrics_pd["score"].values, axis=1)

    def get_best_model(self, X, y):
        """Find the best stored model for dataset ``(X, y)``.

        Walks the meta-data store for csv files matching this dataset,
        picks the one with the highest ``_score_`` and unpickles the
        associated objective function and search space.

        Returns:
            (best_score, {obj_func: search_space}, {obj_func: best_para})

        Raises:
            FileNotFoundError: if no matching meta data exists.
        """
        meta_data_paths = []
        pattern = meta_data_name(X, y)

        # Collect every meta-data csv belonging to this dataset.
        for path, subdirs, files in os.walk(self.meta_path):
            for name in files:
                if fnmatch(name, pattern):
                    meta_data_paths.append(pathlib.PurePath(path, name))

        score_best = -np.inf
        # BUG FIX: these were referenced after the loop without ever being
        # initialized, raising NameError whenever no meta data was found.
        obj_func = None
        search_space = None
        best_para = None

        for path in meta_data_paths:
            path = str(path)
            meta_data = pd.read_csv(path)
            scores = meta_data["_score_"].values

            score_max = scores.max()

            if score_max > score_best:
                score_best = score_max

                # The pickled objective function and search space live in
                # the model directory above the "dataset_id:" subdirectory.
                model_path = path.rsplit("dataset_id:", 1)[0]

                obj_func_path = model_path + "objective_function.pkl"
                search_space_path = model_path + "search_space.pkl"

                # NOTE(review): dill.load executes arbitrary code; only
                # safe because these files are written by this package.
                with open(obj_func_path, "rb") as fp:
                    obj_func = dill.load(fp)

                with open(search_space_path, "rb") as fp:
                    search_space = dill.load(fp)

                para_names = list(search_space.keys())

                # Row(s) that achieved the best score; take the first one.
                best_para = meta_data[meta_data["_score_"] == score_max]
                best_para = best_para[para_names].iloc[0]

                best_para = best_para.to_dict()

        if obj_func is None:
            raise FileNotFoundError(
                "No meta data found for this dataset in " + str(self.meta_path)
            )

        return (score_best, {obj_func: search_space}, {obj_func: best_para})

    def reset_memory(self, force_true=False):
        """Delete all stored meta data, asking for confirmation unless forced."""
        if force_true:
            _reset_memory(self.meta_path)
        elif _query_yes_no():
            _reset_memory(self.meta_path)

    def delete_model(self, model):
        """Remove every stored dataset of ``model`` from the memory."""
        model_hash = model_id(model)
        path = self.meta_path + "model_id:" + str(model_hash)

        # isdir() already implies existence; the extra exists() check
        # in the original was redundant.
        if os.path.isdir(path):
            shutil.rmtree(path)
            print("Model data successfully removed")
        else:
            print("Model data not found in memory")

    def delete_model_dataset(self, model, X, y):
        """Remove the stored data of ``model`` for the dataset ``(X, y)``."""
        csv_file = self._get_file_path(model, X, y)

        if os.path.exists(csv_file):
            os.remove(csv_file)
            print("Model data successfully removed")
        else:
            print("Model data not found in memory")

    def connect_model_IDs(self, model1, model2):
        """Link two model IDs so they share memory (bidirectional).

        TODO: check that both search spaces have the same dimensions.
        """
        with open(self.meta_path + "model_connections.json") as f:
            data = json.load(f)

        model1_hash = model_id(model1)
        model2_hash = model_id(model2)

        if model1_hash in data:
            data = _connect_key2value(data, model1_hash, model2_hash)
        else:
            data[model1_hash] = [model2_hash]
            print("IDs successfully connected")

        if model2_hash in data:
            data = _connect_key2value(data, model2_hash, model1_hash)
        else:
            data[model2_hash] = [model1_hash]
            print("IDs successfully connected")

        with open(self.meta_path + "model_connections.json", "w") as f:
            json.dump(data, f, indent=4)

    def split_model_IDs(self, model1, model2):
        """Remove the bidirectional link between two model IDs.

        TODO: check that both search spaces have the same dimensions.
        """
        with open(self.meta_path + "model_connections.json") as f:
            data = json.load(f)

        model1_hash = model_id(model1)
        model2_hash = model_id(model2)

        if model1_hash in data:
            data = _split_key_value(data, model1_hash, model2_hash)
        else:
            print("IDs of models are not connected")

        if model2_hash in data:
            data = _split_key_value(data, model2_hash, model1_hash)
        else:
            print("IDs of models are not connected")

        with open(self.meta_path + "model_connections.json", "w") as f:
            json.dump(data, f, indent=4)

    def _get_file_path(self, model, X, y):
        """Return the csv path for ``model`` on dataset ``(X, y)``.

        NOTE(review): assumes ``self.meta_path`` ends with a path
        separator, matching the rest of this class — confirm in _paths_.
        """
        func_path_ = "model_id:" + model_id(model) + "/"
        func_path = self.meta_path + func_path_

        feature_hash = object_hash(X)
        label_hash = object_hash(y)

        return func_path + (feature_hash + "_" + label_hash + "_.csv")
|
194
|
|
|
|