# Author: Simon Blanke
# Email: [email protected]
# License: MIT License

import os
import sys
import json
import dill
import shutil
import pathlib
from fnmatch import fnmatch

import numpy as np
import pandas as pd

from .util import get_hash, get_model_id, get_func_str
from .paths import get_meta_path, get_meta_data_name


meta_path = get_meta_path()

"""
def get_best_models(X, y):
    # TODO: model_dict key:model value:score

    return model_dict


def get_model_search_config(model):
    # TODO
    return search_config


def get_model_init_config(model):
    # TODO
    return init_config
"""


def get_best_model(X, y):
    """Return the best recorded score for the dataset (X, y) together with its
    objective function, search space and (placeholder) init config."""
    meta_data_paths = []
    pattern = get_meta_data_name(X, y)

    # collect every meta-data file in the long-term memory that belongs to this dataset
    for path, subdirs, files in os.walk(meta_path):
        for name in files:
            if fnmatch(name, pattern):
                meta_data_paths.append(pathlib.PurePath(path, name))

    score_best = -np.inf
    obj_func = None  # stays None if no stored run for this dataset is found
    search_space = None

    for meta_data_path in meta_data_paths:
        meta_data_path = str(meta_data_path)
        meta_data = pd.read_csv(meta_data_path)
        scores = meta_data["_score_"].values

        # score_mean = scores.mean()
        # score_std = scores.std()
        score_max = scores.max()
        # score_min = scores.min()

        if score_max > score_best:
            score_best = score_max

            model_path = meta_data_path.rsplit("dataset_id:", 1)[0]

            obj_func_path = model_path + "objective_function.pkl"
            search_space_path = model_path + "search_space.pkl"

            with open(obj_func_path, "rb") as fp:
                obj_func = dill.load(fp)

            with open(search_space_path, "rb") as fp:
                search_space = dill.load(fp)

            # exec(get_func_str(obj_func))

    return (
        score_best,
        {obj_func: search_space},
        {obj_func: None},
    )  # TODO: init_config


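# Illustrative usage sketch (kept as a comment so the module stays import-safe).
# It assumes X and y are the numpy arrays of a dataset whose runs were already
# written to the long-term memory, so a matching meta-data file can be found:
#
#     score, search_config, init_config = get_best_model(X, y)
#     objective_function, search_space = next(iter(search_config.items()))

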
def reset_memory(force_true=False):
    """Delete the entire long-term memory, asking for confirmation unless force_true is set."""
    if force_true:
        _reset_memory()
    elif query_yes_no():
        _reset_memory()


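# Illustrative usage sketch (comment only):
#
#     reset_memory()                 # asks for confirmation on stdin first
#     reset_memory(force_true=True)  # wipes the memory without asking

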
def _reset_memory():
    # remove every model directory inside the memory path
    dirs = next(os.walk(meta_path))[1]
    for dir_name in dirs:
        shutil.rmtree(meta_path + dir_name)

    # start over with an empty connections file
    with open(meta_path + "model_connections.json", "w") as f:
        json.dump({}, f, indent=4)

    print("Memory reset successful")


def query_yes_no():
    """Ask on stdin whether the long-term memory should be deleted."""
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    question = "Delete the entire long term memory?"

    while True:
        sys.stdout.write(question + " [y/n] ")
        choice = input().lower()
        if choice == "":
            return False
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")


def delete_model(model):
    """Remove all long-term memory entries of the given model."""
    model_hash = get_model_id(model)
    path = meta_path + str(model_hash)

    if os.path.exists(path) and os.path.isdir(path):
        shutil.rmtree(path)
        print("Model data successfully removed")
    else:
        print("Model data not found in memory")


def delete_model_dataset(model, X, y):
    """Remove the memory entry of the given model for the dataset (X, y)."""
    csv_file = _get_file_path(model, X, y)

    if os.path.exists(csv_file):
        os.remove(csv_file)
        print("Model data successfully removed")
    else:
        print("Model data not found in memory")


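# Illustrative usage sketch (comment only; `model` stands for any object that
# get_model_id can hash, e.g. an objective function or estimator class):
#
#     delete_model(model)                # drop every stored run of this model
#     delete_model_dataset(model, X, y)  # drop only the runs on dataset (X, y)

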
def connect_model_IDs(model1, model2):
    """Connect the IDs of two models so they share their long-term memory entries."""
    # TODO: check that both search spaces have the same dimensions

    with open(meta_path + "model_connections.json") as f:
        data = json.load(f)

    model1_hash = get_model_id(model1)
    model2_hash = get_model_id(model2)

    if model1_hash in data:
        key_model = model1_hash
        value_model = model2_hash
        data = _connect_key2value(data, key_model, value_model)
    else:
        data[model1_hash] = [model2_hash]
        print("IDs successfully connected")

    if model2_hash in data:
        key_model = model2_hash
        value_model = model1_hash
        data = _connect_key2value(data, key_model, value_model)
    else:
        data[model2_hash] = [model1_hash]
        print("IDs successfully connected")

    with open(meta_path + "model_connections.json", "w") as f:
        json.dump(data, f, indent=4)


def _connect_key2value(data, key_model, value_model):
    if value_model in data[key_model]:
        print("IDs of models are already connected")
    else:
        data[key_model].append(value_model)
        print("IDs successfully connected")

    return data


def _split_key_value(data, key_model, value_model):
    if value_model in data[key_model]:
        data[key_model].remove(value_model)

        # drop the key entirely once its last connection is gone
        if len(data[key_model]) == 0:
            del data[key_model]
        print("ID connection successfully deleted")
    else:
        print("IDs of models are not connected")

    return data


def split_model_IDs(model1, model2):
    """Remove the connection between the IDs of two models."""
    # TODO: do checks if search space has same dim

    with open(meta_path + "model_connections.json") as f:
        data = json.load(f)

    model1_hash = get_model_id(model1)
    model2_hash = get_model_id(model2)

    if model1_hash in data:
        key_model = model1_hash
        value_model = model2_hash
        data = _split_key_value(data, key_model, value_model)
    else:
        print("IDs of models are not connected")

    if model2_hash in data:
        key_model = model2_hash
        value_model = model1_hash
        data = _split_key_value(data, key_model, value_model)
    else:
        print("IDs of models are not connected")

    with open(meta_path + "model_connections.json", "w") as f:
        json.dump(data, f, indent=4)


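# Illustrative usage sketch (comment only; model_a and model_b are placeholder
# names for two models whose search spaces have the same dimensions):
#
#     connect_model_IDs(model_a, model_b)  # share memory entries between both IDs
#     split_model_IDs(model_a, model_b)    # undo the connection again

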
def _get_file_path(model, X, y):
    func_path_ = get_model_id(model) + "/"
    func_path = meta_path + func_path_

    feature_hash = get_hash(X)
    label_hash = get_hash(y)

    return func_path + (feature_hash + "_" + label_hash + "_.csv")