1
|
|
|
# Author: Simon Blanke |
2
|
|
|
# Email: [email protected] |
3
|
|
|
# License: MIT License |
4
|
|
|
|
5
|
|
|
import os |
6
|
|
|
import sys |
7
|
|
|
import json |
8
|
|
|
import dill |
9
|
|
|
import shutil |
10
|
|
|
import pathlib |
11
|
|
|
from fnmatch import fnmatch |
12
|
|
|
|
13
|
|
|
import numpy as np |
14
|
|
|
import pandas as pd |
15
|
|
|
|
16
|
|
|
from .util import get_hash, get_model_id, get_func_str |
17
|
|
|
from .paths import get_meta_path, get_meta_data_name |
18
|
|
|
|
19
|
|
|
|
20
|
|
|
# Root directory of the long term memory, resolved once at import time.
meta_path = get_meta_path()

# NOTE(review): the triple-quoted string below is dead code kept as a no-op
# module-level string literal (unfinished TODO stubs). Consider deleting it
# once the TODOs are resolved — commented-out code should not ship.
"""
def get_best_models(X, y):
    # TODO: model_dict key:model value:score

    return model_dict


def get_model_search_config(model):
    # TODO
    return search_config


def get_model_init_config(model):
    # TODO
    return init_config
"""
38
|
|
|
|
39
|
|
|
|
40
|
|
|
def get_best_model(X, y):
    """Find the best scoring model stored in the long term memory for (X, y).

    Walks the memory directory tree for meta data files whose name matches
    the dataset (X, y), selects the run with the highest "_score_" value and
    loads that model's pickled objective function and search space.

    Parameters
    ----------
    X : array-like
        Training features; only used to identify the dataset file pattern.
    y : array-like
        Training labels; only used to identify the dataset file pattern.

    Returns
    -------
    tuple
        (best_score, {objective_function: search_space},
        {objective_function: None})  # TODO: init_config

    Raises
    ------
    FileNotFoundError
        If no meta data for this dataset exists in the memory.
    """
    meta_data_paths = []
    pattern = get_meta_data_name(X, y)

    # Collect every meta data file in the memory tree belonging to (X, y).
    for dir_path, _subdirs, files in os.walk(meta_path):
        for name in files:
            if fnmatch(name, pattern):
                meta_data_paths.append(pathlib.PurePath(dir_path, name))

    score_best = -np.inf
    # Sentinels: previously these stayed unbound when no file matched,
    # crashing below with a confusing UnboundLocalError.
    obj_func = None
    search_space = None

    for meta_data_path in meta_data_paths:
        meta_data_path = str(meta_data_path)
        meta_data = pd.read_csv(meta_data_path)
        scores = meta_data["_score_"].values

        if len(scores) == 0:
            # Empty meta data file: nothing to compare, skip it.
            continue

        score_max = scores.max()

        if score_max > score_best:
            score_best = score_max

            # The model directory is everything before the dataset id part.
            model_path = meta_data_path.rsplit("dataset_id:", 1)[0]

            obj_func_path = model_path + "objective_function.pkl"
            search_space_path = model_path + "search_space.pkl"

            with open(obj_func_path, "rb") as fp:
                obj_func = dill.load(fp)

            with open(search_space_path, "rb") as fp:
                search_space = dill.load(fp)

    if obj_func is None:
        # Fail with a clear, catchable error instead of an UnboundLocalError.
        raise FileNotFoundError(
            "No meta data found in memory for the given dataset"
        )

    return (
        score_best,
        {obj_func: search_space},
        {obj_func: None},
    )  # TODO: init_config
82
|
|
|
|
83
|
|
|
|
84
|
|
|
def reset_memory(force_true=False):
    """Delete the entire long term memory.

    Parameters
    ----------
    force_true : bool, optional
        If True, skip the interactive confirmation prompt.
    """
    # Short-circuit: the prompt is only shown when not forced.
    if force_true or query_yes_no():
        _reset_memory()
89
|
|
|
|
90
|
|
|
|
91
|
|
|
def _reset_memory():
    """Remove every model sub-directory from the memory path and reset the
    model connections file to an empty mapping."""
    # First level of sub-directories inside the memory path (one per model).
    sub_dirs = next(os.walk(meta_path))[1]
    # Renamed loop variable from `dir`, which shadowed the builtin.
    for sub_dir in sub_dirs:
        shutil.rmtree(meta_path + sub_dir)

    # Clear the model connection graph.
    with open(meta_path + "model_connections.json", "w") as f:
        json.dump({}, f, indent=4)

    print("Memory reset successful")
100
|
|
|
|
101
|
|
|
|
102
|
|
|
def query_yes_no():
    """Interactively ask whether the long term memory should be deleted.

    Prompts repeatedly until a recognized answer is entered. An empty
    answer is treated as "no".

    Returns
    -------
    bool
        True for a "yes"-like answer, False otherwise.
    """
    answers = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    question = "Delete the entire long term memory?"

    while True:
        sys.stdout.write(question + " [y/n] ")
        reply = input().lower()
        if not reply:
            # Empty input defaults to "no".
            return False
        if reply in answers:
            return answers[reply]
        sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
115
|
|
|
|
116
|
|
|
|
117
|
|
|
def delete_model(model):
    """Remove all stored meta data of a model from the long term memory."""
    model_dir = meta_path + str(get_model_id(model))

    # isdir already implies existence, so a separate exists() check is not
    # needed.
    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)
        print("Model data successfully removed")
    else:
        print("Model data not found in memory")
126
|
|
|
|
127
|
|
|
|
128
|
|
|
def delete_model_dataset(model, X, y):
    """Remove the meta data csv of a single (model, dataset) pair."""
    csv_path = _get_file_path(model, X, y)

    # Guard clause: nothing to delete.
    if not os.path.exists(csv_path):
        print("Model data not found in memory")
        return

    os.remove(csv_path)
    print("Model data successfully removed")
136
|
|
|
|
137
|
|
|
|
138
|
|
|
def connect_model_IDs(model1, model2):
    """Connect the ids of two models in the model connections file so their
    long term memory entries can be shared."""
    # do checks if search space has same dim

    with open(meta_path + "model_connections.json") as f:
        connections = json.load(f)

    hash1 = get_model_id(model1)
    hash2 = get_model_id(model2)

    # Register the connection in both directions, in the same order the
    # original two-branch version used.
    for key_hash, value_hash in ((hash1, hash2), (hash2, hash1)):
        if key_hash in connections:
            connections = _connect_key2value(connections, key_hash, value_hash)
        else:
            connections[key_hash] = [value_hash]
            print("IDs successfully connected")

    with open(meta_path + "model_connections.json", "w") as f:
        json.dump(connections, f, indent=4)
165
|
|
|
|
166
|
|
|
|
167
|
|
|
def _connect_key2value(data, key_model, value_model): |
168
|
|
|
if value_model in data[key_model]: |
169
|
|
|
print("IDs of models are already connected") |
170
|
|
|
else: |
171
|
|
|
data[key_model].append(value_model) |
172
|
|
|
print("IDs successfully connected") |
173
|
|
|
|
174
|
|
|
return data |
175
|
|
|
|
176
|
|
|
|
177
|
|
|
def _split_key_value(data, key_model, value_model): |
178
|
|
|
if value_model in data[key_model]: |
179
|
|
|
data[key_model].remove(value_model) |
180
|
|
|
|
181
|
|
|
if len(data[key_model]) == 0: |
182
|
|
|
del data[key_model] |
183
|
|
|
print("ID connection successfully deleted") |
184
|
|
|
else: |
185
|
|
|
print("IDs of models are not connected") |
186
|
|
|
|
187
|
|
|
return data |
188
|
|
|
|
189
|
|
|
|
190
|
|
|
def split_model_IDs(model1, model2):
    """Disconnect the ids of two models in the model connections file."""
    # TODO: do checks if search space has same dim

    with open(meta_path + "model_connections.json") as f:
        connections = json.load(f)

    hash1 = get_model_id(model1)
    hash2 = get_model_id(model2)

    # Remove the connection in both directions, in the same order the
    # original two-branch version used.
    for key_hash, value_hash in ((hash1, hash2), (hash2, hash1)):
        if key_hash in connections:
            connections = _split_key_value(connections, key_hash, value_hash)
        else:
            print("IDs of models are not connected")

    with open(meta_path + "model_connections.json", "w") as f:
        json.dump(connections, f, indent=4)
215
|
|
|
|
216
|
|
|
|
217
|
|
|
def _get_file_path(model, X, y):
    """Build the csv file path storing the meta data of (model, X, y).

    The path is <meta_path>/<model_id>/<hash(X)>_<hash(y)>_.csv.
    """
    model_dir = meta_path + get_model_id(model) + "/"
    data_name = get_hash(X) + "_" + get_hash(y) + "_.csv"
    return model_dir + data_name
225
|
|
|
|