|
1
|
|
|
# Author: Simon Blanke |
|
2
|
|
|
# Email: [email protected] |
|
3
|
|
|
# License: MIT License |
|
4
|
|
|
|
|
5
|
|
|
import os |
|
6
|
|
|
import json |
|
7
|
|
|
import glob |
|
8
|
|
|
|
|
9
|
|
|
import numpy as np |
|
10
|
|
|
import pandas as pd |
|
11
|
|
|
|
|
12
|
|
|
from functools import partial |
|
13
|
|
|
|
|
14
|
|
|
from .memory_io import MemoryIO |
|
15
|
|
|
from .utils import model_id, model_path |
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
class MemoryLoad(MemoryIO):
    """Load stored evaluation results of a model and of its connected models.

    On construction the ids of all models whose stored meta data should be
    read (``con_ids``) are resolved from ``model_connections.json``.
    ``hyperactive_memory_load`` then reads the matching CSV files and converts
    them into a ``{position tuple: score record}`` dictionary.

    NOTE(review): ``model``, ``search_space``, ``meta_path``,
    ``meta_data_name``, ``para_names``, ``score_col_name`` and ``hash2obj``
    are read but never assigned in this class; presumably
    ``MemoryIO.__init__`` provides them -- confirm against ``memory_io``.
    """

    def __init__(self, X, y, model, search_space):
        """Resolve which model ids contribute stored meta data.

        Creates ``model_connections.json`` (containing ``{}``) inside
        ``self.meta_path`` if it does not exist yet, loads it into
        ``self.model_con`` and stores the resulting ids as a set in
        ``self.con_ids``.
        """
        super().__init__(X, y, model, search_space)

        self.pos_best = None
        self.score_best = -np.inf

        self.meta_data_found = False

        self.con_ids = []

        connections_file = self.meta_path + "model_connections.json"

        # Create an empty connection file first so the read below also works
        # for a meta data location that has never been used.
        if not os.path.exists(connections_file):
            with open(connections_file, "w") as f:
                json.dump({}, f, indent=4)

        with open(connections_file) as f:
            self.model_con = json.load(f)

        own_id = model_id(self.model)

        # The model's own stored data must always be considered. Seeding the
        # list with its id covers connection entries that are empty or that
        # do not link back to this model.
        self.con_ids = [own_id]
        if own_id in self.model_con:
            self._get_id_list(self.model_con[own_id])

        self.con_ids = set(self.con_ids)

    def _get_id_list(self, id_list):
        """Append ``id_list`` and everything reachable from it to ``con_ids``.

        ``self.con_ids`` must still be a list when this is called (it is
        converted to a set at the end of ``__init__``).
        """
        self.con_ids = self.con_ids + id_list

        for con_id in id_list:
            # An id may be referenced by another model without having an
            # entry of its own; treat that as "no further connections".
            linked_ids = self.model_con.get(con_id, [])

            # Nothing new behind this id -> no need to descend. This check is
            # also what terminates the recursion on cyclic connections.
            if set(linked_ids).issubset(self.con_ids):
                continue

            self._get_id_list(linked_ids)

    def hyperactive_memory_load(self):
        """Return the stored results as a memory dictionary.

        Returns
        -------
        dict
            Maps position tuples to score records. Empty if no meta data
            could be read. ``self.n_dims`` is set to the number of parameter
            columns when meta data was found.
        """
        para, score = self._read_func_metadata(self.model)
        if para is None or score is None:
            print("No meta data found")
            return {}

        memory_dict = self._load_data_into_memory(para, score)
        self.n_dims = len(para.columns)

        return memory_dict

    def _read_func_metadata(self, model_func):
        """Read and concatenate all meta data CSV files of ``con_ids``.

        ``model_func`` is unused; it is kept so existing callers keep working.

        Returns
        -------
        tuple
            ``(para, score)`` selected from the concatenated data via
            ``self.para_names`` and ``self.score_col_name``, or
            ``(None, None)`` if no file was found.
        """
        meta_data_list = [
            pd.read_csv(path) for path in self._get_func_data_names()
        ]

        if not meta_data_list:
            return None, None

        self.meta_data_found = True

        # ignore_index gives every row a unique label, which the row
        # filtering in _load_data_into_memory relies on.
        meta_data = pd.concat(meta_data_list, ignore_index=True)

        para = meta_data[self.para_names]
        score = meta_data[self.score_col_name]

        return para, score

    def _get_func_data_names(self):
        """Return the paths of all meta data files belonging to ``con_ids``."""
        paths = []
        for con_id in self.con_ids:
            paths.extend(
                glob.glob(
                    self.meta_path + model_path(con_id) + self.meta_data_name
                )
            )

        return paths

    def idx_closest_values(self, X, Y):
        """Return, for every value in ``Y``, the index of the closest value in ``X``.

        Both arguments are converted to NumPy arrays first: indexing a pandas
        Series with ``[:, np.newaxis]`` is not supported by pandas >= 2.0.
        """
        candidates = np.asarray(X)
        values = np.asarray(Y)

        # Row i holds the distances of values[i] to every candidate.
        dist = np.absolute(candidates - values[:, np.newaxis])
        return dist.argmin(axis=1)

    def apply_index(self, pos_key, df):
        """Return the index of ``df`` in ``search_space[pos_key]``, else ``None``."""
        dim_values = self.search_space[pos_key]
        return dim_values.index(df) if df in dim_values else None

    def para2pos(self, paras):
        """Convert a DataFrame of parameter values into search space indices.

        Dimensions whose first search space entry is a ``float`` are mapped to
        the index of the closest search space value; all other dimensions need
        an exact match. Rows with a value that is not part of the search space
        are dropped, so the result can have fewer rows than ``paras`` (the
        index labels of the surviving rows are kept).
        """
        paras = paras[self.para_names]
        pos = paras.copy()

        for pos_key in self.search_space:
            dim_values = self.search_space[pos_key]

            if isinstance(dim_values[0], float):
                pos[pos_key] = self.idx_closest_values(
                    dim_values, paras[pos_key]
                )
            else:
                apply_index = partial(self.apply_index, pos_key)
                pos[pos_key] = paras[pos_key].apply(apply_index)

        # apply_index yields None (-> NaN) for values outside the search space
        pos.dropna(how="any", inplace=True)
        pos = pos.astype("int64")

        return pos

    def _load_data_into_memory(self, paras, scores):
        """Build the memory dictionary and remember the best stored result.

        Parameters
        ----------
        paras : pandas.DataFrame
            Stored parameter values, one row per evaluation.
        scores : pandas.DataFrame
            Stored score columns with the same rows as ``paras``; the first
            column is used to determine the best evaluation.

        Returns
        -------
        dict
            Maps position tuples to the score record of the same row. Empty
            if there are no stored rows.
        """
        paras = paras.replace(self.hash2obj)
        pos = self.para2pos(paras)

        scores_np = np.array(scores)[:, 0]
        if scores_np.size == 0:
            # Files without any row: np.argmax would raise on empty input.
            return {}
        paras_np = np.array(paras)

        # NOTE(review): the best entry is taken over ALL stored rows and
        # pos_best holds parameter values (not indices), exactly as before --
        # confirm that this is what the consumers of pos_best expect.
        idx = np.argmax(scores_np)
        self.score_best = scores_np[idx]
        self.pos_best = paras_np[idx]

        # para2pos drops rows that do not fit into the search space. Keep
        # only the scores of the surviving rows, otherwise every position
        # after the first dropped row would be paired with the wrong score.
        kept_rows = paras.index.isin(pos.index)
        score_records = scores[kept_rows].to_dict("records")

        tuple_list = list(map(tuple, pos.values))
        memory_dict = dict(zip(tuple_list, score_records))

        print("Meta data successfully loaded")

        return memory_dict
|
157
|
|
|
|