# Author: Simon Blanke
# Email: [email protected]
# License: MIT License
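
# Persists and restores Hyperactive search data ("hypermemory"): evaluated
# parameter sets and their scores are stored on disk, keyed by IDs derived
# from the dataset, the objective function (model) and the search space.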

import os
import dill
import datetime
import numpy as np
import pandas as pd
import hashlib
import inspect

from .paths import HypermemoryPaths
from .io_helpers import (
    save_object,
    load_object,
    save_json,
    load_json,
    save_dataframe,
    load_dataframes,
)

from .dataset_features import dataset_features
from .memory_conv import (
    memory_dict2dataframe,
    convert_dataframe,
    dataframe2memory_dict,
)
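

# Timestamp with microsecond resolution, used to give every saved
# search-data file a unique name.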
def get_datetime():
    return datetime.datetime.now().strftime("%d.%m.%Y - %H:%M:%S:%f")
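

# Connects one objective function (plus its dataset and search space) to the
# on-disk search data: ID directories are registered on construction, and
# save/load convert between Hyperactive's memory dict and dataframes on disk.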
class HyperactiveWrapper:
    def __init__(self, main_path, X, y, model, search_space):
        self.paths = HypermemoryPaths(main_path)
        self.paths.add_directory(name="X", prefix="X_ID:", id_type="array", object_=X)
        self.paths.add_directory(name="y", prefix="y_ID:", id_type="array", object_=y)
        self.paths.add_directory(
            name="model", prefix="model_ID:", id_type="function", object_=model
        )
        self.paths.add_directory(
            name="search_space",
            prefix="search_space_ID:",
            id_type="dictionary",
            object_=search_space,
        )

        self.X = X
        self.y = y
        self.model = model
        self.search_space = search_space
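
    # Keep only the most recent evaluation of each unique parameter set;
    # the search-space keys are the parameter columns of the dataframe.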
    def _drop_duplicates(self, dataframe):
        columns_drop = list(self.search_space.keys())
        return dataframe.drop_duplicates(subset=columns_drop, keep="last")
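
    # Collect the search data from every subdirectory of the model directory
    # (one per search space) and convert each dataframe from the search space
    # it was saved with into the current one.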
    def _load_dataframes(self):
        subdirs = self.paths.subdirs("model")

        dataframes_all = []
        for subdir in subdirs:
            search_space = load_object(path=subdir, name="search_space")
            dataframes = load_dataframes(subdir)

            for dataframe in dataframes:
                dataframe = convert_dataframe(
                    dataframe, search_space, self.search_space
                )
                dataframes_all.append(dataframe)

        return dataframes_all
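
    # Merge all stored search data into one deduplicated memory dict;
    # returns an empty dict if no search data has been saved yet.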
    def load(self):
        dataframes_all = self._load_dataframes()
        if len(dataframes_all) == 0:
            return {}

        dataframe = pd.concat(dataframes_all, axis=0)
        dataframe = self._drop_duplicates(dataframe)
        print(
            "Loading search data for",
            self.model.__name__,
            "was successful:",
            len(dataframe),
            "samples found",
        )
        memory_dict = dataframe2memory_dict(dataframe, self.search_space)

        return memory_dict
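
    # Write the dataset meta data, the pickled model and search space, and a
    # timestamped search-data dataframe into their respective ID directories.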
    def save(self, memory_dict):
        X_info = dataset_features(self.X)
        y_info = dataset_features(self.y)
        dataframe = memory_dict2dataframe(memory_dict, self.search_space)

        io_X_path = self.paths.path_dict["X"]
        io_y_path = self.paths.path_dict["y"]
        io_model_path = self.paths.path_dict["model"]
        io_search_space_path = self.paths.path_dict["search_space"]

        save_json(io_X_path, "X_meta_data", X_info)
        save_json(io_y_path, "y_meta_data", y_info)
        save_object(io_model_path, "model", self.model)
        save_object(io_search_space_path, "search_space", self.search_space)
        save_dataframe(
            io_search_space_path, "search_data_" + str(get_datetime()), dataframe
        )
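

# Usage sketch (hypothetical; the directory path and objective function are
# placeholders, not part of this module):
#
#     memory = HyperactiveWrapper("./hypermemory/", X, y, model, search_space)
#     memory_dict = memory.load()   # restore earlier evaluations, if any
#     # ...run the optimization, extending memory_dict...
#     memory.save(memory_dict)      # persist the updated search data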