Completed
Push — master ( 9ccc17...a45661 )
by Simon
04:00 queued 22s
created

LongTermMemory._get_para()   A

Complexity

Conditions 1

Size

Total Lines 4
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import sys
7
import glob
8
import json
9
import dill
10
import pickle
11
import datetime
12
import hashlib
13
import inspect
14
15
import numpy as np
16
import pandas as pd
17
18
19
class Memory:
    """Base class holding the state shared by all memory variants.

    Keeps references to the search space and the run configuration, the best
    position/score seen so far, and a dictionary that subclasses may fill
    with already-known scores.
    """

    def __init__(self, _space_, _main_args_, _cand_):
        """Create an empty memory.

        Args:
            _space_: search-space object; stored unchanged.
            _main_args_: run configuration; its ``memory`` attribute is
                copied to ``memory_type``.
            _cand_: accepted so that all memory classes share one constructor
                signature; not used by the base class.
        """
        self._space_ = _space_
        self._main_args_ = _main_args_

        # Nothing evaluated yet: every real score compares greater than -inf.
        self.pos_best = None
        self.score_best = -np.inf

        self.memory_type = _main_args_.memory
        self.memory_dict = {}

        self.meta_data_found = False
31
32
33
class ShortTermMemory(Memory):
    """Memory variant that adds nothing on top of the ``Memory`` base class."""

    def __init__(self, _space_, _main_args_, _cand_):
        """Forward all arguments unchanged to ``Memory.__init__``."""
        super().__init__(_space_, _main_args_, _cand_)
36
37
38
class LongTermMemory(Memory):
    """Memory variant that persists evaluated parameters and scores on disk.

    Directory layout (all below the directory containing this module)::

        meta_data/<sha1 of objective source>/            -> ``func_path``
            <feature hash>_<label hash>_.csv             parameters + scores
            objective_function.py                        objective source
            <hash>.pkl                                   non-primitive values
            <run timestamp>/                             -> ``date_path``
                search_config.py
                run_data.json
    """

    def __init__(self, _space_, _main_args_, _cand_):
        """Set up the storage paths and create the directory for this run.

        Args:
            _space_: search-space object, forwarded to ``Memory``.
            _main_args_: run configuration, forwarded to ``Memory``.
            _cand_: candidate object; its ``func_`` attribute is the
                objective function whose source code identifies the
                storage directory.
        """
        super().__init__(_space_, _main_args_, _cand_)

        self.score_col_name = "mean_test_score"

        # os.path.dirname works with any path separator, unlike splitting
        # on "/" which fails to unpack on Windows-style paths.
        meta_learn_path = os.path.dirname(os.path.realpath(__file__))

        self.datetime = datetime.datetime.now().strftime("%d.%m.%Y - %H:%M:%S")
        func_str = self._get_func_str(_cand_.func_)
        self.func_path_ = self._get_hash(func_str.encode("utf-8")) + "/"

        self.meta_path = meta_learn_path + "/meta_data/"
        self.func_path = self.meta_path + self.func_path_
        self.date_path = self.meta_path + self.func_path_ + self.datetime + "/"

        os.makedirs(self.date_path, exist_ok=True)

    def load_memory(self, model_func):
        """Read stored meta data from disk and fill ``memory_dict`` with it.

        Does nothing when no stored meta data is found.

        Args:
            model_func: objective function; only used in the warning that is
                printed when nothing is found.
        """
        para, score = self._read_func_metadata(model_func)
        if para is None or score is None:
            return

        self._load_data_into_memory(para, score)

    def save_memory(self, _main_args_, _opt_args_, _cand_):
        """Write the results and the configuration of this run to disk.

        Writes the parameter/score CSV, the objective function source (only
        if not yet present), the search configuration (only if not yet
        present) and ``run_data.json``.

        Args:
            _main_args_: not used; the instance's own ``_main_args_`` is read.
            _opt_args_: not used.
            _cand_: candidate object providing ``func_``, ``eval_time`` and
                ``total_time``.
        """
        path = self._get_file_path(_cand_.func_)
        meta_data = self._collect(_cand_)
        meta_data["run"] = self.datetime
        self._save_toCSV(meta_data, path)

        obj_func_path = self.func_path + "objective_function.py"
        if not os.path.exists(obj_func_path):
            with open(obj_func_path, "w") as file:
                file.write(self._get_func_str(_cand_.func_))

        # Replace non-string keys by their ``__name__`` so the written text
        # is readable. A new dict is built instead of adding/deleting keys
        # while iterating over the same dict.
        # NOTE(review): non-string keys are presumably functions - confirm.
        search_config_temp = {
            (key if isinstance(key, str) else key.__name__): value
            for key, value in dict(self._main_args_.search_config).items()
        }
        search_config_str = "search_config = " + str(search_config_temp)

        search_config_path = self.date_path + "search_config.py"
        if not os.path.exists(search_config_path):
            with open(search_config_path, "w") as file:
                file.write(search_config_str)

        run_data = {
            "random_state": self._main_args_.random_state,
            # Previously this entry was filled with ``random_state``.
            "max_time": self._main_args_.max_time,
            "n_iter": self._main_args_.n_iter,
            "optimizer": self._main_args_.optimizer,
            "n_jobs": self._main_args_.n_jobs,
            # float() keeps the value JSON-serialisable for integer inputs.
            "eval_time": float(np.array(_cand_.eval_time).sum()),
            "total_time": _cand_.total_time,
        }

        # Stored next to search_config.py instead of the current working
        # directory, so every run keeps its own file.
        with open(self.date_path + "run_data.json", "w") as f:
            json.dump(run_data, f, indent=4)

    def _save_toCSV(self, meta_data_new, path):
        """Write ``meta_data_new`` to ``path``, merging with existing rows.

        When the file already exists, old and new rows are concatenated and
        rows that agree in every column except the score/time/run columns
        are de-duplicated (the first occurrence is kept).

        Args:
            meta_data_new: DataFrame with the rows of the current run.
            path: target CSV file.
        """
        if os.path.exists(path):
            meta_data_old = pd.read_csv(path)
            # DataFrame.append was removed in pandas 2.0.
            meta_data = pd.concat([meta_data_old, meta_data_new])

            no_score = ("mean_test_score", "cv_default_score", "eval_time", "run")
            columns_no_score = [c for c in meta_data.columns if c not in no_score]

            meta_data = meta_data.drop_duplicates(subset=columns_no_score)
        else:
            meta_data = meta_data_new

        meta_data.to_csv(path, index=False)

    def _read_func_metadata(self, model_func):
        """Load every stored CSV of the objective function.

        Args:
            model_func: objective function; only used in the warning message.

        Returns:
            ``(para, score)``: two DataFrames, the second holding all columns
            whose name contains ``score_col_name`` and the first holding the
            remaining columns; ``(None, None)`` if no CSV file exists.
        """
        meta_data_list = [pd.read_csv(path) for path in self._get_func_data_names()]

        if not meta_data_list:
            print("Warning: No meta data found for following function:", model_func)
            return None, None

        self.meta_data_found = True
        meta_data = pd.concat(meta_data_list, ignore_index=True)

        score_name = [
            name for name in meta_data.columns if self.score_col_name in name
        ]

        para = meta_data.drop(score_name, axis=1)
        score = meta_data[score_name]

        print("\rLoading meta data successful ")
        return para, score

    def _get_opt_meta_data(self):
        """Convert ``memory_dict`` into parameter and score lists.

        Parameter values that are not ``int``, ``float`` or ``str`` are
        serialised to ``<func_path>/<sha1>.pkl`` and replaced by that hash.
        Entries with a score of 0 are left out of the result.

        Returns:
            dict with the keys ``"params"`` (list of parameter dicts) and
            ``"mean_test_score"`` (list of scores).
        """
        para_list = []
        score_list = []

        for pos_key, score in self.memory_dict.items():
            # Keys are raw array bytes; copy() yields a writable array as
            # the deprecated np.fromstring did.
            pos = np.frombuffer(pos_key, dtype=int).copy()
            para = self._space_.pos2para(pos)

            for name, value in list(para.items()):
                if isinstance(value, (int, float, str)):
                    continue

                para_dill = dill.dumps(value)
                para_hash = self._get_hash(para_dill)

                # NOTE(review): para_dill is already serialised and is
                # serialised a second time here, so a reader has to load
                # twice. Kept to preserve the on-disk format - confirm
                # against the code that reads these files.
                with open(
                    self.func_path + str(para_hash) + ".pkl", "wb"
                ) as pickle_file:
                    dill.dump(para_dill, pickle_file)

                para[name] = para_hash

            if score != 0:
                para_list.append(para)
                score_list.append(score)

        return {"params": para_list, "mean_test_score": score_list}

    def _load_data_into_memory(self, paras, scores):
        """Fill ``memory_dict`` and update the best position and score.

        Args:
            paras: DataFrame with one parameter set per row.
            scores: scores aligned row-wise with ``paras``; exactly one
                score value per row is expected.
        """
        for idx in range(paras.shape[0]):
            pos = self._space_.para2pos(paras.iloc[[idx]], self._get_pkl_hash)
            # tobytes() returns the same bytes as the removed tostring().
            pos_str = pos.tobytes()

            # .item() extracts the single value without relying on the
            # deprecated float() conversion of a one-element array.
            score = float(np.asarray(scores.values[idx]).item())
            self.memory_dict[pos_str] = score

            if score > self.score_best:
                self.score_best = score
                self.pos_best = pos

    def _collect(self, _cand_):
        """Build the DataFrame that is written to the CSV file.

        Args:
            _cand_: candidate object providing ``eval_time``.

        Returns:
            DataFrame with the parameter columns, ``mean_test_score`` and
            ``eval_time`` placed side by side.
        """
        results_dict = self._get_opt_meta_data()

        para_pd = pd.DataFrame(results_dict["params"])
        metric_pd = pd.DataFrame(
            results_dict["mean_test_score"], columns=["mean_test_score"]
        )
        eval_time = pd.DataFrame(_cand_.eval_time, columns=["eval_time"])

        return pd.concat(
            [para_pd, metric_pd, eval_time], axis=1, ignore_index=False
        )

    def _get_hash(self, object):
        """Return the SHA-1 hex digest of a bytes-like ``object``."""
        return hashlib.sha1(object).hexdigest()

    def _get_func_str(self, func):
        """Return the source code of ``func`` as a string."""
        return inspect.getsource(func)

    def _get_subdirs(self):
        """Return the sub-directories of ``func_path``."""
        return glob.glob(self.func_path + "*/")

    def _get_func_data_names1(self):
        """Return the CSV files found in the sub-directories of ``func_path``."""
        path_list = []
        for subdir in self._get_subdirs():
            path_list.extend(glob.glob(subdir + "*.csv"))

        return path_list

    def _get_func_data_names(self):
        """Return the ``*_.csv`` files located directly in ``func_path``."""
        return glob.glob(self.func_path + "*_.csv")

    def _get_pkl_hash(self, hash):
        """Return the ``.pkl`` files in ``func_path`` starting with ``hash``."""
        return glob.glob(self.func_path + hash + "*.pkl")

    def _get_file_path(self, model_func):
        """Return the CSV path that belongs to the current data set.

        The file name is built from the SHA-1 digests of the features ``X``
        and the labels ``y`` of ``_main_args_``. The run directory is
        created if it does not exist.

        Args:
            model_func: not used.
        """
        feature_hash = self._get_hash(self._main_args_.X)
        label_hash = self._get_hash(self._main_args_.y)

        # exist_ok avoids a failure when the directory appears between an
        # existence check and its creation.
        os.makedirs(self.date_path, exist_ok=True)

        return self.func_path + (feature_hash + "_" + label_hash + "_.csv")
270