Passed
Push — master ( 17e358...227259 )
by Simon
01:31
created

LongTermMemory._get_subdirs()   A

Complexity

Conditions 2

Size

Total Lines 11
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 8
dl 0
loc 11
rs 10
c 0
b 0
f 0
cc 2
nop 2
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import glob
7
import datetime
8
import hashlib
9
import inspect
10
11
import numpy as np
12
import pandas as pd
13
14
15
class Memory:
    """Base state shared by the short- and long-term memory classes.

    Keeps the search space and the run arguments handed in by the caller
    and initialises the bookkeeping for the best position/score seen so far.
    """

    def __init__(self, _space_, _main_args_):
        """Create an empty memory.

        Parameters
        ----------
        _space_ : object
            Search-space object; stored unchanged.
        _main_args_ : object
            Run arguments; stored unchanged. Its ``memory`` attribute is
            copied into ``memory_type``.
        """
        self._space_, self._main_args_ = _space_, _main_args_

        # Nothing evaluated yet: no best position, and any real score beats
        # negative infinity.
        self.pos_best, self.score_best = None, -np.inf

        self.memory_type = _main_args_.memory
        self.memory_dict = dict()

        self.meta_data_found = False

        # Creation timestamp with a trailing "/" so that it can be appended
        # to a path as a directory component.
        created_at = datetime.datetime.now()
        self.datetime = "{}/".format(created_at.strftime("%d.%m.%Y - %H:%M:%S"))
29
30
31
class ShortTermMemory(Memory):
    """Memory variant that adds no state or behaviour on top of ``Memory``."""

    def __init__(self, _space_, _main_args_):
        """Initialise the inherited ``Memory`` state from the two arguments."""
        super(ShortTermMemory, self).__init__(_space_, _main_args_)
34
35
36
class LongTermMemory(Memory):
    """Memory that persists evaluated parameters and scores as CSV files.

    Files are stored below
    ``<directory of this module>/meta_data/<sha1 of the model function source>/<timestamp>/``
    and are named ``<sha1 of X>_<sha1 of y>.csv``.
    """

    def __init__(self, _space_, _main_args_):
        """Initialise the base memory and resolve the meta-data directory."""
        super().__init__(_space_, _main_args_)

        self.score_col_name = "mean_test_score"

        # os.path copes with both "/" and "\\" separators; splitting the
        # path on "/" fails to unpack on Windows.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        # The empty last component keeps the trailing separator that the
        # string concatenations further down rely on.
        self.meta_data_path = os.path.join(module_dir, "meta_data", "")

    def load_memory(self, model_func):
        """Load stored results for ``model_func`` into ``memory_dict``.

        Does nothing when no stored meta data is found.
        """
        para, score = self._read_func_metadata(model_func)
        if para is None or score is None:
            return

        self._load_data_into_memory(para, score)

    def save_memory(self, _main_args_, _cand_):
        """Write the current memory to the CSV file for ``_cand_.func_``.

        ``_main_args_`` is accepted for interface compatibility and is not
        used; the arguments stored on the instance are used instead.
        """
        meta_data = self._collect()
        path = self._get_file_path(_cand_.func_)
        self._save_toCSV(meta_data, path)

    def _save_toCSV(self, meta_data_new, path):
        """Write ``meta_data_new`` to ``path``, merging with an existing file.

        When the file already exists, old and new rows are combined and rows
        that agree on every non-score column are de-duplicated (the first
        occurrence, i.e. the previously stored one, is kept).
        """
        if os.path.exists(path):
            meta_data_old = pd.read_csv(path)
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent supported call.
            meta_data = pd.concat([meta_data_old, meta_data_new])

            score_columns = ("mean_test_score", "cv_default_score")
            para_columns = [c for c in meta_data.columns if c not in score_columns]

            meta_data = meta_data.drop_duplicates(subset=para_columns)
        else:
            meta_data = meta_data_new

        meta_data.to_csv(path, index=False)

    def _read_func_metadata(self, model_func):
        """Read every stored CSV for ``model_func``.

        Returns
        -------
        tuple
            ``(para, score)`` data frames, where ``score`` holds the columns
            whose name contains ``self.score_col_name`` and ``para`` holds
            the remaining columns; ``(None, None)`` when no file exists.
        """
        paths = self._get_func_data_names(model_func)
        meta_data_list = [pd.read_csv(path) for path in paths]

        if not meta_data_list:
            print("Warning: No meta data found for following function:", model_func)
            return None, None

        self.meta_data_found = True
        meta_data = pd.concat(meta_data_list, ignore_index=True)

        score_name = [
            name for name in meta_data.columns if self.score_col_name in name
        ]

        para = meta_data.drop(score_name, axis=1)
        score = meta_data[score_name]

        print("Loading meta data successful")
        return para, score

    def _get_opt_meta_data(self):
        """Convert ``memory_dict`` into lists of parameters and scores.

        Entries whose score equals 0 are left out.
        """
        para_list = []
        score_list = []

        for key, score in self.memory_dict.items():
            if score == 0:
                continue

            # Keys are the raw bytes of a position array (see
            # _load_data_into_memory). frombuffer replaces the deprecated
            # binary mode of np.fromstring; the copy keeps the array
            # writable, as fromstring's result was.
            pos = np.frombuffer(key, dtype=int).copy()
            para_list.append(self._space_.pos2para(pos))
            score_list.append(score)

        return {"params": para_list, "mean_test_score": score_list}

    def _load_data_into_memory(self, paras, scores):
        """Fill ``memory_dict`` from stored rows and track the best score.

        Parameters
        ----------
        paras : pandas.DataFrame
            One row of parameters per stored evaluation.
        scores : pandas.DataFrame
            Matching scores; each row is expected to hold a single value.
        """
        score_values = scores.values

        for idx in range(paras.shape[0]):
            pos = self._space_.para2pos(paras.iloc[[idx]])
            # ndarray.tostring() was removed in NumPy 2.0; tobytes() returns
            # the same bytes.
            pos_str = pos.tobytes()

            # .item() extracts the single value without relying on the
            # deprecated implicit array-to-scalar conversion of float().
            score = float(np.asarray(score_values[idx]).item())
            self.memory_dict[pos_str] = score

            if score > self.score_best:
                self.score_best = score
                self.pos_best = pos

    def _get_para(self):
        """Return the remembered parameters as a data frame."""
        results_dict = self._get_opt_meta_data()

        return pd.DataFrame(results_dict["params"])

    def _get_score(self):
        """Return the remembered scores as a one-column data frame."""
        results_dict = self._get_opt_meta_data()
        return pd.DataFrame(
            results_dict["mean_test_score"], columns=["mean_test_score"]
        )

    def _collect(self):
        """Return parameters and scores side by side in one data frame."""
        para_pd = self._get_para()
        metric_pd = self._get_score()
        md_model = pd.concat([para_pd, metric_pd], axis=1, ignore_index=False)

        return md_model

    def _get_hash(self, object):
        """Return the SHA-1 hex digest of a bytes-like ``object``."""
        # NOTE(review): the parameter name shadows the builtin ``object``;
        # it is kept unchanged so existing callers are unaffected.
        return hashlib.sha1(object).hexdigest()

    def _get_func_str(self, func):
        """Return the source code of ``func``."""
        return inspect.getsource(func)

    def _func_directory(self, model_func):
        """Set ``self.func_path`` for ``model_func`` and return its directory.

        The directory name is the SHA-1 of the function's source code.
        """
        func_str = self._get_func_str(model_func)
        self.func_path = self._get_hash(func_str.encode("utf-8")) + "/"

        return self.meta_data_path + self.func_path

    def _get_subdirs(self, model_func):
        """Return the sub-directories stored for ``model_func``.

        The function's directory is created when it does not exist yet.
        """
        directory = self._func_directory(model_func)
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(directory, exist_ok=True)

        # Escape the fixed part so glob metacharacters in the installation
        # path are matched literally.
        return glob.glob(glob.escape(directory) + "*/")

    def _get_func_data_names(self, model_func):
        """Return the paths of all CSV files stored for ``model_func``."""
        path_list = []
        for subdir in self._get_subdirs(model_func):
            path_list.extend(glob.glob(glob.escape(subdir) + "*.csv"))

        return path_list

    def _get_file_path(self, model_func):
        """Return the CSV path for this run, creating its directory.

        The file name is built from the SHA-1 hashes of ``_main_args_.X``
        and ``_main_args_.y``.
        """
        # NOTE(review): hashing requires X and y to expose a contiguous
        # buffer -- confirm against the caller.
        feature_hash = self._get_hash(self._main_args_.X)
        label_hash = self._get_hash(self._main_args_.y)

        directory = self._func_directory(model_func) + self.datetime
        os.makedirs(directory, exist_ok=True)

        return directory + feature_hash + "_" + label_hash + ".csv"
192