Completed
Push — master ( 77ef8d...d45822 )
by Simon
02:00
created

hyperactive.memory.memory_helper._reset_memory()   A

Complexity

Conditions 3

Size

Total Lines 9
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 7
nop 0
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import sys
7
import json
8
import dill
9
import shutil
10
import pathlib
11
from fnmatch import fnmatch
12
13
import numpy as np
14
import pandas as pd
15
16
from .util import get_hash, get_model_id, get_func_str
17
from .paths import get_meta_path, get_meta_data_name
18
19
20
# Base location of the stored meta data. The functions in this module build
# paths by appending names to it with ``+``, so it presumably is a string
# ending in a path separator -- TODO confirm against get_meta_path().
meta_path = get_meta_path()
21
22
"""
23
def get_best_models(X, y):
24
    # TODO: model_dict   key:model   value:score
25
26
    return model_dict
27
28
29
def get_model_search_config(model):
30
    # TODO
31
    return search_config
32
33
34
def get_model_init_config(model):
35
    # TODO
36
    return init_config
37
"""
38
39
40
def get_best_model(X, y):
    """Return the best stored score for a dataset and the model behind it.

    Walks ``meta_path`` for files whose name matches the pattern returned by
    ``get_meta_data_name(X, y)``, reads each one as CSV and compares the
    maximum of its ``_score_`` column across all matching files.

    Parameters
    ----------
    X, y
        Passed unchanged to ``get_meta_data_name`` to build the file name
        pattern.

    Returns
    -------
    tuple or None
        ``(score_best, {obj_func: search_space}, {obj_func: None})`` for the
        file with the highest maximum score, or ``None`` when no matching
        file provides a score larger than ``-inf``.
    """
    meta_data_paths = []
    pattern = get_meta_data_name(X, y)

    for path, _subdirs, files in os.walk(meta_path):
        for name in files:
            if fnmatch(name, pattern):
                meta_data_paths.append(pathlib.PurePath(path, name))

    score_best = -np.inf
    best_model_path = None

    for path in meta_data_paths:
        path = str(path)
        meta_data = pd.read_csv(path)
        scores = meta_data["_score_"].values

        # An empty score column would make ``.max()`` raise; there is
        # nothing to compare in that file, so move on to the next one.
        if scores.size == 0:
            continue

        score_max = scores.max()

        if score_max > score_best:
            score_best = score_max
            # Everything before the "dataset_id:" marker is the prefix under
            # which the pickled model files are stored.
            best_model_path = path.rsplit("dataset_id:", 1)[0]

    # No candidate found: keep returning None instead of touching
    # objective/search-space variables that were never assigned.
    if best_model_path is None:
        return None

    obj_func_path = best_model_path + "objective_function.pkl"
    search_space_path = best_model_path + "search_space.pkl"

    # NOTE(review): dill.load can execute arbitrary code while unpickling.
    # This is only safe if these files are written by this package itself --
    # confirm that meta_path never holds untrusted data.
    with open(obj_func_path, "rb") as fp:
        obj_func = dill.load(fp)

    with open(search_space_path, "rb") as fp:
        search_space = dill.load(fp)

    # The return deliberately sits after the loop so that every matching
    # file takes part in the comparison, not just the first one.
    return (
        score_best,
        {obj_func: search_space},
        {obj_func: None},
    )  # TODO: init_config
82
83
84
def reset_memory(force_true=False):
    """Reset the memory, asking for confirmation unless ``force_true`` is set.

    With ``force_true`` truthy the reset runs right away; otherwise it only
    runs when ``query_yes_no()`` returns a truthy answer.
    """
    # ``or`` short-circuits, so the question is skipped when forced.
    if force_true or query_yes_no():
        _reset_memory()
89
90
91
def _reset_memory():
    """Remove every directory directly below ``meta_path`` and write an
    empty ``model_connections.json`` there."""
    # The first item yielded by os.walk describes meta_path itself; its
    # second field lists the immediate sub-directories.
    _root, subdir_names, _files = next(os.walk(meta_path))
    for subdir_name in subdir_names:
        shutil.rmtree(meta_path + subdir_name)

    connections_file = meta_path + "model_connections.json"
    with open(connections_file, "w") as f:
        json.dump({}, f, indent=4)

    print("Memory reset successful")
100
101
102
def query_yes_no():
    """Ask on stdout whether the long term memory should be deleted.

    Reads answers from ``input()`` until one is recognised.

    Returns
    -------
    bool
        ``True`` for "yes", "ye" or "y", ``False`` for "no", "n" or an empty
        answer (case-insensitive). Any other answer repeats the question.
    """
    answers = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    question = "Delete the entire long term memory?"
    prompt = question + " [y/n] "

    while True:
        sys.stdout.write(prompt)
        reply = input().lower()

        # An empty reply counts as "no".
        if reply == "":
            return False
        if reply in answers:
            return answers[reply]

        sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
115
116
117
def delete_model(model):
    """Delete the directory stored for ``model`` below ``meta_path``.

    The directory name is ``str(get_model_id(model))``. Prints whether the
    data was removed or was not found.
    """
    model_dir = meta_path + str(get_model_id(model))

    # isdir() is already False for a missing path, so it covers both checks.
    if not os.path.isdir(model_dir):
        print("Model data not found in memory")
        return

    shutil.rmtree(model_dir)
    print("Model data successfully removed")
126
127
128
def delete_model_dataset(model, X, y):
    """Delete the CSV file that ``_get_file_path(model, X, y)`` points to.

    Prints whether the file was removed or was not found.
    """
    target = _get_file_path(model, X, y)

    if not os.path.exists(target):
        print("Model data not found in memory")
        return

    os.remove(target)
    print("Model data successfully removed")
136
137
138
def connect_model_IDs(model1, model2):
    """Record a two-way connection between the IDs of two models.

    Loads ``model_connections.json`` from ``meta_path``, adds each model's ID
    to the other model's list and writes the result back to the same file.
    """
    # do checks if search space has same dim
    connections_file = meta_path + "model_connections.json"

    with open(connections_file) as f:
        data = json.load(f)

    first_id = get_model_id(model1)
    second_id = get_model_id(model2)

    # Same handling in both directions: model1 -> model2, then model2 -> model1.
    for key_model, value_model in ((first_id, second_id), (second_id, first_id)):
        if key_model in data:
            data = _connect_key2value(data, key_model, value_model)
        else:
            data[key_model] = [value_model]
            print("IDs successfully connected")

    with open(connections_file, "w") as f:
        json.dump(data, f, indent=4)
165
166
167
def _connect_key2value(data, key_model, value_model):
168
    if value_model in data[key_model]:
169
        print("IDs of models are already connected")
170
    else:
171
        data[key_model].append(value_model)
172
        print("IDs successfully connected")
173
174
    return data
175
176
177
def _split_key_value(data, key_model, value_model):
178
    if value_model in data[key_model]:
179
        data[key_model].remove(value_model)
180
181
        if len(data[key_model]) == 0:
182
            del data[key_model]
183
        print("ID connection successfully deleted")
184
    else:
185
        print("IDs of models are not connected")
186
187
    return data
188
189
190
def split_model_IDs(model1, model2):
    """Remove the two-way connection between the IDs of two models.

    Loads ``model_connections.json`` from ``meta_path``, removes each model's
    ID from the other model's list and writes the result back to the same
    file.
    """
    # TODO: do checks if search space has same dim
    connections_file = meta_path + "model_connections.json"

    with open(connections_file) as f:
        data = json.load(f)

    first_id = get_model_id(model1)
    second_id = get_model_id(model2)

    # Same handling in both directions: model1 -> model2, then model2 -> model1.
    for key_model, value_model in ((first_id, second_id), (second_id, first_id)):
        if key_model in data:
            data = _split_key_value(data, key_model, value_model)
        else:
            print("IDs of models are not connected")

    with open(connections_file, "w") as f:
        json.dump(data, f, indent=4)
215
216
217
def _get_file_path(model, X, y):
    """Build the CSV path for a model/dataset pair.

    The result is ``meta_path`` + model ID + "/" followed by
    ``<hash of X>_<hash of y>_.csv``.
    """
    model_dir = meta_path + (get_model_id(model) + "/")

    x_hash = get_hash(X)
    y_hash = get_hash(y)
    file_name = x_hash + "_" + y_hash + "_.csv"

    return model_dir + file_name
225