Passed
Push — master ( d5b48a...5a31d8 )
by Simon
01:07
created

Hypermemory.get_best_model()   B

Complexity

Conditions 8

Size

Total Lines 43
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 27
dl 0
loc 43
rs 7.3333
c 0
b 0
f 0
cc 8
nop 3
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import sys
7
import json
8
import dill
9
import shutil
10
import pathlib
11
from fnmatch import fnmatch
12
13
import numpy as np
14
import pandas as pd
15
16
from .memory_load import MemoryLoad
17
from .memory_dump import MemoryDump
18
19
from .utils import (
20
    _connect_key2value,
21
    _split_key_value,
22
    _reset_memory,
23
    _query_yes_no,
24
    object_hash,
25
    model_id,
26
    meta_data_name,
27
)
28
29
from .paths import _paths_
30
31
32
class Hypermemory(MemoryDump):
33
    def __init__(self, *args, **kwargs):
        """Create a memory handle with nothing loaded yet.

        Positional and keyword arguments are accepted but not used by
        this initializer.
        """
        # No search data has been loaded so far.
        self.memory_dict = self.n_dims = None
        self.meta_data_found = False

        # Root location of the stored meta data, from the package path table.
        self.meta_path = _paths_["default"]
39
40
    def load(self, X=None, y=None, model=None, search_space=None):
        """Load stored search data and cache it on this instance.

        Builds a ``MemoryLoad`` from the given arguments, runs its
        ``hyperactive_memory_load()`` and copies ``meta_data_found``,
        ``score_best`` and ``pos_best`` from the loader onto ``self``.

        The four data arguments were previously read from undefined names,
        so every call failed with ``NameError``. They are now explicit
        parameters; the defaults only exist to keep the old call shape
        parseable.

        Parameters
        ----------
        X, y, model, search_space
            Forwarded unchanged to ``MemoryLoad``.

        Returns
        -------
        The object returned by ``MemoryLoad.hyperactive_memory_load()``,
        also stored as ``self.memory_dict``.

        Raises
        ------
        ValueError
            If any of the four arguments is left as ``None``.
        """
        required = (
            ("X", X),
            ("y", y),
            ("model", model),
            ("search_space", search_space),
        )
        missing = [name for name, value in required if value is None]
        if missing:
            raise ValueError(
                "load() is missing required argument(s): " + ", ".join(missing)
            )

        self._load_ = MemoryLoad(X, y, model, search_space)

        self.memory_dict = self._load_.hyperactive_memory_load()
        self.meta_data_found = self._load_.meta_data_found

        self.score_best = self._load_.score_best
        self.pos_best = self._load_.pos_best

        return self.memory_dict
50
51
    def dump(self, memory, X=None, y=None, model=None, search_space=None):
        """Write ``memory`` to disk through a ``MemoryDump`` helper.

        Builds a ``MemoryDump`` from the given arguments, keeps it as
        ``self._dump_`` and calls its ``hyperactive_memory_dump(memory)``.

        The four data arguments were previously read from undefined names,
        so every call failed with ``NameError``. They are now explicit
        parameters; the defaults only exist to keep the old call shape
        parseable.

        Parameters
        ----------
        memory
            Search data handed to ``MemoryDump.hyperactive_memory_dump``.
        X, y, model, search_space
            Forwarded unchanged to ``MemoryDump``.

        Raises
        ------
        ValueError
            If any of ``X``, ``y``, ``model`` or ``search_space`` is left
            as ``None``.
        """
        required = (
            ("X", X),
            ("y", y),
            ("model", model),
            ("search_space", search_space),
        )
        missing = [name for name, value in required if value is None]
        if missing:
            raise ValueError(
                "dump() is missing required argument(s): " + ", ".join(missing)
            )

        self._dump_ = MemoryDump(X, y, model, search_space)
        self._dump_.hyperactive_memory_dump(memory)
54
55
    def _get_para(self):
        """Return the stored parameters and scores as two arrays.

        Uses ``self._dump_._get_opt_meta_data`` on ``self.memory_dict``,
        so ``self._dump_`` must already exist when memory is present.

        Returns
        -------
        tuple or None
            ``(parameter_values, scores)`` where ``scores`` is the
            ``"score"`` column with an extra axis appended at position 1.
            ``None`` (after printing ``"Error"``) when no memory is loaded.
        """
        memory = self.memory_dict
        if memory is None:
            print("Error")
            return None

        frames = self._dump_._get_opt_meta_data(memory)
        para_frame, metric_frame = frames

        para_values = para_frame.values
        score_values = metric_frame["score"].values

        return para_values, np.expand_dims(score_values, axis=1)
62
63
    def get_best_model(self, X, y):
        """Return the best stored result for the data set ``(X, y)``.

        Walks ``self.meta_path`` for files whose name matches
        ``meta_data_name(X, y)``, reads each one as CSV and selects the
        file holding the highest value in its ``_score_`` column. The
        objective function and search space are then loaded from
        ``objective_function.pkl`` and ``search_space.pkl``, located by the
        part of the file path before the last ``"dataset_id:"``.

        Previously the ``return`` sat inside the file loop, so only the
        first matching file was ever examined, and the result names could
        be unbound when the score comparison failed. All files are now
        scanned before the winner is loaded.

        Parameters
        ----------
        X, y
            Data set passed to ``meta_data_name`` to build the file name
            pattern.

        Returns
        -------
        tuple or None
            ``(score_best, {obj_func: search_space}, {obj_func: best_para})``
            where ``best_para`` maps every search-space key to its value in
            the first row that reaches ``score_best``. ``None`` when no
            matching file contains a comparable score.
        """
        pattern = meta_data_name(X, y)

        meta_data_paths = []
        for dirpath, _, files in os.walk(self.meta_path):
            for name in files:
                if fnmatch(name, pattern):
                    meta_data_paths.append(str(pathlib.PurePath(dirpath, name)))

        score_best = -np.inf
        best_path = None
        best_meta_data = None

        for path in meta_data_paths:
            meta_data = pd.read_csv(path)
            scores = meta_data["_score_"].values

            # An empty file has no maximum; ``max()`` would raise on it.
            if scores.size == 0:
                continue

            score_max = scores.max()

            # A NaN maximum compares False here and is therefore skipped.
            if score_max > score_best:
                score_best = score_max
                best_path = path
                best_meta_data = meta_data

        if best_path is None:
            return None

        # Everything before the last "dataset_id:" is used as the prefix
        # under which the pickled objective function and search space live.
        model_path = best_path.rsplit("dataset_id:", 1)[0]

        obj_func_path = model_path + "objective_function.pkl"
        search_space_path = model_path + "search_space.pkl"

        # NOTE: unpickling can execute arbitrary code; only point
        # ``meta_path`` at data written by a trusted source.
        with open(obj_func_path, "rb") as fp:
            obj_func = dill.load(fp)

        with open(search_space_path, "rb") as fp:
            search_space = dill.load(fp)

        para_names = list(search_space.keys())

        best_rows = best_meta_data[best_meta_data["_score_"] == score_best]
        best_para = best_rows[para_names].iloc[0].to_dict()

        return (score_best, {obj_func: search_space}, {obj_func: best_para})
0 ignored issues
show
introduced by
The variable search_space does not seem to be defined in case score_max > score_best on line 84 is False. Are you sure this can never be the case?
Loading history...
introduced by
The variable best_para does not seem to be defined in case score_max > score_best on line 84 is False. Are you sure this can never be the case?
Loading history...
introduced by
The variable obj_func does not seem to be defined in case score_max > score_best on line 84 is False. Are you sure this can never be the case?
Loading history...
106
107
    def reset_memory(self, force_true=False):
        """Reset everything stored under ``self.meta_path``.

        Parameters
        ----------
        force_true : bool, optional
            When truthy the reset happens immediately. Otherwise the
            result of ``_query_yes_no()`` decides whether it happens.
        """
        # ``or`` short-circuits: the query only runs when not forced.
        confirmed = force_true or _query_yes_no()
        if confirmed:
            _reset_memory(self.meta_path)
112
113
    def delete_model(self, model):
        """Remove the directory holding all stored data of ``model``.

        The directory is ``self.meta_path`` followed by ``"model_id:"`` and
        the model's id. A message is printed stating whether data was
        removed or no such directory exists.
        """
        model_hash = str(model_id(model))
        model_dir = self.meta_path + "model_id:" + model_hash

        # ``isdir`` is False for missing paths too, so one check suffices.
        if not os.path.isdir(model_dir):
            print("Model data not found in memory")
            return

        shutil.rmtree(model_dir)
        print("Model data successfully removed")
122
123
    def delete_model_dataset(self, model, X, y):
        """Remove the stored CSV file of ``model`` for the data set ``(X, y)``.

        The file location comes from ``self._get_file_path``. A message is
        printed stating whether the file was removed or does not exist.
        """
        target = self._get_file_path(model, X, y)

        if not os.path.exists(target):
            print("Model data not found in memory")
            return

        os.remove(target)
        print("Model data successfully removed")
131
132
    def connect_model_IDs(self, model1, model2):
        """Record ``model1`` and ``model2`` as connected, in both directions.

        Reads ``model_connections.json`` under ``self.meta_path``, updates
        the entry of each model id to reference the other one and writes
        the file back. An id that has no entry yet gets a new one-element
        list and a confirmation is printed; existing entries are updated
        through ``_connect_key2value``.
        """
        # TODO: check that both search spaces have the same dimensions
        connections_file = self.meta_path + "model_connections.json"

        with open(connections_file) as json_in:
            connections = json.load(json_in)

        hash_a = model_id(model1)
        hash_b = model_id(model2)

        # Same handling for a -> b and then b -> a.
        for key_model, value_model in ((hash_a, hash_b), (hash_b, hash_a)):
            if key_model not in connections:
                connections[key_model] = [value_model]
                print("IDs successfully connected")
            else:
                connections = _connect_key2value(
                    connections, key_model, value_model
                )

        with open(connections_file, "w") as json_out:
            json.dump(connections, json_out, indent=4)
159
160
    def split_model_IDs(self, model1, model2):
        """Remove the connection between ``model1`` and ``model2``.

        Reads ``model_connections.json`` under ``self.meta_path``, passes
        the entry of each model id through ``_split_key_value`` with the
        other id and writes the file back. For an id without an entry a
        message is printed instead.
        """
        # TODO: do checks if search space has same dim
        connections_file = self.meta_path + "model_connections.json"

        with open(connections_file) as json_in:
            connections = json.load(json_in)

        hash_a = model_id(model1)
        hash_b = model_id(model2)

        # Same handling for a -> b and then b -> a.
        for key_model, value_model in ((hash_a, hash_b), (hash_b, hash_a)):
            if key_model not in connections:
                print("IDs of models are not connected")
            else:
                connections = _split_key_value(
                    connections, key_model, value_model
                )

        with open(connections_file, "w") as json_out:
            json.dump(connections, json_out, indent=4)
185
186
    def _get_file_path(self, model, X, y):
        """Build the CSV path for ``model`` evaluated on ``(X, y)``.

        The result has the form
        ``<meta_path>model_id:<model id>/<hash of X>_<hash of y>_.csv``.
        """
        model_dir = "".join((self.meta_path, "model_id:", model_id(model), "/"))

        # Joining with "_" yields "<X hash>_<y hash>_.csv".
        file_name = "_".join((object_hash(X), object_hash(y), ".csv"))

        return model_dir + file_name
194