optimization_metadata.memory_load   A
last analyzed

Complexity

Total Complexity 23

Size/Duplication

Total Lines 157
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 23
eloc 97
dl 0
loc 157
rs 10
c 0
b 0
f 0

9 Methods

Rating   Name   Duplication   Size   Complexity  
A MemoryLoad.hyperactive_memory_load() 0 14 3
A MemoryLoad._load_data_into_memory() 0 24 1
A MemoryLoad._get_func_data_names() 0 8 2
A MemoryLoad.__init__() 0 24 5
A MemoryLoad.para2pos() 0 20 3
A MemoryLoad.idx_closest_values() 0 3 1
A MemoryLoad.apply_index() 0 5 2
A MemoryLoad._read_func_metadata() 0 21 3
A MemoryLoad._get_id_list() 0 10 3
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import json
7
import glob
8
9
import numpy as np
10
import pandas as pd
11
12
from functools import partial
13
14
from .memory_io import MemoryIO
15
from .utils import model_id, model_path
16
17
18
class MemoryLoad(MemoryIO):
19
    def __init__(self, X, y, model, search_space):
        """Initialise loading state and collect the ids of connected models.

        After delegating to the base-class constructor, the connection table
        stored in ``model_connections.json`` (under ``self.meta_path``) is
        read; an empty table is written first when the file does not exist.
        ``self.con_ids`` ends up as the set of model ids whose meta data
        should be loaded for ``model``.

        Parameters
        ----------
        X, y, model, search_space
            Forwarded unchanged to the base-class constructor.
        """
        super().__init__(X, y, model, search_space)

        self.pos_best = None
        self.score_best = -np.inf
        self.meta_data_found = False
        self.con_ids = []

        connections_file = self.meta_path + "model_connections.json"

        # Create an empty connection table when none has been written yet.
        if not os.path.exists(connections_file):
            with open(connections_file, "w") as handle:
                json.dump({}, handle, indent=4)

        with open(connections_file) as handle:
            self.model_con = json.load(handle)

        own_id = model_id(self.model)
        if own_id not in self.model_con:
            # No recorded connections: only this model's own data is used.
            self.con_ids = [own_id]
        else:
            self._get_id_list(self.model_con[own_id])

        # Duplicates collected while walking the connections are dropped here.
        self.con_ids = set(self.con_ids)
43
44
    def _get_id_list(self, id_list):
45
        self.con_ids = self.con_ids + id_list
46
47
        for id in id_list:
48
            id_list_new = self.model_con[id]
49
50
            if set(id_list_new).issubset(self.con_ids):
51
                continue
52
53
            self._get_id_list(id_list_new)
54
55
    def hyperactive_memory_load(self):
56
        para, score = self._read_func_metadata(self.model)
57
        if para is None or score is None:
58
            print("No meta data found")
59
            return {}
60
61
        # print(len(para), "samples found")
62
63
        # _verb_.load_samples(para)
64
65
        memory_dict = self._load_data_into_memory(para, score)
66
        self.n_dims = len(para.columns)
67
68
        return memory_dict
69
70
    def _read_func_metadata(self, model_func):
71
        paths = self._get_func_data_names()
72
73
        meta_data_list = []
74
        for path in paths:
75
            meta_data = pd.read_csv(path)
76
            meta_data_list.append(meta_data)
77
            self.meta_data_found = True
78
79
        if len(meta_data_list) > 0:
80
            meta_data = pd.concat(meta_data_list, ignore_index=True)
81
82
            para = meta_data[self.para_names]
83
            score = meta_data[self.score_col_name]
84
85
            # _verb_.load_meta_data()
86
            return para, score
87
88
        else:
89
            # _verb_.no_meta_data(model_func)
90
            return None, None
91
92
    def _get_func_data_names(self):
93
        paths = []
94
        for id in self.con_ids:
95
            paths = paths + glob.glob(
96
                self.meta_path + model_path(id) + self.meta_data_name
97
            )
98
99
        return paths
100
101
    def idx_closest_values(self, X, Y):
        """Return, for each value in ``Y``, the index of the nearest ``X`` value.

        Parameters
        ----------
        X : array-like
            Candidate values (e.g. the values of one search-space dimension).
        Y : array-like
            Values to be matched; may be a list, ndarray or pandas Series.

        Returns
        -------
        numpy.ndarray
            Integer indices into ``X``, one per element of ``Y``.
        """
        # Convert first: adding an axis with ``[:, np.newaxis]`` is not
        # supported on a pandas Series, only on a numpy array.
        candidates = np.asarray(X)
        targets = np.asarray(Y)

        # Row i holds the distances of targets[i] to every candidate.
        dist = np.absolute(candidates - targets[:, np.newaxis])
        return dist.argmin(axis=1)
104
105
    def apply_index(self, pos_key, df):
106
        return (
107
            self.search_space[pos_key].index(df)
108
            if df in self.search_space[pos_key]
109
            else None
110
        )
111
112
    def para2pos(self, paras):
113
        paras = paras[self.para_names]
114
        pos = paras.copy()
115
116
        for pos_key in self.search_space:
117
            is_float = isinstance(self.search_space[pos_key][0], float)
118
            if is_float:
119
                pos[pos_key] = self.idx_closest_values(
120
                    self.search_space[pos_key], paras[pos_key]
121
                )
122
            else:
123
                apply_index = partial(self.apply_index, pos_key)
124
                pos[pos_key] = paras[pos_key].apply(apply_index)
125
126
        # print("\n pos \n", pos, type(pos))
127
128
        pos.dropna(how="any", inplace=True)
129
        pos = pos.astype("int64")
130
131
        return pos
132
133
    def _load_data_into_memory(self, paras, scores):
134
        # print("\n paras \n", paras, type(paras))
135
136
        paras = paras.replace(self.hash2obj)
137
        pos = self.para2pos(paras)
138
139
        scores_np = np.array(scores)[:, 0]
140
        paras_np = np.array(paras)
141
142
        print("scores_np", scores_np)
143
144
        idx = np.argmax(scores_np)
145
        self.score_best = scores_np[idx]
146
        self.pos_best = paras_np[idx]
147
148
        print("self.score_best", self.score_best)
149
150
        scores = scores.to_dict("records")
151
        tuple_list = list(map(tuple, pos.values))
152
        memory_dict = dict(zip(tuple_list, scores))
153
154
        print("Meta data successfully loaded")
155
156
        return memory_dict
157