Completed
Push — master ( 2b17b1...77ef8d )
by Simon
03:52
created

hyperactive.memory.memory_load   A

Complexity

Total Complexity 21

Size/Duplication

Total Lines 146
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 97
dl 0
loc 146
rs 10
c 0
b 0
f 0
wmc 21

1 Function

Rating   Name   Duplication   Size   Complexity  
A apply_tobytes() 0 2 1

8 Methods

Rating   Name   Duplication   Size   Complexity  
A MemoryLoad._get_id_list() 0 10 3
A MemoryLoad._load_memory() 0 14 3
A MemoryLoad.apply_index() 0 5 2
A MemoryLoad._get_func_data_names() 0 11 2
A MemoryLoad._read_func_metadata() 0 24 3
A MemoryLoad.__init__() 0 21 3
A MemoryLoad._load_data_into_memory() 0 19 2
A MemoryLoad.para2pos() 0 12 2
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import json
6
import glob
7
8
import numpy as np
9
import pandas as pd
10
11
from functools import partial
12
13
from .memory_io import MemoryIO
14
from .util import get_model_id
15
16
17
def apply_tobytes(df):
    """Return the raw bytes of the values held by a pandas object.

    Used as a row-wise ``apply`` callback, where each row is turned into a
    bytes object that can serve as a dictionary key.

    Parameters
    ----------
    df : object exposing ``.values`` (e.g. a pandas Series or DataFrame)

    Returns
    -------
    bytes
        ``df.values.tobytes()``.
    """
    return df.values.tobytes()
19
20
21
class MemoryLoad(MemoryIO):
    """Read stored search data for a model and turn it into a memory dict.

    On construction the ids of the models connected to the candidate's
    objective function are collected from ``model_connections.json``.
    ``_load_memory`` then reads the CSV files stored for those ids and builds
    a ``{position bytes: score}`` dictionary from them.

    NOTE(review): ``meta_path``, ``score_col_name``, ``feature_hash``,
    ``label_hash``, ``hash2obj`` and ``_space_`` are read but never assigned
    here; they are presumably set by ``MemoryIO.__init__`` -- confirm.
    """

    def __init__(self, _space_, _main_args_, _cand_):
        """Initialise state and collect the ids of all connected models.

        Parameters
        ----------
        _space_, _main_args_, _cand_ :
            Forwarded unchanged to ``MemoryIO.__init__``. ``_main_args_.memory``
            is stored as ``memory_type`` and ``_cand_.func_`` is used to
            derive the model id.
        """
        super().__init__(_space_, _main_args_, _cand_)

        self.pos_best = None
        self.score_best = -np.inf

        self.memory_type = _main_args_.memory
        self.meta_data_found = False

        self.con_ids = []

        with open(self.meta_path + "model_connections.json") as f:
            self.model_con = json.load(f)

        model_id = get_model_id(_cand_.func_)
        if model_id in self.model_con:
            self._get_id_list(self.model_con[model_id])
        else:
            # No recorded connections: only the model's own data is relevant.
            self.con_ids = [model_id]

        # Duplicates collected during the traversal are dropped here.
        self.con_ids = set(self.con_ids)

    def _get_id_list(self, id_list):
        """Append ``id_list`` and every id reachable from it to ``con_ids``.

        The traversal follows the entries of ``self.model_con`` recursively
        and stops descending once all connections of an id are already known.
        ``self.con_ids`` must be a list when this is called.
        """
        self.con_ids = self.con_ids + id_list

        for con_id in id_list:
            # An id can be listed as a connection without having an entry of
            # its own; treat it as having no further connections instead of
            # failing with a KeyError.
            id_list_new = self.model_con.get(con_id, [])

            if set(id_list_new).issubset(self.con_ids):
                continue

            self._get_id_list(id_list_new)

    def _load_memory(self, _cand_, _verb_, memory_dict):
        """Load stored results and return the resulting memory dictionary.

        Parameters
        ----------
        _cand_ : candidate object; ``func_`` is read and ``eval_time`` is set.
        _verb_ : verbosity object used for progress/status callbacks.
        memory_dict : dict
            Stored as ``self.memory_dict``; it is replaced (not updated) when
            stored data can be mapped onto the search space.

        Returns
        -------
        dict
            ``{}`` when no stored data was found, otherwise
            ``self.memory_dict``.
        """
        self.memory_dict = memory_dict

        para, score = self._read_func_metadata(_cand_.func_, _verb_)
        if para is None or score is None:
            return {}

        _verb_.load_samples(para)
        _cand_.eval_time = list(para["eval_time"])

        self._load_data_into_memory(para, score)
        self.n_dims = len(para.columns)

        return self.memory_dict

    def apply_index(self, pos_key, df):
        """Return the index of value ``df`` in the search space of ``pos_key``.

        Returns ``None`` when the value is not part of that search-space
        entry.
        """
        values = self._space_.search_space[pos_key]
        if df in values:
            return values.index(df)
        return None

    def _read_func_metadata(self, model_func, _verb_):
        """Read and concatenate all stored CSV files of the connected models.

        Parameters
        ----------
        model_func : passed to ``_verb_.no_meta_data`` when nothing is found.
        _verb_ : verbosity object used for status callbacks.

        Returns
        -------
        tuple
            ``(para, score)`` where ``score`` holds every column whose name
            contains ``self.score_col_name`` and ``para`` holds the remaining
            columns, or ``(None, None)`` when no file was found.
        """
        paths = self._get_func_data_names()

        meta_data_list = []
        for path in paths:
            meta_data_list.append(pd.read_csv(path))
            self.meta_data_found = True

        if not meta_data_list:
            _verb_.no_meta_data(model_func)
            return None, None

        meta_data = pd.concat(meta_data_list, ignore_index=True)

        score_name = [
            name for name in meta_data.columns if self.score_col_name in name
        ]

        para = meta_data.drop(score_name, axis=1)
        score = meta_data[score_name]

        _verb_.load_meta_data()
        return para, score

    def _get_func_data_names(self):
        """Return the paths of the stored CSV files for all connected ids.

        For every id in ``self.con_ids`` the pattern
        ``<meta_path><id>/<feature_hash>_<label_hash>_.csv`` is resolved with
        ``glob``; ids without a matching file contribute nothing.
        """
        file_name = self.feature_hash + "_" + self.label_hash + "_.csv"

        paths = []
        for con_id in self.con_ids:
            paths.extend(glob.glob(self.meta_path + con_id + "/" + file_name))

        return paths

    def para2pos(self, paras):
        """Convert a table of parameter values into search-space positions.

        Each value is replaced by its index in the matching search-space
        entry. Rows containing a value that is not in the search space are
        dropped, and the result is cast to ``int64``.
        """
        paras = paras[self._space_.para_names]
        pos = paras.copy()

        for pos_key in self._space_.search_space:
            apply_index = partial(self.apply_index, pos_key)
            pos[pos_key] = paras[pos_key].apply(apply_index)

        # apply_index yields None for unknown values; those rows are removed.
        pos.dropna(how="any", inplace=True)
        pos = pos.astype("int64")

        return pos

    def _load_data_into_memory(self, paras, scores):
        """Build ``memory_dict`` and record the best stored score.

        Parameters
        ----------
        paras : table of stored parameter values (hashes are mapped back via
            ``self.hash2obj`` before conversion).
        scores : table of stored scores, aligned with ``paras``.

        Nothing is changed when no stored row maps onto the search space.
        """
        paras = paras.replace(self.hash2obj)
        pos = self.para2pos(paras)

        if len(pos) == 0:
            return

        df_temp = pd.DataFrame()
        # The raw bytes of each position row serve as the dictionary key.
        df_temp["pos_str"] = pos.apply(apply_tobytes, axis=1)
        df_temp["score"] = scores

        self.memory_dict = df_temp.set_index("pos_str").to_dict()["score"]

        scores = np.array(scores)
        paras = np.array(paras)

        idx = np.argmax(scores)
        self.score_best = scores[idx]
        # NOTE(review): this stores a row of parameter values taken from the
        # unfiltered table, not a row of `pos`; confirm this is what the
        # consumers of `pos_best` expect.
        self.pos_best = paras[idx]
146