Passed
Push — master ( fb398d...c64a78 )
by Simon
01:38 queued 11s
created

hyperactive.memory.memory_load   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 120
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 78
dl 0
loc 120
rs 10
c 0
b 0
f 0
wmc 16

8 Methods

Rating   Name   Duplication   Size   Complexity  
A MemoryLoad._load_memory() 0 14 3
A MemoryLoad._load_data_into_memory() 0 19 2
A MemoryLoad.apply_index() 0 5 2
A MemoryLoad._get_func_data_names() 0 6 1
A MemoryLoad._get_hash() 0 2 1
A MemoryLoad.para2pos() 0 12 2
A MemoryLoad._read_func_metadata() 0 24 3
A MemoryLoad.__init__() 0 9 1

1 Function

Rating   Name   Duplication   Size   Complexity  
A apply_tobytes() 0 2 1
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import glob
6
import hashlib
7
8
import numpy as np
9
import pandas as pd
10
11
from functools import partial
12
13
from .memory_io import MemoryIO
14
15
16
def apply_tobytes(df):
    """Return the raw bytes of the array behind ``df.values``.

    Within this module it is passed to ``DataFrame.apply(..., axis=1)``, so
    each call receives one row and yields a hashable ``bytes`` key for it.
    """
    return df.values.tobytes()
18
19
20
class MemoryLoad(MemoryIO):
    """Load stored evaluation results into an in-memory ``position -> score`` dict.

    The CSV files matched by ``_get_func_data_names`` are read, their parameter
    values are translated into indices of the search space, and every row that
    could be translated becomes one ``memory_dict`` entry keyed by the raw bytes
    of its index vector.

    Attributes such as ``func_path``, ``feature_hash``, ``label_hash``,
    ``score_col_name`` and ``hash2obj`` are read but not set here; presumably
    they are provided by ``MemoryIO`` -- TODO confirm.
    """

    def __init__(self, _space_, _main_args_, _cand_):
        super().__init__(_space_, _main_args_, _cand_)

        # Best entry of the loaded data; these defaults remain in place until
        # _load_data_into_memory finds at least one usable row.
        self.pos_best = None
        self.score_best = -np.inf

        self.memory_type = _main_args_.memory

        # Set to True once at least one meta-data file has been read.
        self.meta_data_found = False

    def _load_memory(self, _cand_, _verb_, memory_dict):
        """Read the stored meta data and return the resulting memory dict.

        Parameters
        ----------
        _cand_ : object
            Its ``func_`` is forwarded to ``_read_func_metadata``; its
            ``eval_time`` is overwritten with the stored ``"eval_time"`` column.
        _verb_ : object
            Reporting object; ``load_samples`` is called with the loaded
            parameter frame.
        memory_dict : dict
            Stored as ``self.memory_dict`` before loading.

        Returns
        -------
        dict
            ``{}`` when no meta data was found, otherwise ``self.memory_dict``.
        """
        self.memory_dict = memory_dict

        para, score = self._read_func_metadata(_cand_.func_, _verb_)
        if para is None or score is None:
            # NOTE(review): a fresh dict is returned here, not the passed-in
            # ``memory_dict`` -- confirm callers rely on that.
            return {}

        _verb_.load_samples(para)
        _cand_.eval_time = list(para["eval_time"])

        self._load_data_into_memory(para, score)
        # Counts every non-score column of the stored data.
        self.n_dims = len(para.columns)

        return self.memory_dict

    def apply_index(self, pos_key, df):
        """Return the index of ``df`` in search-space entry ``pos_key``.

        ``None`` is returned when the value is not part of that entry.
        """
        candidates = self._space_.search_space[pos_key]
        if df in candidates:
            return candidates.index(df)
        return None

    def _read_func_metadata(self, model_func, _verb_):
        """Read and concatenate all matching meta-data CSV files.

        Returns
        -------
        tuple
            ``(para, score)`` data frames, where ``score`` holds every column
            whose name contains ``self.score_col_name`` and ``para`` holds the
            remaining columns; ``(None, None)`` when no file was found.
        """
        paths = self._get_func_data_names()
        meta_data_list = [pd.read_csv(path) for path in paths]

        if not meta_data_list:
            _verb_.no_meta_data(model_func)
            return None, None

        self.meta_data_found = True
        meta_data = pd.concat(meta_data_list, ignore_index=True)

        score_name = [
            name for name in meta_data.columns if self.score_col_name in name
        ]

        para = meta_data.drop(score_name, axis=1)
        score = meta_data[score_name]

        _verb_.load_meta_data()
        return para, score

    def _get_func_data_names(self):
        """Return the paths matching ``<func_path><feature_hash>_<label_hash>_.csv``."""
        pattern = self.func_path + (
            self.feature_hash + "_" + self.label_hash + "_.csv"
        )
        return glob.glob(pattern)

    def _get_hash(self, object):
        """Return the SHA-1 hex digest of ``object`` (a bytes-like value).

        The parameter name shadows the builtin ``object``; it is kept so that
        keyword callers continue to work.
        """
        return hashlib.sha1(object).hexdigest()

    def para2pos(self, paras):
        """Translate parameter values into search-space index positions.

        Only the columns listed in ``self._space_.para_names`` are kept. Each
        value is replaced by its index in the matching search-space entry;
        rows containing a value that is not in the search space are dropped.

        Returns
        -------
        pandas.DataFrame
            ``int64`` positions, indexed like the surviving rows of ``paras``.
        """
        paras = paras[self._space_.para_names]
        pos = paras.copy()

        for pos_key in self._space_.search_space:
            to_index = partial(self.apply_index, pos_key)
            pos[pos_key] = paras[pos_key].apply(to_index)

        # apply_index yields None for unknown values; those rows are unusable.
        pos.dropna(how="any", inplace=True)
        pos = pos.astype("int64")

        return pos

    def _load_data_into_memory(self, paras, scores):
        """Fill ``memory_dict`` and the best score/position from loaded data.

        Does nothing when no row of ``paras`` can be mapped onto the current
        search space.
        """
        paras = paras.replace(self.hash2obj)
        pos = self.para2pos(paras)

        if len(pos) == 0:
            return

        df_temp = pd.DataFrame()
        df_temp["pos_str"] = pos.apply(apply_tobytes, axis=1)
        # Aligned on the index, so only rows that survived para2pos get a score.
        df_temp["score"] = scores

        # Replaces (does not update) the dict handed to _load_memory.
        self.memory_dict = df_temp.set_index("pos_str").to_dict()["score"]

        scores = np.array(scores)
        paras = np.array(paras)

        # NOTE(review): the best entry is chosen among *all* loaded rows,
        # including rows para2pos dropped, and ``pos_best`` receives the
        # parameter row rather than its index position -- confirm this is what
        # callers expect. argmax works on the flattened array, which equals
        # the row index only for a single score column (assumed).
        idx = np.argmax(scores)
        self.score_best = scores[idx]
        self.pos_best = paras[idx]
120