Passed
Push — master ( bf6a06...0a901a )
by Simon
01:26
created

MemoryDump._collect()   A

Complexity

Conditions 1

Size

Total Lines 14
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 10
nop 2
dl 0
loc 14
rs 9.9
c 0
b 0
f 0
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import json
7
import dill
8
import inspect
9
10
import numpy as np
11
import pandas as pd
12
13
from .memory_io import MemoryIO
14
15
16
class MemoryDump(MemoryIO):
    """Write the results held in ``memory_dict`` and the run settings to disk.

    The attributes ``self._space_``, ``self._main_args_``, ``self.datetime``,
    ``self.func_path``, ``self.date_path``, ``self.feature_hash``,
    ``self.label_hash`` and the method ``self._get_hash`` are not defined in
    this class; they are presumably provided by ``MemoryIO`` -- verify there.
    """

    def __init__(self, _space_, _main_args_, _cand_, memory_dict):
        """Forward all arguments to ``MemoryIO`` and keep the memory settings.

        Parameters
        ----------
        _space_, _main_args_, _cand_ :
            Project objects passed through unchanged to ``MemoryIO.__init__``.
        memory_dict : dict
            Mapping whose keys are decoded into positions and whose values are
            used as scores (see ``_get_opt_meta_data``).
        """
        super().__init__(_space_, _main_args_, _cand_, memory_dict)

        self.memory_type = _main_args_.memory
        self.memory_dict = memory_dict

    def _save_memory(self, _main_args_, _opt_args_, _cand_):
        """Dump the collected meta data and the run description to disk.

        Writes, in this order:

        * the meta-data CSV (merged with an existing file by ``_save_toCSV``),
        * ``objective_function.py`` in ``self.func_path`` (only if missing),
        * ``search_config.py`` in ``self.date_path`` (only if missing),
        * ``run_data.json`` in ``self.date_path`` (always overwritten).

        Parameters
        ----------
        _main_args_ :
            Unused here; the instance attribute ``self._main_args_`` is read
            instead.
        _opt_args_ :
            Unused.
        _cand_ :
            Object providing ``func_``, ``eval_time`` and ``total_time``.
        """
        path = self._get_file_path(_cand_.func_)
        meta_data = self._collect(_cand_)

        meta_data["run"] = self.datetime
        self._save_toCSV(meta_data, path)

        obj_func_path = self.func_path + "objective_function.py"
        if not os.path.exists(obj_func_path):
            with open(obj_func_path, "w") as file:
                file.write(self._get_func_str(_cand_.func_))

        search_config_path = self.date_path + "search_config.py"
        # Build a new dict instead of inserting/deleting keys while iterating
        # over the same dict, which raises RuntimeError on current CPython.
        # Non-string keys (e.g. functions) are replaced by their __name__.
        search_config_temp = {
            (key if isinstance(key, str) else key.__name__): value
            for key, value in dict(self._main_args_.search_config).items()
        }

        search_config_str = "search_config = " + str(search_config_temp)

        if not os.path.exists(search_config_path):
            with open(search_config_path, "w") as file:
                file.write(search_config_str)

        run_data = {
            "random_state": self._main_args_.random_state,
            # Was filled from random_state (copy-paste error).
            # NOTE(review): assumes _main_args_ exposes `max_time` -- confirm.
            "max_time": self._main_args_.max_time,
            "n_iter": self._main_args_.n_iter,
            "optimizer": self._main_args_.optimizer,
            "n_jobs": self._main_args_.n_jobs,
            # float() keeps the value JSON-serializable even when the sum is
            # a numpy integer scalar.
            "eval_time": float(np.array(_cand_.eval_time).sum()),
            "total_time": _cand_.total_time,
        }

        with open(self.date_path + "run_data.json", "w") as f:
            json.dump(run_data, f, indent=4)

    def _get_func_str(self, func):
        """Return the source code of ``func`` as a string."""
        return inspect.getsource(func)

    def _get_file_path(self, model_func):
        """Create ``self.date_path`` if needed and return the CSV file path.

        ``model_func`` is accepted for interface compatibility but not used.
        The returned path is built from ``self.func_path`` and the feature and
        label hashes.
        """
        # exist_ok avoids the race between an existence check and makedirs.
        os.makedirs(self.date_path, exist_ok=True)

        return self.func_path + (self.feature_hash + "_" + self.label_hash + "_.csv")

    def _collect(self, _cand_):
        """Assemble parameters, scores and evaluation times into one DataFrame.

        The parameter and score columns come from ``_get_opt_meta_data``; the
        ``eval_time`` column holds the last ``n_rows`` entries of
        ``_cand_.eval_time``, where ``n_rows`` is the number of parameter rows.

        Returns
        -------
        pandas.DataFrame
            Column-wise concatenation of the three frames.
        """
        results_dict = self._get_opt_meta_data()

        para_pd = pd.DataFrame(results_dict["params"])
        metric_pd = pd.DataFrame(
            results_dict["mean_test_score"], columns=["mean_test_score"]
        )
        n_rows = len(para_pd)
        # `seq[-0:]` is the whole sequence, so zero rows needs its own branch.
        recent_eval_times = _cand_.eval_time[-n_rows:] if n_rows else []
        eval_time = pd.DataFrame(recent_eval_times, columns=["eval_time"])

        return pd.concat(
            [para_pd, metric_pd, eval_time], axis=1, ignore_index=False
        )

    def _get_opt_meta_data(self):
        """Translate ``self.memory_dict`` into parameter and score lists.

        Each key is decoded into an integer position array and converted to a
        parameter dict via ``self._space_.pos2para``. Parameter values that
        are not ``int``, ``float`` or ``str`` are serialized with ``dill``,
        written to ``<func_path><hash>.pkl`` and replaced by that hash.
        Entries whose score equals 0 are left out of the result (their
        pickle files are still written).

        Returns
        -------
        dict
            ``{"params": [...], "mean_test_score": [...]}`` with one entry per
            kept memory item.
        """
        para_list = []
        score_list = []

        for pos_key, score in self.memory_dict.items():
            # np.fromstring's binary mode is deprecated; frombuffer is the
            # documented replacement. copy() keeps the array writable, as the
            # array returned by fromstring was.
            pos = np.frombuffer(pos_key, dtype=int).copy()
            para = self._space_.pos2para(pos)

            # Iterate over a snapshot because values are replaced in the loop.
            for name, value in list(para.items()):
                if isinstance(value, (int, float, str)):
                    continue

                para_dill = dill.dumps(value)
                para_hash = self._get_hash(para_dill)

                with open(
                    self.func_path + str(para_hash) + ".pkl", "wb"
                ) as pickle_file:
                    # NOTE(review): this pickles the already-serialized bytes
                    # (double serialization). Kept as-is because the reader
                    # of these files is not visible here -- confirm.
                    dill.dump(para_dill, pickle_file)

                para[name] = para_hash

            if score != 0:
                para_list.append(para)
                score_list.append(score)

        return {"params": para_list, "mean_test_score": score_list}

    def _save_toCSV(self, meta_data_new, path):
        """Write ``meta_data_new`` to ``path``, merging with an existing file.

        If ``path`` exists, its content is read and the new rows are appended;
        rows that are duplicates with respect to all columns other than
        ``mean_test_score``, ``cv_default_score``, ``eval_time`` and ``run``
        are dropped (first occurrence kept). If the number of columns differs
        from the existing file, a warning is printed and nothing is written.
        """
        if os.path.exists(path):
            meta_data_old = pd.read_csv(path)

            if len(meta_data_old.columns) != len(meta_data_new.columns):
                print("Warning meta data dimensionality does not match")
                print("Meta data will not be saved")
                return

            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent row-wise append.
            meta_data = pd.concat([meta_data_old, meta_data_new])

            noScore = ["mean_test_score", "cv_default_score", "eval_time", "run"]
            columns_noScore = [c for c in meta_data.columns if c not in noScore]

            meta_data = meta_data.drop_duplicates(subset=columns_noScore)
        else:
            meta_data = meta_data_new

        meta_data.to_csv(path, index=False)
159