Passed
Push — master ( ad2fdd...91ec5e )
by Simon
01:05
created

HyperactiveWrapper._load_dataframes()   A

Complexity

Conditions 3

Size

Total Lines 15
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 11
dl 0
loc 15
rs 9.85
c 0
b 0
f 0
cc 3
nop 1
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
import os
6
import dill
7
import datetime
8
import numpy as np
9
import pandas as pd
10
import hashlib
11
import inspect
12
13
from .paths import HypermemoryPaths
14
from .io_helpers import (
15
    save_object,
16
    load_object,
17
    save_json,
18
    load_json,
19
    save_dataframe,
20
    load_dataframes,
21
)
22
23
from .dataset_features import dataset_features
24
from .memory_conv import memory_dict2dataframe, convert_dataframe, dataframe2memory_dict
25
26
27
def get_datetime():
    """Return the current local time as a fixed-width timestamp string.

    Used to build unique file names for stored search data; the format
    "%d.%m.%Y - %H:%M:%S:%f" includes microseconds so successive calls
    produce distinct names.
    """
    now = datetime.datetime.now()
    return now.strftime("%d.%m.%Y - %H:%M:%S:%f")
29
30
31
class HyperactiveWrapper:
    """Save and reload Hyperactive search data for one (X, y, model, search_space) setup.

    On construction, one content-addressed directory per component is
    registered with ``HypermemoryPaths``; ``save`` writes meta data, the
    model, the search space and the search data, while ``load`` restores
    all previously stored search data as a memory dict.
    """

    def __init__(self, main_path, X, y, model, search_space):
        self.paths = HypermemoryPaths(main_path)

        # Register one directory per memory component; the prefix is
        # always "<name>_ID:" so it is derived from the name here.
        registrations = (
            ("X", "array", X),
            ("y", "array", y),
            ("model", "function", model),
            ("search_space", "dictionary", search_space),
        )
        for name, id_type, object_ in registrations:
            self.paths.add_directory(
                name=name, prefix=name + "_ID:", id_type=id_type, object_=object_
            )

        self.X = X
        self.y = y
        self.model = model
        self.search_space = search_space

    def _drop_duplicates(self, dataframe):
        """Keep only the most recent row per unique search-space position.

        Rows are considered duplicates when they agree on every
        search-space column; ``keep="last"`` retains the newest entry.
        """
        subset_cols = list(self.search_space.keys())
        deduplicated = dataframe.drop_duplicates(subset=subset_cols, keep="last")
        return deduplicated

    def _load_dataframes(self):
        """Collect every stored search-data frame, converted to the current search space.

        Each model subdirectory carries its own pickled search space;
        frames found there are converted from that stored space to
        ``self.search_space`` before being collected.
        """
        collected = []
        for subdir in self.paths.subdirs("model"):
            stored_space = load_object(path=subdir, name="search_space")
            collected.extend(
                convert_dataframe(frame, stored_space, self.search_space)
                for frame in load_dataframes(subdir)
            )
        return collected

    def load(self):
        """Return stored search data as a memory dict, or {} when none exists."""
        frames = self._load_dataframes()
        if not frames:
            return {}

        merged = pd.concat(frames, axis=0)
        merged = self._drop_duplicates(merged)
        print(
            "Loading search data for",
            self.model.__name__,
            "was successful:",
            len(merged),
            "samples found",
        )
        return dataframe2memory_dict(merged, self.search_space)

    def save(self, memory_dict):
        """Persist meta data, model, search space and search data to disk."""
        X_info = dataset_features(self.X)
        y_info = dataset_features(self.y)
        dataframe = memory_dict2dataframe(memory_dict, self.search_space)

        paths = self.paths.path_dict

        save_json(paths["X"], "X_meta_data", X_info)
        save_json(paths["y"], "y_meta_data", y_info)
        save_object(paths["model"], "model", self.model)
        save_object(paths["search_space"], "search_space", self.search_space)
        # Timestamped file name keeps every save run distinct.
        save_dataframe(
            paths["search_space"], "search_data_" + str(get_datetime()), dataframe
        )
106