|
1
|
|
|
# Author: Simon Blanke
# Email: [email protected]
# License: MIT License
|
4
|
|
|
|
|
5
|
|
|
|
|
6
|
|
|
import random |
|
7
|
|
|
from tqdm.auto import tqdm |
|
8
|
|
|
import scipy |
|
9
|
|
|
import numpy as np |
|
10
|
|
|
import pandas as pd |
|
11
|
|
|
import multiprocessing |
|
12
|
|
|
|
|
13
|
|
|
from .util import merge_dicts |
|
14
|
|
|
|
|
15
|
|
|
|
|
16
|
|
|
class Core:
    """Container for the general run settings plus small shared helpers.

    The constructor merges user supplied keyword arguments over a set of
    defaults and stores every setting as an instance attribute.  The
    remaining methods are helpers for progress bars (tqdm), random seeding,
    input data conversion and resolving the number of parallel jobs.
    """

    def __init__(self, *args, **kwargs):
        """Store the search configuration and the merged settings.

        Parameters
        ----------
        *args
            ``args[0]`` is stored as ``search_config``; its keys become
            ``model_list``.  An ``IndexError`` is raised if it is missing.
        **kwargs
            Overrides for the defaults listed in ``kwargs_base`` below.
            ``optimizer`` may be a plain name or a one-entry dict
            ``{name: parameters}``; in the dict form the parameters are
            stored in ``opt_para`` and only the name is kept as optimizer.
        """
        kwargs_base = {
            "n_iter": 10,
            "max_time": None,
            "optimizer": "RandomSearch",
            "n_jobs": 1,
            "verbosity": 2,
            "warnings": True,
            "random_state": None,
            "warm_start": False,
            "memory": True,
            "scatter_init": False,
            "meta_learn": False,
            "proxy_dataset": False,
            "get_search_path": False,
        }

        self.search_config = args[0]
        self.opt_para = dict()

        # Dict form of "optimizer": split it into its name (first key) and
        # the parameter value stored under that key.
        if "optimizer" in kwargs and isinstance(kwargs["optimizer"], dict):
            opt = list(kwargs["optimizer"].keys())[0]
            self.opt_para = kwargs["optimizer"][opt]
            kwargs["optimizer"] = opt

        kwargs_base = merge_dicts(kwargs_base, kwargs)
        self._set_general_args(kwargs_base)

        self.model_list = list(self.search_config.keys())
        self.n_models = len(self.model_list)

        self.set_n_jobs()
        self._n_process_range = range(0, int(self.n_jobs))

        # NOTE(review): the factor 3600 suggests max_time is given in hours
        # and converted to seconds here -- confirm against the callers.
        if self.max_time:
            self.max_time = self.max_time * 3600

    def _set_general_args(self, kwargs_base):
        """Copy every entry of the merged settings dict onto the instance.

        Parameters
        ----------
        kwargs_base : dict
            Settings dict containing the keys read below.  ``proxy_dataset``
            is optional here (defaults to ``False``) so that dicts built
            without that key keep working.
        """
        self.n_iter = kwargs_base["n_iter"]
        self.max_time = kwargs_base["max_time"]
        self.optimizer = kwargs_base["optimizer"]
        self.n_jobs = kwargs_base["n_jobs"]
        self.verbosity = kwargs_base["verbosity"]
        self.warnings = kwargs_base["warnings"]
        self.random_state = kwargs_base["random_state"]
        self.warm_start = kwargs_base["warm_start"]
        self.memory = kwargs_base["memory"]
        self.scatter_init = kwargs_base["scatter_init"]
        self.meta_learn = kwargs_base["meta_learn"]
        # A default for this setting is declared in __init__, so store it
        # like all the others instead of silently dropping it.
        self.proxy_dataset = kwargs_base.get("proxy_dataset", False)
        self.get_search_path = kwargs_base["get_search_path"]

    def init_p_bar(self, _core_, _cand_):
        """Create the tqdm progress bar when ``verbosity`` equals 2.

        For any other verbosity ``p_bar`` is set to ``None``.
        """
        if self.verbosity == 2:
            self.p_bar = tqdm(**_core_._tqdm_dict(_cand_))
        else:
            self.p_bar = None

    def update_p_bar(self, n, _cand_):
        """Advance the progress bar by ``n`` and show the best score so far."""
        # Compare with None explicitly: the truth value of a tqdm bar
        # depends on its total, so a bar with total == 0 would be skipped.
        if self.p_bar is not None:
            self.p_bar.update(n)
            self.p_bar.set_postfix(best_score=str(_cand_.score_best))

    def close_p_bar(self):
        """Close the progress bar if one was created."""
        # Explicit None check for the same reason as in update_p_bar.
        if self.p_bar is not None:
            self.p_bar.close()

    def _tqdm_dict(self, _cand_):
        """Generates the parameter dict for tqdm in the iteration-loop of each optimizer"""
        description = (
            "Thread "
            + str(_cand_.nth_process)
            + " -> "
            + _cand_._model_.func_.__name__
        )
        return {
            "total": self.n_iter,
            "desc": description,
            "position": _cand_.nth_process,
            "leave": True,
        }

    def _set_random_seed(self, thread=0):
        """Sets the random seed separately for each thread (to avoid getting the same results in each thread)

        The seed is ``int(random_state) + thread``; a missing (falsy)
        ``random_state`` counts as 0.
        """
        if self.random_state:
            rand = int(self.random_state)
        else:
            rand = 0

        seed = rand + thread
        random.seed(seed)
        # scipy.random was only an alias of numpy.random and is not
        # available in recent SciPy releases; seeding NumPy's global
        # generator covers what that call used to do.
        np.random.seed(seed)

    def _check_data(self, X, y):
        """Checks if data is pandas Dataframe and converts to numpy array if necessary"""
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.DataFrame):
            y = y.values

        return X, y

    def set_n_jobs(self):
        """Sets the number of jobs to run in parallel

        ``n_jobs == -1`` or a value above the number of CPU cores is
        replaced by the core count; other values are left unchanged.
        """
        num_cores = multiprocessing.cpu_count()
        if self.n_jobs == -1 or self.n_jobs > num_cores:
            self.n_jobs = num_cores
|
121
|
|
|
|