# Author: Simon Blanke
# Email: [email protected]
# License: MIT License


import random
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import multiprocessing

from .util import merge_dicts


class Config:
    def __init__(self, *args, **kwargs):
        kwargs_base = {
            "n_iter": 10,
            "max_time": None,
            "optimizer": "RandomSearch",
            "n_jobs": 1,
            "verbosity": 2,
            "warnings": True,
            "random_state": None,
            "warm_start": False,
            "memory": True,
            "scatter_init": False,
            "meta_learn": False,
            "proxy_dataset": False,
            "get_search_path": False,
        }

        self.search_config = args[0]
        self.opt_para = dict()

        # "optimizer" may be passed as a dict of the form
        # {optimizer_name: optimizer_parameter_dict}; in that case the
        # optimizer parameters are stored separately in self.opt_para.
        if "optimizer" in kwargs and isinstance(kwargs["optimizer"], dict):
            opt = list(kwargs["optimizer"].keys())[0]
            self.opt_para = kwargs["optimizer"][opt]

            kwargs["optimizer"] = opt

        kwargs_base = merge_dicts(kwargs_base, kwargs)
        self._set_general_args(kwargs_base)

        self.model_list = list(self.search_config.keys())
        self.n_models = len(self.model_list)

        self.set_n_jobs()
        self._n_process_range = range(0, int(self.n_jobs))

        if self.max_time:
            # max_time is given in hours; convert it to seconds for internal use
            self.max_time = self.max_time * 3600
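    # A minimal construction sketch (hypothetical values; `model` stands for an
    # objective function defined elsewhere in the package and is not part of
    # this module):
    #
    #   search_config = {model: {"n_estimators": range(10, 100, 10)}}
    #   config = Config(search_config, n_iter=50, n_jobs=2)
    #
    # The optimizer can also be passed as a dict to supply optimizer-specific
    # parameters (the parameter name below is a placeholder):
    #
    #   config = Config(search_config, optimizer={"RandomSearch": {"some_param": 1}})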

    def _set_general_args(self, kwargs_base):
        self.n_iter = kwargs_base["n_iter"]
        self.max_time = kwargs_base["max_time"]
        self.optimizer = kwargs_base["optimizer"]
        self.n_jobs = kwargs_base["n_jobs"]
        self.verbosity = kwargs_base["verbosity"]
        self.warnings = kwargs_base["warnings"]
        self.random_state = kwargs_base["random_state"]
        self.warm_start = kwargs_base["warm_start"]
        self.memory = kwargs_base["memory"]
        self.scatter_init = kwargs_base["scatter_init"]
        self.meta_learn = kwargs_base["meta_learn"]
        # "proxy_dataset" is accepted in kwargs_base as well; store it so no
        # accepted option is silently dropped.
        self.proxy_dataset = kwargs_base["proxy_dataset"]
        self.get_search_path = kwargs_base["get_search_path"]

    def init_p_bar(self, _config_, _cand_):
        # Only create a progress bar at verbosity level 2.
        if self.verbosity == 2:
            self.p_bar = tqdm(**_config_._tqdm_dict(_cand_))
        else:
            self.p_bar = None

    def update_p_bar(self, n, _cand_):
        if self.p_bar is not None:
            self.p_bar.update(n)
            self.p_bar.set_postfix(best_score=str(_cand_.score_best))

    def close_p_bar(self):
        if self.p_bar is not None:
            self.p_bar.close()
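    # Rough lifecycle of the progress-bar helpers as an optimizer would use
    # them (sketch only; `_config_` and `_cand_` are supplied by the caller):
    #
    #   _config_.init_p_bar(_config_, _cand_)
    #   for _ in range(_config_.n_iter):
    #       ...                              # one optimization step
    #       _config_.update_p_bar(1, _cand_)
    #   _config_.close_p_bar()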

    def _tqdm_dict(self, _cand_):
        """Generates the parameter dict for tqdm in the iteration loop of each optimizer"""
        return {
            "total": self.n_iter,
            "desc": "Thread "
            + str(_cand_.nth_process)
            + " -> "
            + _cand_._model_.func_.__name__,
            "position": _cand_.nth_process,
            "leave": True,
        }
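    # For example, thread 0 optimizing an objective function named "model"
    # would get the description "Thread 0 -> model" (the function name here is
    # hypothetical).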

    def _set_random_seed(self, thread=0):
        """Sets the random seed separately for each thread (to avoid getting the same results in each thread)"""
        if self.random_state is not None:
            rand = int(self.random_state)
        else:
            rand = 0

        # Seeding numpy also covers scipy, which draws its random numbers from
        # numpy.random (the old scipy.random alias no longer exists).
        random.seed(rand + thread)
        np.random.seed(rand + thread)
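    # With random_state=42, thread 0 seeds with 42, thread 1 with 43, and so
    # on, so parallel runs do not repeat the same random sequence.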

    def _check_data(self, X, y):
        """Checks if the data is a pandas DataFrame and converts it to a numpy array if necessary"""
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, pd.DataFrame):
            y = y.values

        return X, y
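    # Example: passing X as a DataFrame and y as a numpy array returns
    # (X.values, y), i.e. only DataFrame inputs are converted.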

    def set_n_jobs(self):
        """Sets the number of jobs to run in parallel (capped at the number of CPU cores; -1 means use all cores)"""
        num_cores = multiprocessing.cpu_count()
        if self.n_jobs == -1 or self.n_jobs > num_cores:
            self.n_jobs = num_cores
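    # On a machine reporting 8 CPU cores: n_jobs=4 stays 4, while n_jobs=-1 and
    # n_jobs=16 are both set to 8.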