1
|
|
|
# Author: Simon Blanke |
2
|
|
|
# Email: [email protected] |
3
|
|
|
# License: MIT License |
4
|
|
|
|
5
|
|
|
|
6
|
|
|
import random |
7
|
|
|
from tqdm.auto import tqdm |
8
|
|
|
import scipy |
9
|
|
|
import numpy as np |
10
|
|
|
import pandas as pd |
11
|
|
|
import multiprocessing |
12
|
|
|
|
13
|
|
|
from .util import merge_dicts |
14
|
|
|
|
15
|
|
|
|
16
|
|
|
class Config:
    """Holds the search configuration and the general run settings.

    The first positional argument is the search configuration; its ``keys()``
    become ``model_list``. Keyword arguments are merged over the defaults
    declared in ``__init__`` and the general settings are exposed as
    attributes.
    """

    def __init__(self, *args, **kwargs):
        """Store the search config, merge keyword overrides and resolve n_jobs.

        Parameters
        ----------
        *args
            ``args[0]`` is the search configuration and must provide
            ``keys()``.
        **kwargs
            Overrides for the defaults in ``kwargs_base`` below.

        Raises
        ------
        IndexError
            If no positional argument is passed.
        """
        kwargs_base = {
            "n_iter": 10,
            "optimizer": "RandomSearch",
            "n_jobs": 1,
            "verbosity": 2,
            "warnings": True,
            "random_state": None,
            "warm_start": False,
            "memory": True,
            "scatter_init": False,
            "meta_learn": False,
            "proxy_dataset": False,
            "repulsor": False,
            "get_search_path": False,
        }

        self.search_config = args[0]
        # merge_dicts is a project helper; presumably the caller's kwargs take
        # precedence over the defaults -- verify against .util.
        kwargs_base = merge_dicts(kwargs_base, kwargs)
        self._set_general_args(kwargs_base)

        self.model_list = list(self.search_config.keys())
        self.n_models = len(self.model_list)

        self.set_n_jobs()
        self._n_process_range = range(0, int(self.n_jobs))

        # Always define the attribute so update_p_bar/close_p_bar are safe to
        # call even when init_p_bar has not run yet.
        self.p_bar = None

    def _set_general_args(self, kwargs_base):
        """Copy the general settings from the merged dict onto the instance.

        Note: "proxy_dataset" and "repulsor" are accepted as defaults in
        ``__init__`` but are not stored as attributes here.
        """
        self.n_iter = kwargs_base["n_iter"]
        self.optimizer = kwargs_base["optimizer"]
        self.n_jobs = kwargs_base["n_jobs"]
        self.verbosity = kwargs_base["verbosity"]
        self.warnings = kwargs_base["warnings"]
        self.random_state = kwargs_base["random_state"]
        self.warm_start = kwargs_base["warm_start"]
        self.memory = kwargs_base["memory"]
        self.scatter_init = kwargs_base["scatter_init"]
        self.meta_learn = kwargs_base["meta_learn"]
        self.get_search_path = kwargs_base["get_search_path"]

    def init_p_bar(self, _config_, _cand_):
        """Create the tqdm progress bar when verbosity is 2, else store None."""
        if self.verbosity == 2:
            self.p_bar = tqdm(**_config_._tqdm_dict(_cand_))
        else:
            self.p_bar = None

    def update_p_bar(self, n):
        """Advance the progress bar by ``n`` steps if a bar exists."""
        # Compare against None explicitly: a tqdm bar's truth value is derived
        # from its total, so a bar with total == 0 would be skipped otherwise.
        if self.p_bar is not None:
            self.p_bar.update(n)

    def close_p_bar(self):
        """Close the progress bar if a bar exists."""
        # Same reasoning as update_p_bar: never rely on the bar's truthiness,
        # otherwise a zero-length bar would never be closed.
        if self.p_bar is not None:
            self.p_bar.close()

    def _tqdm_dict(self, _cand_):
        """Generates the parameter dict for tqdm in the iteration-loop of each optimizer"""
        return {
            "total": self.n_iter,
            "desc": "Search " + str(_cand_.nth_process),
            "position": _cand_.nth_process,
            "leave": False,
        }

    def _set_random_seed(self, thread=0):
        """Sets the random seed separately for each thread (to avoid getting the same results in each thread)

        The seed is ``int(random_state) + thread``; a falsy ``random_state``
        (None or 0) uses 0 as the base.
        """
        if self.random_state:
            rand = int(self.random_state)
        else:
            rand = 0

        seed = rand + thread
        random.seed(seed)
        np.random.seed(seed)

        # Newer SciPy releases no longer expose a `scipy.random` namespace, so
        # the unconditional call raised AttributeError there. Seed it only when
        # the attribute is still available.
        legacy_scipy_random = getattr(scipy, "random", None)
        if legacy_scipy_random is not None:
            legacy_scipy_random.seed(seed)

    def _check_data(self, X, y):
        """Checks if data is pandas Dataframe and converts to numpy array if necessary

        Returns the (possibly converted) ``X`` and ``y`` as a tuple; inputs
        that are not DataFrames are returned unchanged.
        """
        if isinstance(X, pd.core.frame.DataFrame):
            X = X.values
        if isinstance(y, pd.core.frame.DataFrame):
            y = y.values

        return X, y

    def set_n_jobs(self):
        """Sets the number of jobs to run in parallel

        ``n_jobs == -1`` or a value above the CPU count is replaced by the
        CPU count; any other value is left untouched.
        """
        num_cores = multiprocessing.cpu_count()
        if self.n_jobs == -1 or self.n_jobs > num_cores:
            self.n_jobs = num_cores
105
|
|
|
|