hyperactive.base_optimizer.BaseOptimizer.search() - Code Metrics - Inspection of "Merge branch 'master' of https://github.com/SimonB..." - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 3f1f7f...e4a05a )

by Simon

created 2019-09-08 11:04 UTC

hyperactive.base_optimizer.BaseOptimizer.search() B

↳ Parent: hyperactive.base_optimizer

Complexity

Conditions

Size

Total Lines	34
Code Lines	25

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	25
dl	0
loc	34
rs	8.8133
c	0
b	0
f	0
cc	5
nop	4

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License


import pickle
import numpy as np
import multiprocessing

from importlib import import_module
from functools import partial

from .base_positioner import BasePositioner
from .config import Config
from .opt_args import Arguments
from .sub_packages import MetaLearn
from .util import initialize_search, finish_search_, sort_for_best


class BaseOptimizer:
    """Common search loop and public API shared by the optimizers.

    `search` calls ``self._init_opt_positioner`` and ``self._iterate``, which
    are not defined in this class; they have to be supplied by a subclass.
    """

    def __init__(self, *args, **kwargs):
        """Build the configuration objects from the given arguments.

        Parameters
        ----------

        search_config: dict
            A dictionary providing the model and hyperparameter search space for the
            optimization process.
        n_iter: int
            The number of iterations the optimizer performs.
        metric: string, optional (default: "accuracy")
            The metric the model is evaluated by.
        n_jobs: int, optional (default: 1)
            The number of searches to run in parallel.
        cv: int, optional (default: 5)
            The number of folds for the cross validation.
        verbosity: int, optional (default: 1)
            Verbosity level. 1 prints out warm_start points and their scores.
        random_state: int, optional (default: None)
            Sets the random seed.
        warm_start: dict, optional (default: False)
            Dictionary that defines a start point for the optimizer.
        memory: bool, optional (default: True)
            A memory, that saves the evaluation during the optimization to save time when
            optimizer returns to position.
        scatter_init: int, optional (default: False)
            Defines the number n of random positions that should be evaluated with 1/n the
            training data, to find a better initial position.

        Returns
        -------
        None

        """
        self._config_ = Config(*args, **kwargs)
        self._arg_ = Arguments(**kwargs)

        if self._config_.meta_learn:
            self._meta_ = MetaLearn(self._config_.search_config)

        self.search_config = self._config_.search_config
        self.n_iter = self._config_.n_iter

        # The search-path buffers only exist when path recording is enabled.
        if self._config_.get_search_path:
            self.pos_list = []
            self.score_list = []

    def _hill_climb_iteration(self, _cand_, _p_, X, y):
        """Propose and evaluate one new position; accept it if it is a new best.

        The position comes from ``_p_.move_climb`` and is scored with
        ``_cand_.eval_pos``. It is accepted only when its score is strictly
        greater than ``_cand_.score_best``.

        Returns
        -------
        (_cand_, _p_) : the (possibly updated) candidate and positioner
        """
        _p_.pos_new = _p_.move_climb(_cand_, _p_.pos_current)
        _p_.score_new = _cand_.eval_pos(_p_.pos_new, X, y)

        if _p_.score_new > _cand_.score_best:
            _cand_, _p_ = self._update_pos(_cand_, _p_)

        return _cand_, _p_

    def _init_base_positioner(self, _cand_, positioner=None, pos_para=None):
        """Create a positioner that starts at the candidate's best position.

        Parameters
        ----------
        _cand_ : object providing ``pos_best`` and ``score_best``
        positioner : callable, optional
            Positioner class/factory; ``BasePositioner`` is used when falsy.
        pos_para : dict, optional
            Keyword arguments forwarded to the positioner (default: none).

        Returns
        -------
        _p_ : the initialized positioner
        """
        # None instead of a shared mutable ``{}`` default.
        if pos_para is None:
            pos_para = {}

        if positioner:
            _p_ = positioner(**pos_para)
        else:
            _p_ = BasePositioner(**pos_para)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_

    def _update_pos(self, _cand_, _p_):
        """Promote the positioner's new position/score to current and best.

        Returns
        -------
        (_cand_, _p_) : the same objects, updated in place
        """
        _cand_.pos_best = _p_.pos_new
        _cand_.score_best = _p_.score_new

        _p_.pos_current = _p_.pos_new
        _p_.score_current = _p_.score_new

        return _cand_, _p_

    def _record_search_path(self, _p_):
        """Append the newest position(s) and score(s) to the search path.

        ``_p_`` is either a single positioner or a list of positioners. One
        array per call is appended to ``self.pos_list`` and
        ``self.score_list``; an empty list of positioners yields empty arrays.
        """
        positioners = _p_ if isinstance(_p_, list) else [_p_]

        self.pos_list.append(np.array([p.pos_new for p in positioners]))
        self.score_list.append(np.array([p.score_new for p in positioners]))

    def search(self, nth_process, X, y):
        """Run the optimization loop for one process.

        Parameters
        ----------
        nth_process : int
            Number of the process this search belongs to.
        X, y : training data passed on to the helpers and to ``_iterate``

        Returns
        -------
        (_cand_, _p_) : the final candidate and the positioner(s) used
        """
        self._config_, _cand_ = initialize_search(self._config_, nth_process, X, y)
        _p_ = self._init_opt_positioner(_cand_, X, y)

        for i in range(self._config_.n_iter):
            _cand_ = self._iterate(i, _cand_, _p_, X, y)
            self._config_.update_p_bar(1)

            # NOTE(review): when this runs inside a multiprocessing worker the
            # appends presumably land on the worker's copy of self - confirm
            # whether the search path is expected to work with n_jobs > 1.
            if self._config_.get_search_path:
                self._record_search_path(_p_)

        _cand_ = finish_search_(self._config_, _cand_, X, y)

        return _cand_, _p_

    def _search_multiprocessing(self, X, y):
        """Wrapper for the parallel search. Passes integer that corresponds to process number

        Returns
        -------
        _cand_list : list with the final candidate of every process
        """
        search = partial(self.search, X=X, y=y)

        # pool.map returns one (candidate, positioner) tuple per process, so
        # the result has to be split per item rather than unpacked directly.
        with multiprocessing.Pool(self._config_.n_jobs) as pool:
            results = pool.map(search, self._config_._n_process_range)

        return [_cand_ for _cand_, _p_ in results]

    def _run_one_job(self, X, y):
        """Run a single search in this process and store its best model/score."""
        _cand_, _p_ = self.search(0, X, y)
        if self._config_.meta_learn:
            self._meta_.collect(X, y, _cand_list=[_cand_])

        self.model_best = _cand_.model
        self.score_best = _cand_.score_best
        start_point = _cand_._get_warm_start()

        if self._config_.verbosity:
            print("\n", self._config_.metric, self.score_best)
            print("start_point =", start_point)

        if self._config_.get_search_path:
            self._p_ = _p_

    def _run_multiple_jobs(self, X, y):
        """Run the searches in parallel and keep the best model/score of all."""
        _cand_list = self._search_multiprocessing(X, y)

        start_point_list = []
        score_best_list = []
        model_best_list = []
        for _cand_ in _cand_list:
            model_best_list.append(_cand_.model)
            score_best_list.append(_cand_.score_best)
            start_point_list.append(_cand_._get_warm_start())

        start_point_sorted, score_best_sorted = sort_for_best(
            start_point_list, score_best_list
        )

        model_best_sorted, score_best_sorted = sort_for_best(
            model_best_list, score_best_list
        )

        if self._config_.verbosity:
            print("\nList of start points (best first):")
            for start_point, score_best in zip(start_point_sorted, score_best_sorted):
                print("\n", self._config_.metric, score_best)
                print("start_point =", start_point)

        self.score_best = score_best_sorted[0]
        self.model_best = model_best_sorted[0]

    def fit(self, X, y):
        """Public method for starting the search with the training data (X, y)

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]

        y : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        None
        """
        X, y = self._config_._check_data(X, y)

        # keras models are always searched in a single job.
        if self._config_.model_type == "keras":
            self._config_.n_jobs = 1

        if self._config_.n_jobs == 1:
            self._run_one_job(X, y)
        else:
            self._run_multiple_jobs(X, y)

    def predict(self, X_test):
        """Returns the prediction of X_test after a model was searched by `fit`

        Parameters
        ----------
        X_test : array-like or sparse matrix of shape = [n_samples, n_features]

        Returns
        -------
        (unnamed array) : array-like, shape = [n_samples] or [n_samples, n_outputs]
        """
        return self.model_best.predict(X_test)

    def score(self, X_test, y_true):
        """Returns the score calculated from the prediction of X_test and the true values from y_test

        For "sklearn", "xgboost", "lightgbm" and "catboost" models the metric
        named by the configuration is looked up in ``sklearn.metrics``. For
        "keras" models the second value returned by ``model.evaluate`` is used.

        Parameters
        ----------
        X_test : array-like or sparse matrix of shape = [n_samples, n_features]

        y_true : array-like, shape = [n_samples] or [n_samples, n_outputs]

        Returns
        -------
        (unnamed float) : float, or None for any other model type
        """
        model_type = self._config_.model_type

        if model_type in ["sklearn", "xgboost", "lightgbm", "catboost"]:
            module = import_module("sklearn.metrics")
            metric_class = getattr(module, self._config_.metric)

            y_pred = self.model_best.predict(X_test)
            return metric_class(y_true, y_pred)

        if model_type in ["keras"]:
            loss, score = self.model_best.evaluate(X_test, y_true, verbose=0)
            return score

        return None

    def export(self, filename):
        """Exports the best model, that was found by the optimizer during `fit`

        Nothing is written when no model is available yet.

        NOTE: the model is serialized with pickle; only load such files from
        trusted sources.

        Parameters
        ----------
        filename : string or path

        Returns
        -------
        None
        """
        model = getattr(self, "model_best", None)
        if model is None:
            return

        # Context manager guarantees the file is flushed and closed.
        with open(filename, "wb") as model_file:
            pickle.dump(model, model_file)


1			# Author: Simon Blanke
2			# Email: [email protected]
3			# License: MIT License
4
5
6			import pickle
7			import numpy as np
8			import multiprocessing
9
10			from importlib import import_module
11			from functools import partial
12
13			from .base_positioner import BasePositioner
14			from .config import Config
15			from .opt_args import Arguments
16			from .sub_packages import MetaLearn
17			from .util import initialize_search, finish_search_, sort_for_best
18
19
20			class BaseOptimizer:
21			def __init__(self, *args, **kwargs):
22
23			"""
24
25			Parameters
26			----------
27
28			search_config: dict
29			A dictionary providing the model and hyperparameter search space for the
30			optimization process.
31			n_iter: int
32			The number of iterations the optimizer performs.
33			metric: string, optional (default: "accuracy")
34			The metric the model is evaluated by.
35			n_jobs: int, optional (default: 1)
36			The number of searches to run in parallel.
37			cv: int, optional (default: 5)
38			The number of folds for the cross validation.
39			verbosity: int, optional (default: 1)
40			Verbosity level. 1 prints out warm_start points and their scores.
41			random_state: int, optional (default: None)
42			Sets the random seed.
43			warm_start: dict, optional (default: False)
44			Dictionary that definies a start point for the optimizer.
45			memory: bool, optional (default: True)
46			A memory, that saves the evaluation during the optimization to save time when
47			optimizer returns to position.
48			scatter_init: int, optional (default: False)
49			Defines the number n of random positions that should be evaluated with 1/n the
50			training data, to find a better initial position.
51
52			Returns
53			-------
54			None
55
56			"""
57
58			self._config_ = Config(*args, **kwargs)
59			self._arg_ = Arguments(**kwargs)
60
61			if self._config_.meta_learn:
62			self._meta_ = MetaLearn(self._config_.search_config)
63
64			self.search_config = self._config_.search_config
65			self.n_iter = self._config_.n_iter
66
67			if self._config_.get_search_path:
68			self.pos_list = []
69			self.score_list = []
70
71			def _hill_climb_iteration(self, _cand_, _p_, X, y):
72			_p_.pos_new = _p_.move_climb(_cand_, _p_.pos_current)
73			_p_.score_new = _cand_.eval_pos(_p_.pos_new, X, y)
74
75			if _p_.score_new > _cand_.score_best:
76			_cand_, _p_ = self._update_pos(_cand_, _p_)
77
78			return _cand_, _p_
79
80			def _init_base_positioner(self, _cand_, positioner=None, pos_para={}):
81			if positioner:
82			_p_ = positioner(**pos_para)
83			else:
84			_p_ = BasePositioner(**pos_para)
85
86			_p_.pos_current = _cand_.pos_best
87			_p_.score_current = _cand_.score_best
88
89			return _p_
90
91			def _update_pos(self, _cand_, _p_):
92			_cand_.pos_best = _p_.pos_new
93			_cand_.score_best = _p_.score_new
94
95			_p_.pos_current = _p_.pos_new
96			_p_.score_current = _p_.score_new
97
98			return _cand_, _p_
99
100			def search(self, nth_process, X, y):
101			self._config_, _cand_ = initialize_search(self._config_, nth_process, X, y)
102			_p_ = self._init_opt_positioner(_cand_, X, y)
103
104			for i in range(self._config_.n_iter):
105			_cand_ = self._iterate(i, _cand_, _p_, X, y)
106			self._config_.update_p_bar(1)
107
108			if self._config_.get_search_path:
109			pos_list = []
110			score_list = []
111			if isinstance(_p_, list):
112			for p in _p_:
113			pos_list.append(p.pos_new)
114			score_list.append(p.score_new)
115
116			pos_list_ = np.array(pos_list)
117			score_list_ = np.array(score_list)
118
119			self.pos_list.append(pos_list_)
			Issue (introduced 2019-09-08 11:07 UTC): The variable `pos_list_` does not seem to be defined for all execution paths.
120			self.score_list.append(score_list_)
			Issue (introduced 2019-09-08 11:07 UTC): The variable `score_list_` does not seem to be defined for all execution paths.
121			else:
122			pos_list.append(_p_.pos_new)
123			score_list.append(_p_.score_new)
124
125			pos_list_ = np.array(pos_list)
126			score_list_ = np.array(score_list)
127
128			self.pos_list.append(pos_list_)
129			self.score_list.append(score_list_)
130
131			_cand_ = finish_search_(self._config_, _cand_, X, y)
132
133			return _cand_, _p_
134
135			def _search_multiprocessing(self, X, y):
136			"""Wrapper for the parallel search. Passes integer that corresponds to process number"""
137			pool = multiprocessing.Pool(self._config_.n_jobs)
138			search = partial(self.search, X=X, y=y)
139
140			_cand_list, _p_list = pool.map(search, self._config_._n_process_range)
141
142			return _cand_list
143
144			def _run_one_job(self, X, y):
145			_cand_, _p_ = self.search(0, X, y)
146			if self._config_.meta_learn:
147			self._meta_.collect(X, y, _cand_list=[_cand_])
148
149			self.model_best = _cand_.model
150			self.score_best = _cand_.score_best
151			start_point = _cand_._get_warm_start()
152
153			if self._config_.verbosity:
154			print("\n", self._config_.metric, self.score_best)
155			print("start_point =", start_point)
156
157			if self._config_.get_search_path:
158			self._p_ = _p_
159
160			def _run_multiple_jobs(self, X, y):
161			_cand_list = self._search_multiprocessing(X, y)
162
163			start_point_list = []
164			score_best_list = []
165			model_best_list = []
166			for _cand_ in _cand_list:
167			model_best = _cand_.model
168			score_best = _cand_.score_best
169			start_point = _cand_._get_warm_start()
170
171			start_point_list.append(start_point)
172			score_best_list.append(score_best)
173			model_best_list.append(model_best)
174
175			start_point_sorted, score_best_sorted = sort_for_best(
176			start_point_list, score_best_list
177			)
178
179			model_best_sorted, score_best_sorted = sort_for_best(
180			model_best_list, score_best_list
181			)
182
183			if self._config_.verbosity:
184			print("\nList of start points (best first):")
185			for start_point, score_best in zip(start_point_sorted, score_best_sorted):
186			print("\n", self._config_.metric, score_best)
187			print("start_point =", start_point)
188
189			self.score_best = score_best_sorted[0]
190			self.model_best = model_best_sorted[0]
191
192			def fit(self, X, y):
193			"""Public method for starting the search with the training data (X, y)
194
195			Parameters
196			----------
197			X : array-like or sparse matrix of shape = [n_samples, n_features]
198
199			y : array-like, shape = [n_samples] or [n_samples, n_outputs]
200
201			Returns
202			-------
203			None
204			"""
205			X, y = self._config_._check_data(X, y)
206
207			if self._config_.model_type == "keras":
208			self._config_.n_jobs = 1
209
210			if self._config_.n_jobs == 1:
211			self._run_one_job(X, y)
212
213			else:
214			self._run_multiple_jobs(X, y)
215
216			def predict(self, X_test):
217			"""Returns the prediction of X_test after a model was searched by `fit`
218
219			Parameters
220			----------
221			X_test : array-like or sparse matrix of shape = [n_samples, n_features]
222
223			Returns
224			-------
225			(unnamed array) : array-like, shape = [n_samples] or [n_samples, n_outputs]
226			"""
227			return self.model_best.predict(X_test)
228
229			def score(self, X_test, y_true):
230			"""Returns the score calculated from the prediction of X_test and the true values from y_test
231
232			Parameters
233			----------
234			X_test : array-like or sparse matrix of shape = [n_samples, n_features]
235
236			y_true : array-like, shape = [n_samples] or [n_samples, n_outputs]
237
238			Returns
239			-------
240			(unnamed float) : float
241			"""
242			if self._config_.model_type in ["sklearn", "xgboost", "lightgbm", "catboost"]:
243			module = import_module("sklearn.metrics")
244			metric_class = getattr(module, self._config_.metric)
245
246			y_pred = self.model_best.predict(X_test)
247			return metric_class(y_true, y_pred)
248			elif self._config_.model_type in ["keras"]:
249			loss, score = self.model_best.evaluate(X_test, y_true, verbose=0)
250			return score
251
252			"""
253			y_pred = self.model_best.predict(X_test)
254
255			metric_type = list(self._config_.metric.keys())[0]
256			metric_class = self._config_.metric[metric_type]
257
258			return metric_class(y_true, y_pred)
259			"""
260
261			def export(self, filename):
262			"""Exports the best model, that was found by the optimizer during `fit`
263
264			Parameters
265			----------
266			filename : string or path
267
268			Returns
269			-------
270			None
271			"""
272			if self.model_best:
273			pickle.dump(self.model_best, open(filename, "wb"))
274

SimonBlanke / Hyperactive

Push — master ( 3f1f7f...e4a05a )

hyperactive.base_optimizer.BaseOptimizer.search() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like