Completed
Push — master ( aee0ed...be0089 )
by Simon
01:27
created

hyperactive.optimizers.sequence_model.tree_structured_parzen_estimators   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 104
Duplicated Lines 15.38 %

Importance

Changes 0
Metric Value
eloc 66
dl 16
loc 104
rs 10
c 0
b 0
f 0
wmc 12

8 Methods

Rating   Name   Duplication   Size   Complexity  
A TreeStructuredParzenEstimators._get_samples() 0 12 1
A TreeStructuredParzenEstimators.propose_location() 0 13 1
A TreeStructuredParzenEstimators.expected_improvement() 0 9 1
A TreeStructuredParzenEstimators._iterate() 0 16 3
A TreeStructuredParzenEstimators._init_opt_positioner() 16 16 2
A Bayesian.__init__() 0 2 1
A TreeStructuredParzenEstimators.__init__() 0 4 1
A TreeStructuredParzenEstimators._all_possible_pos() 0 7 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
6
import numpy as np
7
8
from sklearn.neighbors import KernelDensity
9
10
from ...base_optimizer import BaseOptimizer
11
from ...base_positioner import BasePositioner
12
13
14
class TreeStructuredParzenEstimators(BaseOptimizer):
    """Optimizer that proposes positions from two kernel density estimates.

    The evaluated positions are split into a high-scoring and a low-scoring
    group, one ``KernelDensity`` model is fitted to each group, and the grid
    position with the largest density ratio (best / worst) is proposed next.
    Higher scores are treated as better.
    """

    def __init__(self, _main_args_, _opt_args_):
        """Initialise the base optimizer and create the two density models.

        Parameters
        ----------
        _main_args_, _opt_args_
            Forwarded unchanged to ``BaseOptimizer.__init__``. This class reads
            ``gamme_tpe``, ``start_up_evals`` and ``warm_start_smbo`` from
            ``self._opt_args_`` (presumably set by the base class from
            ``_opt_args_`` -- TODO confirm).
        """
        super().__init__(_main_args_, _opt_args_)
        self.kd_best = KernelDensity()
        self.kd_worst = KernelDensity()

    def _get_samples(self):
        """Split the evaluated positions into a best and a worst group.

        Returns
        -------
        tuple of numpy.ndarray
            ``(best_samples, worst_samples)``, both taken row-wise from
            ``self.X_sample``. The best group holds the highest-scoring
            fraction of the samples, the worst group holds the remainder.
        """
        n_samples = self.X_sample.shape[0]

        # The attribute name ``gamme_tpe`` is kept exactly as the options
        # object is read elsewhere. Clamp to at least one sample: with a count
        # of zero a ``[-0:]`` slice would select *every* sample as "best".
        n_best = max(1, int(n_samples * self._opt_args_.gamme_tpe))

        # Row indices ordered from the highest to the lowest score.
        ranked = self.Y_sample[:, 0].argsort()[::-1]
        index_best = ranked[:n_best]
        # Take the true complement. Applying ``~`` to an integer index array
        # is a bitwise NOT (-(i + 1)), which picks mirrored rows from the end
        # instead of "everything that is not in the best group".
        index_worst = ranked[n_best:]

        if index_worst.size == 0:
            # Too few samples to split (e.g. a single observation): fit both
            # densities on the same rows rather than on an empty array.
            index_worst = index_best

        return self.X_sample[index_best], self.X_sample[index_worst]

    def _all_possible_pos(self, cand):
        """Enumerate every grid position of the candidate's search space.

        Stores the number of dimensions in ``self.n_dim`` and the full grid,
        one position per row, in ``self.all_pos_comb``.

        Parameters
        ----------
        cand
            Candidate whose ``_space_.dim`` is iterated; each entry ``d``
            contributes the integer axis ``0 .. d`` (inclusive).
        """
        pos_space = [np.arange(dim_ + 1) for dim_ in cand._space_.dim]

        self.n_dim = len(pos_space)
        self.all_pos_comb = np.array(np.meshgrid(*pos_space)).T.reshape(
            -1, self.n_dim
        )

    def expected_improvement(self):
        """Return the best/worst density ratio for every grid position.

        Returns
        -------
        numpy.ndarray
            One value per row of ``self.all_pos_comb``. Positions where the
            worst-group density is exactly zero get a ratio of 0.
        """
        logprob_best = self.kd_best.score_samples(self.all_pos_comb)
        logprob_worst = self.kd_worst.score_samples(self.all_pos_comb)

        prob_best = np.exp(logprob_best)
        prob_worst = np.exp(logprob_worst)

        # ``where`` skips the division for zero denominators; those entries
        # keep the 0 they were initialised with in ``out``.
        return np.divide(
            prob_best, prob_worst, out=np.zeros_like(prob_worst), where=prob_worst != 0
        )

    def propose_location(self, cand):
        """Fit both densities and return the most promising grid position.

        Parameters
        ----------
        cand
            Unused here; kept so the call signature stays unchanged.

        Returns
        -------
        numpy.ndarray
            A copy of the row of ``self.all_pos_comb`` with the largest
            density ratio.
        """
        best_samples, worst_samples = self._get_samples()

        self.kd_best.fit(best_samples)
        self.kd_worst.fit(worst_samples)

        exp_imp = self.expected_improvement()
        # The last entry of the ascending argsort is the first entry of the
        # descending order, so tie-breaking is unchanged while the full grid
        # no longer has to be reordered just to read one row.
        index_top = exp_imp.argsort()[-1]

        # Copy so callers never hold a view into the shared grid.
        return self.all_pos_comb[index_top].copy()

    def _iterate(self, i, _cand_, _p_):
        """Run one optimization step and record the evaluated position.

        Parameters
        ----------
        i
            Iteration counter; while it is below ``start_up_evals`` the next
            position is drawn via ``_p_.move_random``.
        _cand_
            Candidate providing ``eval_pos`` and ``score_best``.
        _p_
            Positioner whose ``pos_new`` / ``score_new`` are updated.

        Returns
        -------
        The (possibly updated) candidate.
        """
        if i < self._opt_args_.start_up_evals:
            _p_.pos_new = _p_.move_random(_cand_)
        else:
            _p_.pos_new = self.propose_location(_cand_)

        _p_.score_new = _cand_.eval_pos(_p_.pos_new)

        if _p_.score_new > _cand_.score_best:
            _cand_, _p_ = self._update_pos(_cand_, _p_)

        # Append the new observation to the training data of the densities.
        self.X_sample = np.vstack((self.X_sample, _p_.pos_new))
        self.Y_sample = np.vstack((self.Y_sample, _p_.score_new))

        return _cand_

    def _init_opt_positioner(self, _cand_):
        """Create the positioner and seed the sample arrays.

        With ``warm_start_smbo`` set, the samples come from the candidate's
        memory (``mem._get_para`` / ``mem._get_score``); otherwise they start
        with the candidate's current best position and score.

        Parameters
        ----------
        _cand_
            Candidate supplying the search space and the starting point.

        Returns
        -------
        Bayesian
            Positioner initialised at the candidate's best position.
        """
        _p_ = Bayesian()

        self._all_possible_pos(_cand_)

        if self._opt_args_.warm_start_smbo:
            self.X_sample = _cand_.mem._get_para()
            self.Y_sample = _cand_.mem._get_score()
        else:
            # Shape both as single-row 2-D arrays so ``np.vstack`` in
            # ``_iterate`` can append further rows.
            self.X_sample = _cand_.pos_best.reshape(1, -1)
            self.Y_sample = np.array(_cand_.score_best).reshape(1, -1)

        _p_.pos_current = _cand_.pos_best
        _p_.score_current = _cand_.score_best

        return _p_
99
100
101
class Bayesian(BasePositioner):
    """Positioner created by ``_init_opt_positioner`` in this module.

    Adds no behavior of its own: construction is forwarded unchanged to
    ``BasePositioner``.
    """

    def __init__(self, *args, **kwargs):
        """Pass every positional and keyword argument on to the base class."""
        super(Bayesian, self).__init__(*args, **kwargs)
104