Passed: Push to master (c241e4...b050e9) by Simon, created 01:57

TPEOptimizer.get_test_params() (grade B)

Complexity
    Conditions: 1

Size
    Total Lines: 75
    Code Lines: 49

Duplication
    Lines: 0
    Ratio: 0%

Importance
    Changes: 0

    Metric    Value
    eloc      49
    dl        0
    loc       75
    rs        8.669
    c         0
    b         0
    f         0
    cc        1
    nop       2

How to fix: Long Method

Small methods make your code easier to understand, particularly when combined with a good name. And when a method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments inside a method's body, that is usually a good sign that the commented part should be extracted into a new method, with the comment as a starting point for its name.

Commonly applied refactorings include Extract Method, Replace Temp with Query, and Replace Method with Method Object; Extract Method is sketched below.
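In this hypothetical Python sketch (the invoice example is not from this codebase), the commented block becomes a helper named after its comment:

# Before: one method does everything; the comment marks an extractable block.
def print_invoice(items):
    total = sum(price for _, price in items)
    # format the line items
    lines = [f"{name}: {price:.2f}" for name, price in items]
    print("\n".join(lines))
    print(f"Total: {total:.2f}")


# After: the commented block is extracted, and the comment inspires the name.
def format_line_items(items):
    return "\n".join(f"{name}: {price:.2f}" for name, price in items)


def print_invoice(items):
    total = sum(price for _, price in items)
    print(format_line_items(items))
    print(f"Total: {total:.2f}")


print_invoice([("widget", 9.99), ("gadget", 24.50)])

The flagged method's full source follows.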

"""TPE (Tree-structured Parzen Estimator) optimizer."""
# copyright: hyperactive developers, MIT License (see LICENSE file)

from .._adapters._base_optuna_adapter import _BaseOptunaAdapter


class TPEOptimizer(_BaseOptunaAdapter):
    """Tree-structured Parzen Estimator optimizer.

    Parameters
    ----------
    param_space : dict[str, tuple or list or optuna distributions]
        The search space to explore. Dictionary with parameter names
        as keys and either tuples/lists of (low, high) or
        optuna distribution objects as values.
    n_trials : int, default=100
        Number of optimization trials.
    initialize : dict[str, int], default=None
        The method to generate initial positions. A dictionary with
        the following key literals and the corresponding value type:
        {"grid": int, "vertices": int, "random": int, "warm_start": list[dict]}
    random_state : None, int, default=None
        If None, create a new random state. If int, create a new random state
        seeded with the value.
    early_stopping : int, default=None
        Number of trials after which to stop if no improvement.
    max_score : float, default=None
        Maximum score threshold. Stop optimization when reached.
    n_startup_trials : int, default=10
        Number of startup trials for TPE.
    n_ei_candidates : int, default=24
        Number of candidates for expected improvement.
    weights : callable, default=None
        Weight function for TPE.
    experiment : BaseExperiment, optional
        The experiment to optimize parameters for.
        Optional, can be passed later via ``set_params``.

    Examples
    --------
    Basic usage of TPEOptimizer with a scikit-learn experiment:

    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from hyperactive.opt.optuna import TPEOptimizer
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> X, y = load_iris(return_X_y=True)
    >>> sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
    >>> param_space = {
    ...     "C": (0.01, 10),
    ...     "gamma": (0.0001, 10),
    ... }
    >>> optimizer = TPEOptimizer(
    ...     param_space=param_space, n_trials=50, experiment=sklearn_exp
    ... )
    >>> best_params = optimizer.run()
    """

    _tags = {
        "info:name": "Tree-structured Parzen Estimator",
        "info:local_vs_global": "global",
        "info:explore_vs_exploit": "exploit",
        "info:compute": "middle",
        "python_dependencies": ["optuna"],
    }

    def __init__(
        self,
        param_space=None,
        n_trials=100,
        initialize=None,
        random_state=None,
        early_stopping=None,
        max_score=None,
        n_startup_trials=10,
        n_ei_candidates=24,
        weights=None,
        experiment=None,
    ):
        self.n_startup_trials = n_startup_trials
        self.n_ei_candidates = n_ei_candidates
        self.weights = weights

        super().__init__(
            param_space=param_space,
            n_trials=n_trials,
            initialize=initialize,
            random_state=random_state,
            early_stopping=early_stopping,
            max_score=max_score,
            experiment=experiment,
        )

    def _get_optimizer(self):
        """Get the TPE optimizer.

        Returns
        -------
        optimizer
            The Optuna TPESampler instance
        """
        import optuna

        optimizer_kwargs = {
            "n_startup_trials": self.n_startup_trials,
            "n_ei_candidates": self.n_ei_candidates,
        }

        if self.weights is not None:
            optimizer_kwargs["weights"] = self.weights

        if self.random_state is not None:
            optimizer_kwargs["seed"] = self.random_state

        return optuna.samplers.TPESampler(**optimizer_kwargs)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the optimizer."""
        from sklearn.datasets import load_wine
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.svm import SVC

        from hyperactive.experiment.integrations import SklearnCvExperiment

        # Test case 1: Basic TPE with standard parameters
        params = super().get_test_params(parameter_set)
        params[0].update(
            {
                "n_startup_trials": 5,
                "n_ei_candidates": 12,
            }
        )

        # Test case 2: Mixed parameter types with warm start
        X, y = load_wine(return_X_y=True)
        rf_exp = SklearnCvExperiment(
            estimator=RandomForestClassifier(random_state=42), X=X, y=y
        )

        mixed_param_space = {
            "n_estimators": (10, 100),  # Integer range
            "max_depth": [3, 5, 7, 10, None],  # Discrete, including None
            "criterion": ["gini", "entropy"],  # Categorical
            "min_samples_split": (2, 20),  # Integer range
            "bootstrap": [True, False],  # Boolean
        }

        # Warm start with a known good configuration
        warm_start_points = [
            {
                "n_estimators": 50,
                "max_depth": 5,
                "criterion": "gini",
                "min_samples_split": 2,
                "bootstrap": True,
            }
        ]

        params.append(
            {
                "param_space": mixed_param_space,
                "n_trials": 20,
                "experiment": rf_exp,
                "n_startup_trials": 3,  # Fewer random trials before TPE kicks in
                "n_ei_candidates": 24,  # More EI candidates for better optimization
                "initialize": {"warm_start": warm_start_points},
            }
        )

        # Test case 3: High-dimensional continuous space (TPE strength)
        svm_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
        high_dim_space = {
            "C": (0.01, 100),
            "gamma": (1e-6, 1e2),
            "coef0": (0.0, 10.0),
            "degree": (2, 5),
            "tol": (1e-5, 1e-2),
        }

        params.append(
            {
                "param_space": high_dim_space,
                "n_trials": 25,
                "experiment": svm_exp,
                "n_startup_trials": 8,  # More startup trials for exploration
                "n_ei_candidates": 32,  # More candidates for the complex space
            }
        )

        return params
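Following the report's own advice, Extract Method would shorten the flagged get_test_params: each "# Test case" comment above marks a block that could become a well-named helper. A sketch, assuming the hypothetical helper names _mixed_type_warm_start_case and _high_dim_continuous_case (not part of hyperactive):

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the optimizer."""
        # Test case 1: basic TPE with adjusted startup/EI settings
        params = super().get_test_params(parameter_set)
        params[0].update({"n_startup_trials": 5, "n_ei_candidates": 12})
        # Test cases 2 and 3: each hypothetical helper would build one
        # experiment plus its parameter space and return the config dict.
        params.append(cls._mixed_type_warm_start_case())
        params.append(cls._high_dim_continuous_case())
        return params

The comments that marked each block become searchable method names, which is exactly the naming heuristic described above.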