Passed
Push — master ( c241e4...b050e9 )
by Simon
01:57
created

TPEOptimizer.__init__()   A

Complexity

Conditions 1

Size

Total Lines 25
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 23
dl 0
loc 25
rs 9.328
c 0
b 0
f 0
cc 1
nop 11

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to grow inconsistent as the method comes to need more, or different, data.

There are several approaches to avoiding long parameter lists:

1
"""TPE (Tree-structured Parzen Estimator) optimizer."""
2
# copyright: hyperactive developers, MIT License (see LICENSE file)
3
4
from .._adapters._base_optuna_adapter import _BaseOptunaAdapter
5
6
7
class TPEOptimizer(_BaseOptunaAdapter):
    """Tree-structured Parzen Estimator optimizer.

    Parameters
    ----------
    param_space : dict[str, tuple or list or optuna distributions]
        The search space to explore. Dictionary with parameter names
        as keys and either tuples/lists of (low, high) or
        optuna distribution objects as values.
    n_trials : int, default=100
        Number of optimization trials.
    initialize : dict[str, int], default=None
        The method to generate initial positions. A dictionary with
        the following key literals and the corresponding value type:
        {"grid": int, "vertices": int, "random": int, "warm_start": list[dict]}
    random_state : None, int, default=None
        If None, create a new random state. If int, create a new random state
        seeded with the value.
    early_stopping : int, default=None
        Number of trials after which to stop if no improvement.
    max_score : float, default=None
        Maximum score threshold. Stop optimization when reached.
    n_startup_trials : int, default=10
        Number of startup trials for TPE.
    n_ei_candidates : int, default=24
        Number of candidates for expected improvement.
    weights : callable, default=None
        Weight function for TPE.
    experiment : BaseExperiment, optional
        The experiment to optimize parameters for.
        Optional, can be passed later via ``set_params``.

    Examples
    --------
    Basic usage of TPEOptimizer with a scikit-learn experiment:

    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from hyperactive.opt.optuna import TPEOptimizer
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> X, y = load_iris(return_X_y=True)
    >>> sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
    >>> param_space = {
    ...     "C": (0.01, 10),
    ...     "gamma": (0.0001, 10),
    ... }
    >>> optimizer = TPEOptimizer(
    ...     param_space=param_space, n_trials=50, experiment=sklearn_exp
    ... )
    >>> best_params = optimizer.run()
    """

    _tags = {
        "info:name": "Tree-structured Parzen Estimator",
        "info:local_vs_global": "global",
        "info:explore_vs_exploit": "exploit",
        "info:compute": "middle",
        "python_dependencies": ["optuna"],
    }

    def __init__(
        self,
        param_space=None,
        n_trials=100,
        initialize=None,
        random_state=None,
        early_stopping=None,
        max_score=None,
        n_startup_trials=10,
        n_ei_candidates=24,
        weights=None,
        experiment=None,
    ):
        # NOTE: the long, flat parameter list is intentional — the
        # skbase/sklearn estimator contract requires each constructor
        # argument to be an explicitly named, individually settable
        # parameter (via ``get_params``/``set_params``), so bundling them
        # into a config object would break the interface.

        # TPE-specific settings are stored on self; the generic
        # optimization settings are handled by the adapter base class.
        self.n_startup_trials = n_startup_trials
        self.n_ei_candidates = n_ei_candidates
        self.weights = weights

        super().__init__(
            param_space=param_space,
            n_trials=n_trials,
            initialize=initialize,
            random_state=random_state,
            early_stopping=early_stopping,
            max_score=max_score,
            experiment=experiment,
        )

    def _get_optimizer(self):
        """Construct the Optuna TPE sampler from the stored settings.

        Returns
        -------
        optuna.samplers.TPESampler
            The configured Optuna TPE sampler instance.
        """
        import optuna

        optimizer_kwargs = {
            "n_startup_trials": self.n_startup_trials,
            "n_ei_candidates": self.n_ei_candidates,
        }

        # Only forward optional settings when they are set, so that
        # optuna's own defaults apply otherwise.
        if self.weights is not None:
            optimizer_kwargs["weights"] = self.weights

        if self.random_state is not None:
            optimizer_kwargs["seed"] = self.random_state

        return optuna.samplers.TPESampler(**optimizer_kwargs)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the optimizer.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the parameter set to return; forwarded to the
            parent class implementation.

        Returns
        -------
        list of dict
            Keyword-argument dictionaries, each valid as ``cls(**params)``.
        """
        from sklearn.datasets import load_wine
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.svm import SVC

        from hyperactive.experiment.integrations import SklearnCvExperiment

        # Test case 1: Basic TPE with standard parameters
        params = super().get_test_params(parameter_set)
        params[0].update(
            {
                "n_startup_trials": 5,
                "n_ei_candidates": 12,
            }
        )

        # Test case 2: Mixed parameter types with warm start
        X, y = load_wine(return_X_y=True)
        rf_exp = SklearnCvExperiment(
            estimator=RandomForestClassifier(random_state=42), X=X, y=y
        )

        mixed_param_space = {
            "n_estimators": (10, 100),  # Continuous integer
            "max_depth": [3, 5, 7, 10, None],  # Mixed discrete/None
            "criterion": ["gini", "entropy"],  # Categorical
            "min_samples_split": (2, 20),  # Continuous integer
            "bootstrap": [True, False],  # Boolean
        }

        # Warm start with known good configuration
        warm_start_points = [
            {
                "n_estimators": 50,
                "max_depth": 5,
                "criterion": "gini",
                "min_samples_split": 2,
                "bootstrap": True,
            }
        ]

        params.append(
            {
                "param_space": mixed_param_space,
                "n_trials": 20,
                "experiment": rf_exp,
                "n_startup_trials": 3,  # Fewer random trials before TPE
                "n_ei_candidates": 24,  # More EI candidates for better optimization
                "initialize": {"warm_start": warm_start_points},
            }
        )

        # Test case 3: High-dimensional continuous space (TPE strength)
        svm_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
        high_dim_space = {
            "C": (0.01, 100),
            "gamma": (1e-6, 1e2),
            "coef0": (0.0, 10.0),
            "degree": (2, 5),
            "tol": (1e-5, 1e-2),
        }

        params.append(
            {
                "param_space": high_dim_space,
                "n_trials": 25,
                "experiment": svm_exp,
                "n_startup_trials": 8,  # More startup for exploration
                "n_ei_candidates": 32,  # More candidates for complex space
            }
        )

        return params