Passed
Push — master ( c241e4...b050e9 )
by Simon
01:57
created

hyperactive.opt.optuna._qmc_optimizer   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 164
Duplicated Lines 32.93 %

Importance

Changes 0
Metric Value
wmc 4
eloc 72
dl 54
loc 164
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A QMCOptimizer.get_test_params() 54 54 1
A QMCOptimizer._get_optimizer() 0 19 2
A QMCOptimizer.__init__() 0 23 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
"""Quasi-Monte Carlo optimizer."""
2
# copyright: hyperactive developers, MIT License (see LICENSE file)
3
4
from .._adapters._base_optuna_adapter import _BaseOptunaAdapter
5
6
7
class QMCOptimizer(_BaseOptunaAdapter):
    """Quasi-Monte Carlo optimizer.

    Searches the parameter space with an Optuna ``QMCSampler`` that is
    configured from ``qmc_type`` and ``scramble``.

    Parameters
    ----------
    param_space : dict[str, tuple or list or optuna distributions]
        The search space to explore. Dictionary with parameter names
        as keys and either tuples/lists of (low, high) or
        optuna distribution objects as values.
    n_trials : int, default=100
        Number of optimization trials.
    initialize : dict[str, int], default=None
        The method to generate initial positions. A dictionary with
        the following key literals and the corresponding value type:
        {"grid": int, "vertices": int, "random": int, "warm_start": list[dict]}
    random_state : None, int, default=None
        If None, create a new random state. If int, create a new random state
        seeded with the value. A value other than None is handed to the
        sampler as its ``seed``.
    early_stopping : int, default=None
        Number of trials after which to stop if no improvement.
    max_score : float, default=None
        Maximum score threshold. Stop optimization when reached.
    qmc_type : str, default="sobol"
        Type of QMC sequence. Options: "sobol", "halton".
    scramble : bool, default=True
        Whether to scramble the QMC sequence.
    experiment : BaseExperiment, optional
        The experiment to optimize parameters for.
        Optional, can be passed later via ``set_params``.

    Examples
    --------
    Basic usage of QMCOptimizer with a scikit-learn experiment:

    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from hyperactive.opt.optuna import QMCOptimizer
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> X, y = load_iris(return_X_y=True)
    >>> sklearn_exp = SklearnCvExperiment(estimator=SVC(), X=X, y=y)
    >>> param_space = {
    ...     "C": (0.01, 10),
    ...     "gamma": (0.0001, 10),
    ... }
    >>> optimizer = QMCOptimizer(
    ...     param_space=param_space, n_trials=50, experiment=sklearn_exp
    ... )
    >>> best_params = optimizer.run()
    """

    _tags = {
        "info:name": "Quasi-Monte Carlo Optimizer",
        "info:local_vs_global": "global",
        "info:explore_vs_exploit": "explore",
        "info:compute": "low",
        "python_dependencies": ["optuna"],
    }

    def __init__(
        self,
        param_space=None,
        n_trials=100,
        initialize=None,
        random_state=None,
        early_stopping=None,
        max_score=None,
        qmc_type="sobol",
        scramble=True,
        experiment=None,
    ):
        # The QMC-specific settings are stored on the instance before the
        # base-class constructor runs; everything else is forwarded to it.
        self.scramble = scramble
        self.qmc_type = qmc_type

        shared_settings = dict(
            param_space=param_space,
            n_trials=n_trials,
            initialize=initialize,
            random_state=random_state,
            early_stopping=early_stopping,
            max_score=max_score,
            experiment=experiment,
        )
        super().__init__(**shared_settings)

    def _get_optimizer(self):
        """Build the Optuna sampler used by this optimizer.

        Returns
        -------
        sampler : optuna.samplers.QMCSampler
            Sampler configured with ``qmc_type`` and ``scramble``; ``seed``
            is only passed when ``random_state`` is not None.
        """
        import optuna

        sampler_settings = dict(qmc_type=self.qmc_type, scramble=self.scramble)

        # Leave ``seed`` out entirely when no random state was requested so
        # that the sampler applies its own default.
        if self.random_state is not None:
            sampler_settings["seed"] = self.random_state

        return optuna.samplers.QMCSampler(**sampler_settings)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the optimizer.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the parameter set; passed on unchanged to the parent
            class' ``get_test_params``.

        Returns
        -------
        params : list of dict
            The parent's parameter sets, with the first one switched to an
            unscrambled Halton sequence, followed by two additional sets
            that share one logistic-regression experiment and search space.
        """
        from sklearn.datasets import load_iris
        from sklearn.linear_model import LogisticRegression

        from hyperactive.experiment.integrations import SklearnCvExperiment

        # Case 1: inherited defaults, run as a Halton sequence without
        # scrambling.
        test_params = super().get_test_params(parameter_set)
        test_params[0].update(qmc_type="halton", scramble=False)

        # Experiment shared by the two additional cases below.
        features, target = load_iris(return_X_y=True)
        classifier = LogisticRegression(random_state=42, max_iter=1000)
        logreg_experiment = SklearnCvExperiment(
            estimator=classifier, X=features, y=target
        )

        # Mixed search space: one continuous and two categorical parameters.
        # "elasticnet" is deliberately absent from the penalties to avoid
        # solver conflicts.
        search_space = {
            "C": (0.01, 100),
            "penalty": ["l1", "l2"],
            "solver": ["liblinear", "saga"],
        }

        # Case 2: scrambled Sobol sequence, 16 trials.
        # Case 3: unscrambled Halton sequence, 8 trials.
        # Trial counts are powers of 2, which suits QMC sequences.
        extra_cases = ((16, "sobol", True), (8, "halton", False))
        for trial_count, sequence_type, use_scramble in extra_cases:
            test_params.append(
                {
                    "param_space": search_space,
                    "n_trials": trial_count,
                    "experiment": logreg_experiment,
                    "qmc_type": sequence_type,
                    "scramble": use_scramble,
                }
            )

        return test_params
164