GridSearchSk._check_param_grid()   C

Complexity
  Conditions: 9

Size
  Total Lines: 21
  Code Lines: 13

Duplication
  Lines: 0
  Ratio: 0 %

Importance
  Changes: 0

Metric  Value
eloc    13
dl      0
loc     21
rs      6.6666
c       0
b       0
f       0
cc      9
nop     2
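The cyclomatic complexity figure follows directly from the method body shown below: the base path plus the ``hasattr`` check, the two ``for`` loops, the three ``if`` statements, and the two boolean operators (``and``, ``or``) is consistent with the reported "Conditions 9" / "cc 9". The metric names (``cc``, ``loc``, ``nop``) resemble the output of a radon-style analyzer; the exact tool is not named here, so the following is only a minimal sketch, assuming the module is saved locally (the file name is a placeholder, not part of the repository), of how comparable figures can be reproduced with the ``radon`` package:

from radon.complexity import cc_visit
from radon.raw import analyze

# Placeholder path; point it at wherever the module listed below is saved.
with open("gridsearch_sk.py") as f:
    source = f.read()

# Per-block cyclomatic complexity; methods are nested under their class block.
for block in cc_visit(source):
    for unit in getattr(block, "methods", [block]):
        if unit.name == "_check_param_grid":
            print(unit.name, unit.complexity)  # expected to be in line with "cc 9"

# Raw line counts for the whole module (loc, lloc, sloc, comments, blank).
print(analyze(source))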
"""Grid search optimizer."""

# copyright: hyperactive developers, MIT License (see LICENSE file)

from collections.abc import Sequence

import numpy as np
from sklearn.model_selection import ParameterGrid

from hyperactive.base import BaseOptimizer
from hyperactive.opt._common import _score_params
from hyperactive.utils.parallel import parallelize


class GridSearchSk(BaseOptimizer):
    """Grid search optimizer, with backend selection and sklearn style parameter grid.

    Parameters
    ----------
    param_grid : dict[str, list]
        The search space to explore. A dictionary with parameter
        names as keys and lists or numpy arrays of parameter values as values.

    error_score : float, default=np.nan
        The score to assign if an error occurs during the evaluation of a parameter set.

    backend : {"None", "loky", "multiprocessing", "threading", "joblib", "dask", "ray"}, default="None"
        Parallelization backend to use in the search process.

        - "None": executes loop sequentially, via a simple list comprehension
        - "loky", "multiprocessing" and "threading": uses ``joblib.Parallel`` loops
        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``
        - "dask": uses ``dask``, requires ``dask`` package in environment
        - "ray": uses ``ray``, requires ``ray`` package in environment

    backend_params : dict, optional
        Additional parameters passed to the backend as config.
        Directly passed to ``utils.parallel.parallelize``.
        Valid keys depend on the value of ``backend``:

        - "None": no additional parameters, ``backend_params`` is ignored
        - "loky", "multiprocessing" and "threading": default ``joblib`` backends.
          Any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``,
          with the exception of ``backend``, which is directly controlled by the
          ``backend`` parameter. If ``n_jobs`` is not passed, it will default to
          ``-1``, other parameters will default to ``joblib`` defaults.
        - "joblib": custom and 3rd party ``joblib`` backends, e.g., ``spark``.
          Any valid keys for ``joblib.Parallel`` can be passed here, e.g., ``n_jobs``;
          ``backend`` must be passed as a key of ``backend_params`` in this case.
          If ``n_jobs`` is not passed, it will default to ``-1``, other parameters
          will default to ``joblib`` defaults.
        - "dask": any valid keys for ``dask.compute`` can be passed, e.g., ``scheduler``

        - "ray": The following keys can be passed:

            - "ray_remote_args": dictionary of valid keys for ``ray.init``
            - "shutdown_ray": bool, default=True; False prevents ``ray`` from shutting
                down after parallelization.
            - "logger_name": str, default="ray"; name of the logger to use.
            - "mute_warnings": bool, default=False; if True, suppresses warnings

    experiment : BaseExperiment, optional
        The experiment to optimize parameters for.
        Optional, can be passed later via ``set_params``.

    Example
    -------
    Grid search applied to scikit-learn parameter tuning:

    1. defining the experiment to optimize:
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     X=X,
    ...     y=y,
    ... )

    2. setting up the grid search optimizer:
    >>> from hyperactive.opt import GridSearchSk as GridSearch
    >>> param_grid = {
    ...     "C": [0.01, 0.1, 1, 10],
    ...     "gamma": [0.0001, 0.01, 0.1, 1, 10],
    ... }
    >>> grid_search = GridSearch(param_grid, experiment=sklearn_exp)

    3. running the grid search:
    >>> best_params = grid_search.solve()

    Best parameters can also be accessed via the attributes:
    >>> best_params = grid_search.best_params_

    To parallelize the search, set the ``backend`` and ``backend_params``:
    >>> grid_search = GridSearch(
    ...     param_grid,
    ...     backend="joblib",
    ...     backend_params={"n_jobs": -1},
    ...     experiment=sklearn_exp,
    ... )
    """

    def __init__(
        self,
        param_grid=None,
        error_score=np.nan,
        backend="None",
        backend_params=None,
        experiment=None,
    ):
        self.experiment = experiment
        self.param_grid = param_grid
        self.error_score = error_score
        self.backend = backend
        self.backend_params = backend_params

        super().__init__()

    def _check_param_grid(self, param_grid):
        """_check_param_grid from sklearn 1.0.2, before it was removed."""
        if hasattr(param_grid, "items"):
            param_grid = [param_grid]

        for p in param_grid:
            for name, v in p.items():
                if isinstance(v, np.ndarray) and v.ndim > 1:
                    raise ValueError("Parameter array should be one-dimensional.")

                if isinstance(v, str) or not isinstance(v, (np.ndarray, Sequence)):
                    raise ValueError(
                        f"Parameter grid for parameter ({name}) needs to"
                        f" be a list or numpy array, but got ({type(v)})."
                        " Single values need to be wrapped in a list"
                        " with one element."
                    )

                if len(v) == 0:
                    raise ValueError(
                        f"Parameter values for parameter ({name}) need "
                        "to be a non-empty sequence."
                    )

    def _solve(self, experiment, param_grid, error_score, backend, backend_params):
        """Run the optimization search process."""
        self._check_param_grid(param_grid)
        candidate_params = list(ParameterGrid(param_grid))

        meta = {
            "experiment": experiment,
            "error_score": error_score,
        }

        scores = parallelize(
            fun=_score_params,
            iter=candidate_params,
            meta=meta,
            backend=backend,
            backend_params=backend_params,
        )

        best_index = np.argmin(scores)
        best_params = candidate_params[best_index]

        self.best_index_ = best_index
        self.best_score_ = scores[best_index]

        return best_params

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the skbase object.

        ``get_test_params`` is a unified interface point to store
        parameter settings for testing purposes. This function is also
        used in ``create_test_instance`` and ``create_test_instances_and_names``
        to construct test instances.

        ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.

        Each ``dict`` is a parameter configuration for testing,
        and can be used to construct an "interesting" test instance.
        A call to ``cls(**params)`` should
        be valid for all dictionaries ``params`` in the return of ``get_test_params``.

        ``get_test_params`` need not return fixed lists of dictionaries,
        it can also return dynamic or stochastic parameter settings.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return the `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class.
            Each dict contains parameters to construct an "interesting" test instance,
            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid test
            instance. `create_test_instance` uses the first (or only) dictionary in
            `params`.
        """
        from hyperactive.experiment.integrations import SklearnCvExperiment

        sklearn_exp = SklearnCvExperiment.create_test_instance()
        param_grid = {
            "C": [0.01, 0.1, 1, 10],
            "gamma": [0.0001, 0.01, 0.1, 1, 10],
        }
        params_sklearn = {
            "experiment": sklearn_exp,
            "param_grid": param_grid,
        }

        from hyperactive.experiment.bench import Ackley

        ackley_exp = Ackley.create_test_instance()
        param_grid = {
            "x0": np.linspace(-5, 5, 10),
            "x1": np.linspace(-5, 5, 10),
        }
        params_ackley = {
            "experiment": ackley_exp,
            "param_grid": param_grid,
        }

        params = [params_sklearn, params_ackley]

        from hyperactive.utils.parallel import _get_parallel_test_fixtures

        parallel_fixtures = _get_parallel_test_fixtures()

        for x in parallel_fixtures:
            new_ackley = params_ackley.copy()
            new_ackley.update(x)
            params.append(new_ackley)

        return params
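
To illustrate the validation rules behind the nine conditions in practice, here is a minimal usage sketch; it is an illustration under assumptions, not part of the module above. It assumes, as the signature suggests, that the optimizer can be constructed with its all-default arguments, calls the private ``_check_param_grid`` directly, and shows how an accepted grid is expanded by ``sklearn.model_selection.ParameterGrid``, which is what ``_solve`` does before scoring:

import numpy as np
from sklearn.model_selection import ParameterGrid

from hyperactive.opt import GridSearchSk  # same import path as in the docstring example

opt = GridSearchSk()  # all constructor arguments default to None / "None"

# A well-formed grid: values are non-empty lists or 1-D numpy arrays.
good_grid = {"C": [0.01, 0.1, 1, 10], "gamma": np.array([0.0001, 0.01, 0.1])}
opt._check_param_grid(good_grid)  # passes silently
print(len(list(ParameterGrid(good_grid))))  # 4 * 3 = 12 candidate parameter sets

# Each of these violates one branch of the validator and raises ValueError:
bad_grids = [
    {"C": 1},                # bare scalar, not wrapped in a list
    {"C": "auto"},           # strings are rejected explicitly
    {"C": []},               # empty sequence
    {"C": np.ones((2, 2))},  # numpy array with more than one dimension
]
for grid in bad_grids:
    try:
        opt._check_param_grid(grid)
    except ValueError as err:
        print("ValueError -", err)

The 12 accepted candidates are what ``_solve`` would score, in parallel depending on ``backend``, before selecting the best index via ``np.argmin``.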