Passed
Pull Request — master (#110)
created by unknown, 01:37

hyperactive.opt.gridsearch   A

Complexity

Total Complexity 13

Size/Duplication

Total Lines 117
Duplicated Lines 0 %

Importance

Changes 0
Metric   Value
wmc      13
eloc     45
dl       0
loc      117
rs       10
c        0
b        0
f        0

3 Methods

Rating   Name                             Duplication   Size   Complexity
A        GridSearch._run()                0             22     3
A        GridSearch.__init__()            0             11     1
C        GridSearch._check_param_grid()   0             21     9
"""Grid search optimizer."""

from collections.abc import Sequence

import numpy as np

from sklearn.model_selection import ParameterGrid

from hyperactive.base import BaseOptimizer


class GridSearch(BaseOptimizer):
    """Grid search optimizer.

    Parameters
    ----------
    experiment : BaseExperiment, optional
        The experiment to optimize parameters for.
        Optional, can be passed later in ``add_search``.
    error_score : float, default=np.nan
        The score to assign if an error occurs during the evaluation
        of a parameter set.
    param_grid : dict[str, list]
        The search space to explore. A dictionary with parameter
        names as keys and lists or one-dimensional numpy arrays of
        candidate values as values.

    Example
    -------
    Grid search applied to scikit-learn parameter tuning:

    1. defining the experiment to optimize:
    >>> from hyperactive.experiment import Experiment
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     X=X,
    ...     y=y,
    ... )

    2. setting up the grid search optimizer:
    >>> from hyperactive import GridSearch
    >>> param_grid = {
    ...     "C": [0.01, 0.1, 1, 10],
    ...     "gamma": [0.0001, 0.01, 0.1, 1, 10],
    ... }
    >>> grid_search = GridSearch(sklearn_exp, param_grid=param_grid)

    3. running the grid search:
    >>> best_params = grid_search.run()

    Best parameters can also be accessed via the attributes:
    >>> best_params = grid_search.best_params_
    """

    def __init__(
        self,
        experiment=None,
        error_score=np.nan,
        param_grid=None,
    ):
        self.experiment = experiment
        self.param_grid = param_grid
        self.error_score = error_score

        super().__init__()

    def _check_param_grid(self, param_grid):
        """_check_param_grid from sklearn 1.0.2, before it was removed."""
        if hasattr(param_grid, "items"):
            param_grid = [param_grid]

        for p in param_grid:
            for name, v in p.items():
                if isinstance(v, np.ndarray) and v.ndim > 1:
                    raise ValueError("Parameter array should be one-dimensional.")

                if isinstance(v, str) or not isinstance(v, (np.ndarray, Sequence)):
                    raise ValueError(
                        f"Parameter grid for parameter ({name}) needs to"
                        f" be a list or numpy array, but got ({type(v)})."
                        " Single values need to be wrapped in a list"
                        " with one element."
                    )

                if len(v) == 0:
                    raise ValueError(
                        f"Parameter values for parameter ({name}) need "
                        "to be a non-empty sequence."
                    )

    def _run(self, experiment, param_grid, error_score):
        """Run the optimization search process."""
        self._check_param_grid(param_grid)
        candidate_params = list(ParameterGrid(param_grid))

        scores = []
        for candidate_param in candidate_params:
            try:
                score = experiment(**candidate_param)
            except Exception:  # noqa: B904
                # Catch all exceptions and assign error_score
                score = error_score
            scores.append(score)

        best_index = np.argmin(scores)
        best_params = candidate_params[best_index]

        self.best_index_ = best_index
        self.best_params_ = best_params
        self.best_score_ = scores[best_index]

        return best_params
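
For context on the C-rated _check_param_grid above: it accepts lists or one-dimensional numpy arrays of candidate values and rejects bare scalars, strings, multi-dimensional arrays, and empty sequences. Below is a minimal sketch of that behaviour, assuming the class can be imported from hyperactive.opt.gridsearch and constructed without an experiment, as the experiment=None default in __init__ suggests:

import numpy as np

from hyperactive.opt.gridsearch import GridSearch  # assumed import path

# assumption: the optimizer can be constructed without an experiment
gs = GridSearch()

# accepted: lists or 1-D numpy arrays of candidate values
gs._check_param_grid({"C": [0.1, 1, 10], "gamma": np.logspace(-4, 1, 6)})

# rejected: a bare scalar must be wrapped in a single-element list
try:
    gs._check_param_grid({"C": 1})
except ValueError as err:
    print(err)

# rejected: parameter arrays must be one-dimensional
try:
    gs._check_param_grid({"C": np.ones((2, 2))})
except ValueError as err:
    print(err)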
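
_run relies on scikit-learn's ParameterGrid to expand the grid dict into the Cartesian product of candidate parameter dicts, then keeps the candidate with the smallest score via np.argmin. A small sketch of that expansion and selection follows; toy_experiment is a hypothetical stand-in for an experiment that returns a value to minimize:

import numpy as np
from sklearn.model_selection import ParameterGrid

param_grid = {"C": [0.1, 1], "gamma": [0.01, 0.1]}

# ParameterGrid yields every combination of the value lists as a dict,
# e.g. {'C': 0.1, 'gamma': 0.01}, {'C': 0.1, 'gamma': 0.1}, ...
candidate_params = list(ParameterGrid(param_grid))
assert len(candidate_params) == 4

# hypothetical experiment: smaller return value is better, mirroring
# the np.argmin selection in GridSearch._run
def toy_experiment(C, gamma):
    return abs(C - 1) + abs(gamma - 0.1)

scores = [toy_experiment(**p) for p in candidate_params]
best_index = np.argmin(scores)
print(candidate_params[best_index])  # {'C': 1, 'gamma': 0.1}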