Passed
Push — master (3434da...da0201) by Simon, created 02:16

HyperactiveSearchCV.__init__() (rated A)

Complexity
    Conditions: 1

Size
    Total Lines: 24
    Code Lines: 22

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 0

Metric    Value
cc        1
eloc      22
nop       11
dl        0
loc       24
rs        9.352
c         0
b         0
f         0

How to fix: Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more, or different, data.

There are several approaches to avoiding long parameter lists; one common one, sketched below, is to group related arguments into a parameter object.
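As a minimal illustration of the parameter-object idea applied to the flagged constructor (the names below are hypothetical and not part of the hyperactive or scikit-learn APIs), the five keyword-only tuning options of __init__ could be bundled into a dataclass:

# Hypothetical sketch of "Introduce Parameter Object"; not the actual hyperactive API.
from dataclasses import dataclass
from typing import Callable, Optional, Union


@dataclass
class SearchSettings:
    # Groups the optional tuning arguments that __init__ currently takes
    # as separate keyword-only parameters.
    scoring: Union[Callable, str, None] = None
    n_jobs: int = 1
    random_state: Optional[int] = None
    refit: bool = True
    cv: object = None  # int, CV splitter, or iterable, as in the original signature


class SearchCVSketch:
    # The constructor now takes five parameters instead of nine.
    def __init__(self, estimator, params_config, optimizer="default",
                 n_iter=100, settings: Optional[SearchSettings] = None):
        self.estimator = estimator
        self.params_config = params_config
        self.optimizer = optimizer
        self.n_iter = n_iter
        self.settings = settings if settings is not None else SearchSettings()

Note that scikit-learn's get_params/set_params and clone conventions expect each hyperparameter as its own __init__ keyword argument stored unchanged on the instance, which is one reason search wrappers like this one often keep the long signature despite the metric warning.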

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License

from collections.abc import Iterable, Callable
from typing import Union, Dict, Type

from sklearn.base import BaseEstimator, clone
from sklearn.metrics import check_scoring
from sklearn.utils.validation import indexable, _check_method_params

from sklearn.base import BaseEstimator as SklearnBaseEstimator
from sklearn.model_selection import BaseCrossValidator

from hyperactive import Hyperactive

from .objective_function_adapter import ObjectiveFunctionAdapter
from .best_estimator import BestEstimator as _BestEstimator_
from .checks import Checks
from ...optimizers import RandomSearchOptimizer


class HyperactiveSearchCV(BaseEstimator, _BestEstimator_, Checks):
    """
    HyperactiveSearchCV class for hyperparameter tuning using cross-validation with sklearn estimators.

    Parameters:
    - estimator: SklearnBaseEstimator
        The estimator to be tuned.
    - params_config: Dict[str, list]
        Dictionary containing the hyperparameter search space.
    - optimizer: Union[str, Type[RandomSearchOptimizer]], optional
        The optimizer to be used for hyperparameter search, default is "default".
    - n_iter: int, optional
        Number of parameter settings that are sampled, default is 100.
    - scoring: Callable | str | None, optional
        Scoring method to evaluate the predictions on the test set.
    - n_jobs: int, optional
        Number of jobs to run in parallel, default is 1.
    - random_state: int | None, optional
        Random seed for reproducibility.
    - refit: bool, optional
        Refit the best estimator with the entire dataset, default is True.
    - cv: int | "BaseCrossValidator" | Iterable | None, optional
        Determines the cross-validation splitting strategy.

    Methods:
    - fit(X, y, **fit_params)
        Fit the estimator and tune hyperparameters.
    - score(X, y, **params)
        Return the score of the best estimator on the input data.
    """

    _required_parameters = ["estimator", "optimizer", "params_config"]

    def __init__(
        self,
        estimator: "SklearnBaseEstimator",
        params_config: Dict[str, list],
        optimizer: Union[str, Type[RandomSearchOptimizer]] = "default",
        n_iter: int = 100,
        *,
        scoring: Union[Callable, str, None] = None,
        n_jobs: int = 1,
        random_state: Union[int, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        self.estimator = estimator
        self.params_config = params_config
        self.optimizer = optimizer
        self.n_iter = n_iter
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        # Refit a fresh clone of the estimator with the best parameters found.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        # Use the estimator's _validate_data method if it exists; otherwise
        # fall back to scikit-learn's module-level validate_data helper.
        X, y = indexable(X, y)
        if hasattr(self, "_validate_data"):
            validate_data = self._validate_data
        else:
            from sklearn.utils.validation import validate_data

        return validate_data(X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """
        Fit the estimator using the provided training data.

        Parameters:
        - X: array-like or sparse matrix, shape (n_samples, n_features)
            The training input samples.
        - y: array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values.
        - **fit_params: dict of string -> object
            Additional fit parameters.

        Returns:
        - self: object
            Returns the instance itself.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        objective_function_adapter = ObjectiveFunctionAdapter(
            self.estimator,
        )
        objective_function_adapter.add_dataset(X, y)
        objective_function_adapter.add_validation(self.scorer_, self.cv)
        objective_function = objective_function_adapter.objective_function

        hyper = Hyperactive(verbosity=False)
        hyper.add_search(
            objective_function,
            search_space=self.params_config,
            optimizer=self.optimizer,
            n_iter=self.n_iter,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        hyper.run()

        self.best_params_ = hyper.best_para(objective_function)
        self.best_score_ = hyper.best_score(objective_function)
        self.search_data_ = hyper.search_data(objective_function)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """
        Calculate the score of the best estimator on the input data.

        Parameters:
        - X: array-like or sparse matrix of shape (n_samples, n_features)
            The input samples.
        - y: array-like of shape (n_samples,), default=None
            The target values.
        - **params: dict
            Additional parameters to be passed to the scoring function.

        Returns:
        - float
            The score of the best estimator on the input data.
        """

        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        return self._fit_successful
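For context, here is a minimal usage sketch of the class shown above. The dataset, estimator, and search space are illustrative choices, not taken from the report, and the import path of HyperactiveSearchCV depends on the package layout.

# Illustrative only: estimator, search space, and data are example values.
# HyperactiveSearchCV is the class defined above.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)

search = HyperactiveSearchCV(
    estimator=DecisionTreeClassifier(),
    params_config={
        "max_depth": list(range(1, 11)),
        "min_samples_split": [2, 5, 10],
    },
    n_iter=25,
    cv=3,
    random_state=0,
)
search.fit(X, y)

print(search.best_params_)   # best hyperparameter combination found
print(search.best_score_)    # cross-validated score of that combination
print(search.score(X, y))    # score of the refitted best_estimator_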