Passed
Push — master ( 3434da...da0201 )
by Simon
02:16
created

HyperactiveSearchCV._check_data()   A

Complexity

Conditions 2

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 3
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
# Author: Simon Blanke
2
# Email: [email protected]
3
# License: MIT License
4
5
from collections.abc import Iterable, Callable
6
from typing import Union, Dict, Type
7
8
from sklearn.base import BaseEstimator, clone
9
from sklearn.metrics import check_scoring
10
from sklearn.utils.validation import indexable, _check_method_params
11
12
from sklearn.base import BaseEstimator as SklearnBaseEstimator
13
from sklearn.model_selection import BaseCrossValidator
14
15
from hyperactive import Hyperactive
16
17
from .objective_function_adapter import ObjectiveFunctionAdapter
18
from .best_estimator import BestEstimator as _BestEstimator_
19
from .checks import Checks
20
from ...optimizers import RandomSearchOptimizer
21
22
23
class HyperactiveSearchCV(BaseEstimator, _BestEstimator_, Checks):
    """
    Hyperparameter tuning with cross-validation for sklearn estimators.

    The search itself is delegated to ``Hyperactive``; this class wires the
    estimator, the dataset, the scorer and the cross-validation strategy into
    an objective function and stores the results of the run.

    Parameters:
    - estimator: SklearnBaseEstimator
        The estimator to be tuned.
    - params_config: Dict[str, list]
        Dictionary containing the hyperparameter search space.
    - optimizer: Union[str, Type[RandomSearchOptimizer]], optional
        The optimizer to be used for hyperparameter search, default is "default".
    - n_iter: int, optional
        Number of parameter settings that are sampled, default is 100.
    - scoring: Callable | str | None, optional
        Scoring method to evaluate the predictions on the test set.
    - n_jobs: int, optional
        Number of jobs to run in parallel, default is 1.
    - random_state: int | None, optional
        Random seed for reproducibility.
    - refit: bool, optional
        Refit the best estimator with the entire dataset, default is True.
    - cv: int | "BaseCrossValidator" | Iterable | None, optional
        Determines the cross-validation splitting strategy.

    Attributes set by ``fit``:
    - scorer_: the scorer returned by ``check_scoring``.
    - best_params_, best_score_, search_data_: results read from the
      ``Hyperactive`` run.
    - best_estimator_: the refitted estimator (only when ``refit`` is truthy).

    Methods:
    - fit(X, y, **fit_params)
        Fit the estimator and tune hyperparameters.
    - score(X, y, **params)
        Return the score of the best estimator on the input data.
    """

    _required_parameters = ["estimator", "optimizer", "params_config"]

    def __init__(
        self,
        estimator: "SklearnBaseEstimator",
        params_config: Dict[str, list],
        optimizer: Union[str, Type[RandomSearchOptimizer]] = "default",
        n_iter: int = 100,
        *,
        scoring: Union[Callable, str, None] = None,
        n_jobs: int = 1,
        random_state: Union[int, None] = None,
        refit: bool = True,
        cv: Union[int, "BaseCrossValidator", Iterable, None] = None,
    ):
        super().__init__()

        # Constructor arguments are stored unmodified under their own names
        # (the sklearn estimator convention, which get_params/clone rely on).
        self.estimator = estimator
        self.params_config = params_config
        self.optimizer = optimizer
        self.n_iter = n_iter
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        """
        Fit a fresh clone of the estimator with the best parameters found.

        Parameters:
        - X: training input samples.
        - y: target values, default is None.
        - **fit_params: forwarded to the estimator's ``fit``.

        Returns:
        - self: object
            Returns the instance itself; the fitted clone is stored in
            ``best_estimator_``.
        """
        # safe=False lets clone() copy the plain parameter dict, which is not
        # itself an estimator.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        """
        Make ``X`` and ``y`` indexable and run sklearn's input validation.

        Uses the ``_validate_data`` method when this instance has one;
        otherwise falls back to ``sklearn.utils.validation.validate_data``.

        Returns:
        - the validated data as returned by the validation routine
          (unpacked as ``X, y`` by ``fit``).
        """
        X, y = indexable(X, y)
        if hasattr(self, "_validate_data"):
            # Bound method: the estimator is already supplied as ``self``.
            return self._validate_data(X, y)

        from sklearn.utils.validation import validate_data

        # The module-level function takes the estimator as its first
        # (positional-only) argument; omitting it would make X be treated as
        # the estimator and y as X.
        return validate_data(self, X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """
        Fit the estimator using the provided training data.

        Parameters:
        - X: array-like or sparse matrix, shape (n_samples, n_features)
            The training input samples.
        - y: array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values.
        - **fit_params: dict of string -> object
            Additional fit parameters.

        Returns:
        - self: object
            Returns the instance itself.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        # Build the objective function that the optimizer evaluates for each
        # sampled parameter set.
        objective_function_adapter = ObjectiveFunctionAdapter(
            self.estimator,
        )
        objective_function_adapter.add_dataset(X, y)
        objective_function_adapter.add_validation(self.scorer_, self.cv)
        objective_function = objective_function_adapter.objective_function

        hyper = Hyperactive(verbosity=False)
        hyper.add_search(
            objective_function,
            search_space=self.params_config,
            optimizer=self.optimizer,
            n_iter=self.n_iter,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        hyper.run()

        # Results are looked up by the objective function that was searched.
        self.best_params_ = hyper.best_para(objective_function)
        self.best_score_ = hyper.best_score(objective_function)
        self.search_data_ = hyper.search_data(objective_function)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """
        Calculate the score of the best estimator on the input data.

        Relies on ``scorer_`` (set in ``fit``) and ``best_estimator_``
        (within this class only set by ``_refit``, i.e. when ``refit`` is
        truthy).

        Parameters:
        - X: array-like or sparse matrix of shape (n_samples, n_features)
            The input samples.
        - y: array-like of shape (n_samples,), default=None
            The target values.
        - **params: dict
            Additional parameters to be passed to the scoring function.

        Returns:
        - float
            The score of the best estimator on the input data.
        """

        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        """
        Return the value of the ``_fit_successful`` attribute.

        NOTE(review): the attribute is not assigned in this class; presumably
        it is maintained by ``Checks`` / ``Checks.verify_fit`` — confirm.
        """
        return self._fit_successful
170