Passed
Pull Request — master (#110)

OptCV.fit() (grade A)

Complexity
    Conditions: 2

Size
    Total Lines: 42
    Code Lines: 19

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 0
Metric   Value
eloc     19
dl       0
loc      42
rs       9.45
c        0
b        0
f        0
cc       2
nop      4
# copyright: hyperactive developers, MIT License (see LICENSE file)

from collections.abc import Callable
from typing import Union

from sklearn.base import BaseEstimator, clone
from sklearn.utils.validation import indexable, _check_method_params

from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
from hyperactive.integrations.sklearn.best_estimator import (
    BestEstimator as _BestEstimator_,
)
from hyperactive.integrations.sklearn.checks import Checks


class OptCV(BaseEstimator, _BestEstimator_, Checks):
    """Tuning via any optimizer in the hyperactive API.

    Parameters
    ----------
    estimator : SklearnBaseEstimator
        The estimator to be tuned.
    optimizer : hyperactive BaseOptimizer
        The optimizer to be used for hyperparameter search.
    scoring : callable or str, default = accuracy_score or mean_squared_error
        sklearn scoring function or metric to evaluate the model's performance.
        The default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``mean_squared_error`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.
    refit : bool, optional, default = True
        Whether to refit the best estimator on the entire dataset.
        If True, the best estimator is refit on the entire dataset after
        the optimization process.
        If False, no refit is performed and ``predict`` is not available.
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
        The number of folds or the cross-validation strategy to be used.
        If an int, the cross-validation used is ``KFold(n_splits=cv, shuffle=True)``.

    Example
    -------
    Tuning an sklearn SVC via grid search.

    1. defining the tuned estimator:
    >>> from sklearn.svm import SVC
    >>> from hyperactive.integrations.sklearn import OptCV
    >>> from hyperactive.opt import GridSearch
    >>>
    >>> param_grid = {"kernel": ["linear", "rbf"], "C": [1, 10]}
    >>> tuned_svc = OptCV(SVC(), GridSearch(param_grid))

    2. fitting the tuned estimator:
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_iris(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    >>>
    >>> tuned_svc.fit(X_train, y_train)
    OptCV(...)
    >>> y_pred = tuned_svc.predict(X_test)

    3. obtaining best parameters and best estimator:
    >>> best_params = tuned_svc.best_params_
    >>> best_estimator = tuned_svc.best_estimator_
    """

    _required_parameters = ["estimator", "optimizer"]

    def __init__(
        self,
        estimator,
        optimizer,
        *,
        scoring: Union[Callable, str, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        self.estimator = estimator
        self.optimizer = optimizer
        self.scoring = scoring
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        X, y = indexable(X, y)
        if hasattr(self, "_validate_data"):
            # older sklearn versions provide validation as an estimator method
            return self._validate_data(X, y)
        # newer sklearn versions expose validate_data as a public function,
        # which takes the estimator as its first argument
        from sklearn.utils.validation import validate_data

        return validate_data(self, X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """Fit the model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)

        # wrap cross-validated evaluation of the estimator as an experiment
        experiment = SklearnCvExperiment(
            estimator=self.estimator,
            scoring=self.scoring,
            cv=self.cv,
            X=X,
            y=y,
        )
        self.scorer_ = experiment.scorer_

        # run the optimizer on the experiment to obtain the best parameters
        optimizer = self.optimizer.clone()
        optimizer.set_params(experiment=experiment)
        best_params = optimizer.run()

        self.best_params_ = best_params
        self.best_estimator_ = clone(self.estimator).set_params(**best_params)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """Return the score on the given data, if the estimator has been refit.

        This uses the score defined by ``scoring`` where provided, and the
        ``best_estimator_.score`` method otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_output) \
            or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **params : dict
            Parameters to be passed to the underlying scorer(s).

        Returns
        -------
        score : float
            The score defined by ``scoring`` if provided, and the
            ``best_estimator_.score`` method otherwise.
        """
        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        return self._fit_successful
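
For context, a minimal usage sketch of the constructor options documented above (scoring, cv, refit), beyond the classification doctest in the class docstring. It assumes the same OptCV and GridSearch imports; Ridge, KFold, and load_diabetes are standard sklearn objects, the alpha grid is hypothetical, and passing a scorer string is an assumption based on the documented ``callable or str`` type. This snippet is illustrative only and not part of the diff.

# usage sketch (assumption, not part of this PR): tuning a regressor with an
# explicit cv splitter and a scorer string
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold, train_test_split

from hyperactive.integrations.sklearn import OptCV
from hyperactive.opt import GridSearch

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

param_grid = {"alpha": [0.1, 1.0, 10.0]}  # hypothetical grid for illustration
tuned_ridge = OptCV(
    Ridge(),
    GridSearch(param_grid),
    scoring="neg_mean_absolute_error",  # assumption: scorer strings accepted per "callable or str"
    cv=KFold(n_splits=5, shuffle=True),
    refit=True,  # per the docstring, refit=False would make predict unavailable
)
tuned_ridge.fit(X_train, y_train)

print(tuned_ridge.best_params_)
print(tuned_ridge.score(X_test, y_test))  # score applies scorer_ to best_estimator_

With refit=False, the best estimator is not refit after the search, so predict is not available after fit, as stated in the class docstring.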