Passed
Pull Request — master (#110)
created by unknown, 01:39

OptCV.__init__()   A

Complexity:  Conditions 1
Size:        Total Lines 16, Code Lines 14
Duplication: Lines 0, Ratio 0 %
Importance:  Changes 0

Metric  Value
------  -----
eloc    14
dl      0
loc     16
rs      9.7
c       0
b       0
f       0
cc      1
nop     7

# copyright: hyperactive developers, MIT License (see LICENSE file)

from collections.abc import Callable
from typing import Union

from sklearn.base import BaseEstimator, clone
from sklearn.utils.validation import indexable, _check_method_params

from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
from hyperactive.integrations.sklearn.best_estimator import (
    BestEstimator as _BestEstimator_
)
from hyperactive.integrations.sklearn.checks import Checks


class OptCV(BaseEstimator, _BestEstimator_, Checks):
    """Tuning via any optimizer in the hyperactive API.

    Parameters
    ----------
    estimator : SklearnBaseEstimator
        The estimator to be tuned, used inside the cross-validation experiment.
    optimizer : hyperactive BaseOptimizer
        The optimizer to be used for the hyperparameter search.
    scoring : callable or str, default = accuracy_score or mean_squared_error
        sklearn scoring function or metric to evaluate the model's performance.
        The default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``mean_squared_error`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.
    refit : bool, optional, default = True
        Whether to refit the best estimator on the entire dataset.
        If True, the best estimator is refit on the entire dataset after
        the optimization process.
        If False, no refit is carried out and ``predict`` is not available.
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
        The number of folds or cross-validation strategy to be used.
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).

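    Attributes
    ----------
    best_params_ : dict
        Best hyperparameter configuration found by the optimizer.
    best_estimator_ : sklearn estimator
        Clone of ``estimator`` set to ``best_params_``; refit on the full
        data passed to ``fit`` if ``refit=True``.
    scorer_ : callable
        Scorer used to evaluate candidate configurations.
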
    Example
    -------
    Tuning sklearn SVC via grid search.

    1. defining the tuned estimator:
    >>> from sklearn.svm import SVC
    >>> from hyperactive.integrations.sklearn import OptCV
    >>> from hyperactive.opt import GridSearch
    >>>
    >>> param_grid = {"kernel": ["linear", "rbf"], "C": [1, 10]}
    >>> tuned_svc = OptCV(SVC(), GridSearch(param_grid))

    2. fitting the tuned estimator:
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_iris(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    >>>
    >>> tuned_svc.fit(X_train, y_train)
    OptCV(...)
    >>> y_pred = tuned_svc.predict(X_test)

    3. obtaining best parameters and best estimator:
    >>> best_params = tuned_svc.best_params_
    >>> best_estimator = tuned_svc.best_estimator_
    """

    _required_parameters = ["estimator", "optimizer"]

    def __init__(
        self,
        estimator,
        optimizer,
        *,
        scoring: Union[Callable, str, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        self.estimator = estimator
        self.optimizer = optimizer
        self.scoring = scoring
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        # refit a fresh clone of the estimator, configured with the best
        # parameters found, on the full data passed to ``fit``
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        X, y = indexable(X, y)
        # older sklearn versions expose validation as the estimator method
        # ``_validate_data``; newer releases provide the function
        # ``validate_data``, which takes the estimator as its first argument
        if hasattr(self, "_validate_data"):
            return self._validate_data(X, y)

        from sklearn.utils.validation import validate_data

        return validate_data(self, X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """Fit the model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        **fit_params : dict of str -> object
            Parameters passed to the estimator's ``fit`` method when refitting.

        Returns
        -------
        self : object
            Fitted Estimator.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)

        # wrap the estimator, scoring and CV strategy in a cross-validation
        # experiment that the optimizer can evaluate
        experiment = SklearnCvExperiment(
            estimator=self.estimator,
            scoring=self.scoring,
            cv=self.cv,
            X=X,
            y=y,
        )
        self.scorer_ = experiment.scorer_

        # run the search on a clone so the optimizer instance passed by the
        # user remains unchanged
        optimizer = self.optimizer.clone()
        optimizer.set_params(experiment=experiment)
        best_params = optimizer.run()

        self.best_params_ = best_params
        self.best_estimator_ = clone(self.estimator).set_params(**best_params)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """Return the score on the given data, if the estimator has been refit.

        This uses the score defined by ``scoring`` where provided, and the
        ``best_estimator_.score`` method otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_output) \
            or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **params : dict
            Parameters to be passed to the underlying scorer(s).

        Returns
        -------
        score : float
            The score defined by ``scoring`` if provided, and the
            ``best_estimator_.score`` method otherwise.
        """
        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        return self._fit_successful
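
# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the module above: exercises the
# ``scoring``, ``cv`` and ``refit`` parameters documented in the class
# docstring. ``OptCV`` and ``GridSearch`` follow the docstring example; the
# particular scorer and CV splitter below are arbitrary illustrative choices.

if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.metrics import f1_score, make_scorer
    from sklearn.model_selection import StratifiedKFold, train_test_split
    from sklearn.svm import SVC

    from hyperactive.opt import GridSearch

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    tuned = OptCV(
        SVC(),
        GridSearch({"kernel": ["linear", "rbf"], "C": [1, 10]}),
        # callable scorer, as allowed by the ``scoring`` parameter
        scoring=make_scorer(f1_score, average="macro"),
        # explicit CV strategy instead of the default KFold(n_splits=3)
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=0),
        # refit on the full training data so ``predict`` and ``score`` work
        refit=True,
    )
    tuned.fit(X_train, y_train)
    print(tuned.best_params_)
    print(tuned.score(X_test, y_test))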