Passed
Pull Request — master (#110)
created by unknown, 01:39

OptCV.__init__()   A

Complexity:  Conditions 1
Size:        Total Lines 16, Code Lines 14
Duplication: Lines 0, Ratio 0 %
Importance:  Changes 0

Metric  Value
------  -----
eloc    14
dl      0
loc     16
rs      9.7
c       0
b       0
f       0
cc      1
nop     7

# copyright: hyperactive developers, MIT License (see LICENSE file)

from collections.abc import Callable
from typing import Union

from sklearn.base import BaseEstimator, clone
from sklearn.utils.validation import indexable, _check_method_params

from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
from hyperactive.integrations.sklearn.best_estimator import (
    BestEstimator as _BestEstimator_
)
from hyperactive.integrations.sklearn.checks import Checks


class OptCV(BaseEstimator, _BestEstimator_, Checks):
    """Tuning via any optimizer in the hyperactive API.

    Parameters
    ----------
    estimator : SklearnBaseEstimator
        The estimator to be tuned, used inside the cross-validation experiment.
    optimizer : hyperactive BaseOptimizer
        The optimizer to be used for the hyperparameter search.
    scoring : callable or str, default = accuracy_score or mean_squared_error
        sklearn scoring function or metric to evaluate the model's performance.
        The default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``mean_squared_error`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.
    refit : bool, optional, default = True
        Whether to refit the best estimator on the entire dataset.
        If True, the best estimator is refit on the entire dataset after
        the optimization process.
        If False, no refit is carried out and ``predict`` is not available.
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
        The number of folds or cross-validation strategy to be used.
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).

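    Attributes
    ----------
    best_params_ : dict
        Best hyperparameter configuration found by the optimizer.
    best_estimator_ : sklearn estimator
        Clone of ``estimator`` set to ``best_params_``; refit on the full
        data passed to ``fit`` if ``refit=True``.
    scorer_ : callable
        Scorer used to evaluate candidate configurations.
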
    Example
    -------
    Tuning sklearn SVC via grid search.

    1. defining the tuned estimator:
    >>> from sklearn.svm import SVC
    >>> from hyperactive.integrations.sklearn import OptCV
    >>> from hyperactive.opt import GridSearch
    >>>
    >>> param_grid = {"kernel": ["linear", "rbf"], "C": [1, 10]}
    >>> tuned_svc = OptCV(SVC(), GridSearch(param_grid))

    2. fitting the tuned estimator:
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_iris(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    >>>
    >>> tuned_svc.fit(X_train, y_train)
    OptCV(...)
    >>> y_pred = tuned_svc.predict(X_test)

    3. obtaining best parameters and best estimator:
    >>> best_params = tuned_svc.best_params_
    >>> best_estimator = tuned_svc.best_estimator_
    """

    _required_parameters = ["estimator", "optimizer"]

    def __init__(
        self,
        estimator,
        optimizer,
        *,
        scoring: Union[Callable, str, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        self.estimator = estimator
        self.optimizer = optimizer
        self.scoring = scoring
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        # refit a fresh clone of the estimator, configured with the best
        # parameters found, on the full data passed to ``fit``
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        X, y = indexable(X, y)
        # older sklearn versions expose validation as the estimator method
        # ``_validate_data``; newer releases provide the function
        # ``validate_data``, which takes the estimator as its first argument
        if hasattr(self, "_validate_data"):
            return self._validate_data(X, y)

        from sklearn.utils.validation import validate_data

        return validate_data(self, X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """Fit the model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        **fit_params : dict of str -> object
            Parameters passed to the estimator's ``fit`` method when refitting.

        Returns
        -------
        self : object
            Fitted Estimator.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)

        # wrap the estimator, scoring and CV strategy in a cross-validation
        # experiment that the optimizer can evaluate
        experiment = SklearnCvExperiment(
            estimator=self.estimator,
            scoring=self.scoring,
            cv=self.cv,
            X=X,
            y=y,
        )
        self.scorer_ = experiment.scorer_

        # run the search on a clone so the optimizer instance passed by the
        # user remains unchanged
        optimizer = self.optimizer.clone()
        optimizer.set_params(experiment=experiment)
        best_params = optimizer.run()

        self.best_params_ = best_params
        self.best_estimator_ = clone(self.estimator).set_params(**best_params)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """Return the score on the given data, if the estimator has been refit.

        This uses the score defined by ``scoring`` where provided, and the
        ``best_estimator_.score`` method otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_output) \
            or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **params : dict
            Parameters to be passed to the underlying scorer(s).

        Returns
        -------
        score : float
            The score defined by ``scoring`` if provided, and the
            ``best_estimator_.score`` method otherwise.
        """
        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        return self._fit_successful
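
# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the module above: exercises the
# ``scoring``, ``cv`` and ``refit`` parameters documented in the class
# docstring. ``OptCV`` and ``GridSearch`` follow the docstring example; the
# particular scorer and CV splitter below are arbitrary illustrative choices.

if __name__ == "__main__":
    from sklearn.datasets import load_iris
    from sklearn.metrics import f1_score, make_scorer
    from sklearn.model_selection import StratifiedKFold, train_test_split
    from sklearn.svm import SVC

    from hyperactive.opt import GridSearch

    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    tuned = OptCV(
        SVC(),
        GridSearch({"kernel": ["linear", "rbf"], "C": [1, 10]}),
        # callable scorer, as allowed by the ``scoring`` parameter
        scoring=make_scorer(f1_score, average="macro"),
        # explicit CV strategy instead of the default KFold(n_splits=3)
        cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=0),
        # refit on the full training data so ``predict`` and ``score`` work
        refit=True,
    )
    tuned.fit(X_train, y_train)
    print(tuned.best_params_)
    print(tuned.score(X_test, y_test))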