hyperactive.integrations.sklearn.opt_cv - Code Metrics - Inspection of "V5 API rework - unified API for optimizers and exp..." - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#110)

unknown

created 2025-05-17 13:44 UTC

hyperactive.integrations.sklearn.opt_cv A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	177
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	8
eloc	60
dl	0
loc	177
rs	10
c	0
b	0
f	0

6 Methods

Rating	Name	Size	Complexity
A	OptCV.score()	27	1
A	OptCV.fit()	42	2
A	OptCV.__init__()	16	1
A	OptCV._refit()	7	1
A	OptCV._check_data()	8	2
A	OptCV.fit_successful()	3	1

# copyright: hyperactive developers, MIT License (see LICENSE file)

from collections.abc import Callable
from typing import Union

from sklearn.base import BaseEstimator, clone
from sklearn.utils.validation import indexable, _check_method_params

from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
from hyperactive.integrations.sklearn.best_estimator import (
    BestEstimator as _BestEstimator_
)
from hyperactive.integrations.sklearn.checks import Checks


class OptCV(BaseEstimator, _BestEstimator_, Checks):
    """Tuning via any optimizer in the hyperactive API.

    Parameters
    ----------
    estimator : SklearnBaseEstimator
        The estimator to be tuned.
    optimizer : hyperactive BaseOptimizer
        The optimizer to be used for hyperparameter search.
    scoring : callable or str, default = accuracy_score or mean_squared_error
        sklearn scoring function or metric to evaluate the model's performance.
        Default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``mean_squared_error`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.
    refit : bool, optional, default = True
        Whether to refit the best estimator with the entire dataset.
        If True, the best estimator is refit with the entire dataset after
        the optimization process.
        If False, does not refit, and predict is not available.
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
        The number of folds or cross-validation strategy to be used.
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).

    Example
    -------
    Tuning sklearn SVC via grid search

    1. defining the tuned estimator:
    >>> from sklearn.svm import SVC
    >>> from hyperactive.integrations.sklearn import OptCV
    >>> from hyperactive.opt import GridSearch
    >>>
    >>> param_grid = {"kernel": ["linear", "rbf"], "C": [1, 10]}
    >>> tuned_svc = OptCV(SVC(), GridSearch(param_grid))

    2. fitting the tuned estimator:
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_iris(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    >>>
    >>> tuned_svc.fit(X_train, y_train)
    OptCV(...)
    >>> y_pred = tuned_svc.predict(X_test)

    3. obtaining best parameters and best estimator
    >>> best_params = tuned_svc.best_params_
    >>> best_estimator = tuned_svc.best_estimator_
    """

    _required_parameters = ["estimator", "optimizer"]

    def __init__(
        self,
        estimator,
        optimizer,
        *,
        scoring: Union[Callable, str, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        # Constructor arguments are stored unmodified under their own names.
        self.estimator = estimator
        self.optimizer = optimizer
        self.scoring = scoring
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        """Rebuild ``best_estimator_`` from ``best_params_`` and fit it.

        A fresh clone of ``self.estimator`` is configured with a copy of
        ``self.best_params_`` and fitted on ``X`` / ``y``; ``fit_params``
        are forwarded to the estimator's ``fit``.

        Returns
        -------
        self : object
        """
        # safe=False lets ``clone`` copy the plain parameter dict, so the
        # stored ``best_params_`` is not shared with the fitted estimator.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        """Make ``X`` and ``y`` indexable and run sklearn input validation.

        Returns
        -------
        X, y : validated inputs, as returned by the sklearn validation routine.
        """
        X, y = indexable(X, y)
        if hasattr(self, "_validate_data"):
            # scikit-learn releases that still ship the private bound method:
            # the estimator is passed implicitly as ``self``.
            return self._validate_data(X, y)

        # Releases without the private method expose a free function whose
        # first positional argument is the estimator itself; omitting it
        # would make sklearn treat ``X`` as the estimator and ``y`` as ``X``.
        from sklearn.utils.validation import validate_data

        return validate_data(self, X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """Fit the model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        **fit_params : dict
            Additional parameters. They are checked against ``X`` and
            forwarded to the ``fit`` method of the best estimator when
            ``refit`` is True.

        Returns
        -------
        self : object
            Fitted Estimator.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)

        experiment = SklearnCvExperiment(
            estimator=self.estimator,
            scoring=self.scoring,
            cv=self.cv,
            X=X,
            y=y,
        )
        self.scorer_ = experiment.scorer_

        # Run the search on a clone so the user-supplied optimizer instance
        # is not modified by attaching the experiment.
        optimizer = self.optimizer.clone()
        optimizer.set_params(experiment=experiment)
        best_params = optimizer.run()

        self.best_params_ = best_params
        # Always expose a configured (but unfitted) best estimator; it is
        # replaced by a fitted one below when ``refit`` is True.
        self.best_estimator_ = clone(self.estimator).set_params(**best_params)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """Return the score on the given data, if the estimator has been refit.

        This uses the score defined by ``scoring`` where provided, and the
        ``best_estimator_.score`` method otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_output) or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **params : dict
            Parameters to be passed to the underlying scorer(s).

        Returns
        -------
        score : float
            The score defined by ``scoring`` if provided, and the
            ``best_estimator_.score`` method otherwise.
        """
        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        """Value of the ``_fit_successful`` attribute of this instance."""
        # The original evaluated the attribute without returning it, so the
        # property always yielded None.
        return self._fit_successful


1			# copyright: hyperactive developers, MIT License (see LICENSE file)
2
3			from collections.abc import Callable
4			from typing import Union
5
6			from sklearn.base import BaseEstimator, clone
7			from sklearn.utils.validation import indexable, _check_method_params
8
9			from hyperactive.experiment.integrations.sklearn_cv import SklearnCvExperiment
10			from hyperactive.integrations.sklearn.best_estimator import (
11			BestEstimator as _BestEstimator_
12			)
13			from hyperactive.integrations.sklearn.checks import Checks
14
15
class OptCV(BaseEstimator, _BestEstimator_, Checks):
    """Tuning via any optimizer in the hyperactive API.

    Parameters
    ----------
    estimator : SklearnBaseEstimator
        The estimator to be tuned.
    optimizer : hyperactive BaseOptimizer
        The optimizer to be used for hyperparameter search.
    scoring : callable or str, default = accuracy_score or mean_squared_error
        sklearn scoring function or metric to evaluate the model's performance.
        Default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``mean_squared_error`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.
    refit : bool, optional, default = True
        Whether to refit the best estimator with the entire dataset.
        If True, the best estimator is refit with the entire dataset after
        the optimization process.
        If False, does not refit, and predict is not available.
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
        The number of folds or cross-validation strategy to be used.
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).

    Example
    -------
    Tuning sklearn SVC via grid search

    1. defining the tuned estimator:
    >>> from sklearn.svm import SVC
    >>> from hyperactive.integrations.sklearn import OptCV
    >>> from hyperactive.opt import GridSearch
    >>>
    >>> param_grid = {"kernel": ["linear", "rbf"], "C": [1, 10]}
    >>> tuned_svc = OptCV(SVC(), GridSearch(param_grid))

    2. fitting the tuned estimator:
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_iris(return_X_y=True)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    >>>
    >>> tuned_svc.fit(X_train, y_train)
    OptCV(...)
    >>> y_pred = tuned_svc.predict(X_test)

    3. obtaining best parameters and best estimator
    >>> best_params = tuned_svc.best_params_
    >>> best_estimator = tuned_svc.best_estimator_
    """

    _required_parameters = ["estimator", "optimizer"]

    def __init__(
        self,
        estimator,
        optimizer,
        *,
        scoring: Union[Callable, str, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        # Constructor arguments are stored unmodified under their own names.
        self.estimator = estimator
        self.optimizer = optimizer
        self.scoring = scoring
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        """Rebuild ``best_estimator_`` from ``best_params_`` and fit it.

        A fresh clone of ``self.estimator`` is configured with a copy of
        ``self.best_params_`` and fitted on ``X`` / ``y``; ``fit_params``
        are forwarded to the estimator's ``fit``.

        Returns
        -------
        self : object
        """
        # safe=False lets ``clone`` copy the plain parameter dict, so the
        # stored ``best_params_`` is not shared with the fitted estimator.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        """Make ``X`` and ``y`` indexable and run sklearn input validation.

        Returns
        -------
        X, y : validated inputs, as returned by the sklearn validation routine.
        """
        X, y = indexable(X, y)
        if hasattr(self, "_validate_data"):
            # scikit-learn releases that still ship the private bound method:
            # the estimator is passed implicitly as ``self``.
            return self._validate_data(X, y)

        # Releases without the private method expose a free function whose
        # first positional argument is the estimator itself; omitting it
        # would make sklearn treat ``X`` as the estimator and ``y`` as ``X``.
        from sklearn.utils.validation import validate_data

        return validate_data(self, X, y)

    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """Fit the model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        **fit_params : dict
            Additional parameters. They are checked against ``X`` and
            forwarded to the ``fit`` method of the best estimator when
            ``refit`` is True.

        Returns
        -------
        self : object
            Fitted Estimator.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)

        experiment = SklearnCvExperiment(
            estimator=self.estimator,
            scoring=self.scoring,
            cv=self.cv,
            X=X,
            y=y,
        )
        self.scorer_ = experiment.scorer_

        # Run the search on a clone so the user-supplied optimizer instance
        # is not modified by attaching the experiment.
        optimizer = self.optimizer.clone()
        optimizer.set_params(experiment=experiment)
        best_params = optimizer.run()

        self.best_params_ = best_params
        # Always expose a configured (but unfitted) best estimator; it is
        # replaced by a fitted one below when ``refit`` is True.
        self.best_estimator_ = clone(self.estimator).set_params(**best_params)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """Return the score on the given data, if the estimator has been refit.

        This uses the score defined by ``scoring`` where provided, and the
        ``best_estimator_.score`` method otherwise.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples, n_output) or (n_samples,), default=None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        **params : dict
            Parameters to be passed to the underlying scorer(s).

        Returns
        -------
        score : float
            The score defined by ``scoring`` if provided, and the
            ``best_estimator_.score`` method otherwise.
        """
        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        """Value of the ``_fit_successful`` attribute of this instance."""
        # The original evaluated the attribute without returning it, so the
        # property always yielded None.
        return self._fit_successful
177

SimonBlanke / Hyperactive

Pull Request — master (#110)

hyperactive.integrations.sklearn.opt_cv A

Complexity

Size/Duplication

Importance

6 Methods

Duplication Side-by-Side

Filter issues like