hyperactive.integrations.sklearn.hyperactive_search_cv.HyperactiveSearchCV._check_data() - Code Metrics - Inspection of "adapt 'validate_data' future sklearn versions" - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 3434da...da0201 )

by Simon

created 2024-10-13 06:00 UTC

HyperactiveSearchCV._check_data() A

↳ Parent: hyperactive.integrations.sklearn.hyperactive_search_cv

Complexity

Conditions

Size

Total Lines	8
Code Lines	6

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	6
nop	3
dl	0
loc	8
rs	10
c	0
b	0
f	0

# Author: Simon Blanke
# Email: [email protected]
# License: MIT License

from collections.abc import Iterable, Callable
from typing import Union, Dict, Type

from sklearn.base import BaseEstimator, clone
from sklearn.metrics import check_scoring
from sklearn.utils.validation import indexable, _check_method_params

from sklearn.base import BaseEstimator as SklearnBaseEstimator
from sklearn.model_selection import BaseCrossValidator

from hyperactive import Hyperactive

from .objective_function_adapter import ObjectiveFunctionAdapter
from .best_estimator import BestEstimator as _BestEstimator_
from .checks import Checks
from ...optimizers import RandomSearchOptimizer


class HyperactiveSearchCV(BaseEstimator, _BestEstimator_, Checks):
    """
    HyperactiveSearchCV class for hyperparameter tuning using cross-validation with sklearn estimators.

    Parameters:
    - estimator: SklearnBaseEstimator
        The estimator to be tuned.
    - params_config: Dict[str, list]
        Dictionary containing the hyperparameter search space.
    - optimizer: Union[str, Type[RandomSearchOptimizer]], optional
        The optimizer to be used for hyperparameter search, default is "default".
    - n_iter: int, optional
        Number of parameter settings that are sampled, default is 100.
    - scoring: Callable | str | None, optional
        Scoring method to evaluate the predictions on the test set.
    - n_jobs: int, optional
        Number of jobs to run in parallel, default is 1.
    - random_state: int | None, optional
        Random seed for reproducibility.
    - refit: bool, optional
        Refit the best estimator with the entire dataset, default is True.
    - cv: int | "BaseCrossValidator" | Iterable | None, optional
        Determines the cross-validation splitting strategy.

    Attributes set by fit:
    - scorer_
        Scorer built by sklearn's check_scoring from estimator and scoring.
    - best_params_
        Result of Hyperactive.best_para for the search.
    - best_score_
        Result of Hyperactive.best_score for the search.
    - search_data_
        Result of Hyperactive.search_data for the search.
    - best_estimator_
        Clone of estimator fitted with best_params_; only set if refit is
        truthy.

    Methods:
    - fit(X, y, **fit_params)
        Fit the estimator and tune hyperparameters.
    - score(X, y, **params)
        Return the score of the best estimator on the input data.
    """

    _required_parameters = ["estimator", "optimizer", "params_config"]

    def __init__(
        self,
        estimator: "SklearnBaseEstimator",
        params_config: Dict[str, list],
        optimizer: Union[str, Type[RandomSearchOptimizer]] = "default",
        n_iter: int = 100,
        *,
        scoring: Union[Callable, str, None] = None,
        n_jobs: int = 1,
        random_state: Union[int, None] = None,
        refit: bool = True,
        cv=None,
    ):
        super().__init__()

        # Constructor arguments are stored unmodified under their own names.
        self.estimator = estimator
        self.params_config = params_config
        self.optimizer = optimizer
        self.n_iter = n_iter
        self.scoring = scoring
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.refit = refit
        self.cv = cv

    def _refit(self, X, y=None, **fit_params):
        """
        Fit a fresh clone of the estimator with the best parameters found.

        Parameters:
        - X: training input samples, passed on to the estimator's fit.
        - y: target values, passed on to the estimator's fit, default is None.
        - **fit_params: additional keyword arguments for the estimator's fit.

        Returns:
        - self: object
            Returns the instance itself; the fitted clone is stored in
            best_estimator_.
        """
        # The parameters are cloned too (safe=False allows non-estimator
        # objects), so the new estimator gets its own copy of best_params_.
        self.best_estimator_ = clone(self.estimator).set_params(
            **clone(self.best_params_, safe=False)
        )

        self.best_estimator_.fit(X, y, **fit_params)
        return self

    def _check_data(self, X, y):
        """
        Make X and y indexable and validate them with sklearn's data validation.

        Uses the public function sklearn.utils.validation.validate_data when
        the installed scikit-learn provides it and falls back to the
        estimator method _validate_data otherwise.

        Parameters:
        - X: the input samples.
        - y: the target values.

        Returns:
        - the validated (X, y) as returned by the sklearn validation routine.
        """
        X, y = indexable(X, y)

        try:
            from sklearn.utils.validation import validate_data
        except ImportError:
            # scikit-learn versions without the public function: use the
            # bound method inherited from BaseEstimator.
            return self._validate_data(X, y)

        # The module-level function takes the estimator as its first
        # (positional-only) argument; calling it as validate_data(X, y)
        # would treat X as the estimator and y as the data.
        return validate_data(self, X, y)

    # NOTE(review): Checks.verify_fit is defined in .checks and not visible
    # here; presumably it records the outcome read by fit_successful - confirm.
    @Checks.verify_fit
    def fit(self, X, y, **fit_params):
        """
        Fit the estimator using the provided training data.

        Validates the data, builds the scorer and the objective function,
        runs the Hyperactive search and stores its results. If refit is
        truthy, the best estimator is fitted afterwards via _refit.

        Parameters:
        - X: array-like or sparse matrix, shape (n_samples, n_features)
            The training input samples.
        - y: array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values.
        - **fit_params: dict of string -> object
            Additional fit parameters.

        Returns:
        - self: object
            Returns the instance itself.
        """

        X, y = self._check_data(X, y)

        fit_params = _check_method_params(X, params=fit_params)
        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

        # Wrap estimator, data and validation settings into the objective
        # function that Hyperactive optimizes.
        objective_function_adapter = ObjectiveFunctionAdapter(
            self.estimator,
        )
        objective_function_adapter.add_dataset(X, y)
        objective_function_adapter.add_validation(self.scorer_, self.cv)
        objective_function = objective_function_adapter.objective_function

        hyper = Hyperactive(verbosity=False)
        hyper.add_search(
            objective_function,
            search_space=self.params_config,
            optimizer=self.optimizer,
            n_iter=self.n_iter,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        hyper.run()

        # Results are looked up by the objective function that was searched.
        self.best_params_ = hyper.best_para(objective_function)
        self.best_score_ = hyper.best_score(objective_function)
        self.search_data_ = hyper.search_data(objective_function)

        if self.refit:
            self._refit(X, y, **fit_params)

        return self

    def score(self, X, y=None, **params):
        """
        Calculate the score of the best estimator on the input data.

        Requires scorer_ and best_estimator_, which fit sets (the latter
        only when refit is truthy).

        Parameters:
        - X: array-like or sparse matrix of shape (n_samples, n_features)
            The input samples.
        - y: array-like of shape (n_samples,), default=None
            The target values.
        - **params: dict
            Additional parameters to be passed to the scoring function.

        Returns:
        - float
            The score of the best estimator on the input data.
        """

        return self.scorer_(self.best_estimator_, X, y, **params)

    @property
    def fit_successful(self):
        """
        Return the value stored in the _fit_successful attribute.

        NOTE(review): _fit_successful is not assigned anywhere in this class;
        presumably Checks.verify_fit sets it - confirm in .checks.
        """
        # The original evaluated the attribute without returning it, so the
        # property always yielded None.
        return self._fit_successful


1			# Author: Simon Blanke
2			# Email: [email protected]
3			# License: MIT License
4
5			from collections.abc import Iterable, Callable
6			from typing import Union, Dict, Type
7
8			from sklearn.base import BaseEstimator, clone
9			from sklearn.metrics import check_scoring
10			from sklearn.utils.validation import indexable, _check_method_params
11
12			from sklearn.base import BaseEstimator as SklearnBaseEstimator
13			from sklearn.model_selection import BaseCrossValidator
14
15			from hyperactive import Hyperactive
16
17			from .objective_function_adapter import ObjectiveFunctionAdapter
18			from .best_estimator import BestEstimator as _BestEstimator_
19			from .checks import Checks
20			from ...optimizers import RandomSearchOptimizer
21
22
23			class HyperactiveSearchCV(BaseEstimator, _BestEstimator_, Checks):
24			"""
25			HyperactiveSearchCV class for hyperparameter tuning using cross-validation with sklearn estimators.
26
27			Parameters:
28			- estimator: SklearnBaseEstimator
29			The estimator to be tuned.
30			- params_config: Dict[str, list]
31			Dictionary containing the hyperparameter search space.
32			- optimizer: Union[str, Type[RandomSearchOptimizer]], optional
33			The optimizer to be used for hyperparameter search, default is "default".
34			- n_iter: int, optional
35			Number of parameter settings that are sampled, default is 100.
36			- scoring: Callable | str | None, optional
37			Scoring method to evaluate the predictions on the test set.
38			- n_jobs: int, optional
39			Number of jobs to run in parallel, default is 1.
40			- random_state: int | None, optional
41			Random seed for reproducibility.
42			- refit: bool, optional
43			Refit the best estimator with the entire dataset, default is True.
44			- cv: int | "BaseCrossValidator" | Iterable | None, optional
45			Determines the cross-validation splitting strategy.
46
47			Methods:
48			- fit(X, y, **fit_params)
49			Fit the estimator and tune hyperparameters.
50			- score(X, y, **params)
51			Return the score of the best estimator on the input data.
52			"""
53
54			_required_parameters = ["estimator", "optimizer", "params_config"]
55
56			def __init__(
57			self,
58			estimator: "SklearnBaseEstimator",
59			params_config: Dict[str, list],
60			optimizer: Union[str, Type[RandomSearchOptimizer]] = "default",
61			n_iter: int = 100,
62			*,
63			scoring: Union[Callable, str, None] = None,
64			n_jobs: int = 1,
65			random_state: Union[int, None] = None,
66			refit: bool = True,
67			cv=None,
68			):
69			super().__init__()
70
71			self.estimator = estimator
72			self.params_config = params_config
73			self.optimizer = optimizer
74			self.n_iter = n_iter
75			self.scoring = scoring
76			self.n_jobs = n_jobs
77			self.random_state = random_state
78			self.refit = refit
79			self.cv = cv
80
81			def _refit(self, X, y=None, **fit_params):
82			self.best_estimator_ = clone(self.estimator).set_params(
83			**clone(self.best_params_, safe=False)
84			)
85
86			self.best_estimator_.fit(X, y, **fit_params)
87			return self
88
89			def _check_data(self, X, y):
90			X, y = indexable(X, y)
91			if hasattr(self, "_validate_data"):
92			validate_data = self._validate_data
93			else:
94			from sklearn.utils.validation import validate_data
95
96			return validate_data(X, y)
97
98			@Checks.verify_fit
99			def fit(self, X, y, **fit_params):
100			"""
101			Fit the estimator using the provided training data.
102
103			Parameters:
104			- X: array-like or sparse matrix, shape (n_samples, n_features)
105			The training input samples.
106			- y: array-like, shape (n_samples,) or (n_samples, n_outputs)
107			The target values.
108			- **fit_params: dict of string -> object
109			Additional fit parameters.
110
111			Returns:
112			- self: object
113			Returns the instance itself.
114			"""
115
116			X, y = self._check_data(X, y)
117
118			fit_params = _check_method_params(X, params=fit_params)
119			self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
120
121			objective_function_adapter = ObjectiveFunctionAdapter(
122			self.estimator,
123			)
124			objective_function_adapter.add_dataset(X, y)
125			objective_function_adapter.add_validation(self.scorer_, self.cv)
126			objective_function = objective_function_adapter.objective_function
127
128			hyper = Hyperactive(verbosity=False)
129			hyper.add_search(
130			objective_function,
131			search_space=self.params_config,
132			optimizer=self.optimizer,
133			n_iter=self.n_iter,
134			n_jobs=self.n_jobs,
135			random_state=self.random_state,
136			)
137			hyper.run()
138
139			self.best_params_ = hyper.best_para(objective_function)
140			self.best_score_ = hyper.best_score(objective_function)
141			self.search_data_ = hyper.search_data(objective_function)
142
143			if self.refit:
144			self._refit(X, y, **fit_params)
145
146			return self
147
148			def score(self, X, y=None, **params):
149			"""
150			Calculate the score of the best estimator on the input data.
151
152			Parameters:
153			- X: array-like or sparse matrix of shape (n_samples, n_features)
154			The input samples.
155			- y: array-like of shape (n_samples,), default=None
156			The target values.
157			- **params: dict
158			Additional parameters to be passed to the scoring function.
159
160			Returns:
161			- float
162			The score of the best estimator on the input data.
163			"""
164
165			return self.scorer_(self.best_estimator_, X, y, **params)
166
167			@property
168			def fit_successful(self):
169			self._fit_successful
170

SimonBlanke / Hyperactive

Push — master ( 3434da...da0201 )

HyperactiveSearchCV._check_data() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like