hyperactive.experiment.integrations.sklearn_cv.SklearnCvExperiment.__init__() - Code Metrics - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

SklearnCvExperiment.__init__() B
last analyzed 2025-08-16 19:01 UTC

↳ Parent: hyperactive.experiment.integrations.sklearn_cv

Complexity

Conditions

Size

Total Lines	41
Code Lines	28

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	28
dl	0
loc	41
rs	7.3333
c	0
b	0
f	0
cc	8
nop	6

"""Experiment adapter for sklearn cross-validation experiments."""
# copyright: hyperactive developers, MIT License (see LICENSE file)

from sklearn import clone
from sklearn.metrics import check_scoring
from sklearn.model_selection import cross_validate
from sklearn.utils.validation import _num_samples

from hyperactive.base import BaseExperiment


class SklearnCvExperiment(BaseExperiment):
    """Experiment adapter for sklearn cross-validation experiments.

    This class is used to perform cross-validation experiments using a given
    sklearn estimator. It allows for hyperparameter tuning and evaluation of
    the model's performance using cross-validation.

    The score returned is the mean of the cross-validation test scores
    obtained by cross-validating a clone of ``estimator`` with the
    hyperparameters passed as ``params`` to ``score``.

    The cross-validation performed is specified by the ``cv`` parameter,
    and the scoring metric is specified by the ``scoring`` parameter.
    The ``X`` and ``y`` parameters are the input data and target values,
    which are used in fit/predict cross-validation.

    Parameters
    ----------
    estimator : sklearn estimator
        The estimator to be used for the experiment.
    X : array-like, shape (n_samples, n_features)
        The input data for the model.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The target values for the model.
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
        The number of folds or cross-validation strategy to be used.
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).
    scoring : callable or str, default = accuracy_score or r2_score
        sklearn scoring function or metric to evaluate the model's performance.

        * If a callable with an ``estimator`` argument in its signature,
          it is used directly as a scorer.
        * If any other callable, it is treated as a metric and wrapped
          via ``sklearn.metrics.make_scorer``.
        * If a str, it is resolved to a scorer via
          ``sklearn.metrics.check_scoring``.

        Default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``r2_score`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.

    Example
    -------
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from sklearn.metrics import accuracy_score
    >>> from sklearn.model_selection import KFold
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     scoring=accuracy_score,
    ...     cv=KFold(n_splits=3, shuffle=True),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    For default choices of ``scoring`` and ``cv``:
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    Quick call without metadata return or dictionary:
    >>> score = sklearn_exp(C=1.0, kernel="linear")
    """

    def __init__(self, estimator, X, y, scoring=None, cv=None):
        # Constructor arguments are stored unmodified under their own names;
        # resolved/derived objects live in underscore-prefixed attributes.
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv

        super().__init__()

        # Resolve the cross-validation strategy.
        if cv is None or isinstance(cv, int):
            from sklearn.model_selection import KFold

            n_splits = 3 if cv is None else cv
            self._cv = KFold(n_splits=n_splits, shuffle=True)
        else:
            self._cv = cv

        # Resolve the scorer.
        if scoring is None:
            self._scoring = check_scoring(self.estimator)
        elif callable(scoring):
            from inspect import signature

            # A callable taking an "estimator" argument is treated as a
            # scorer; any other callable is treated as a plain metric.
            if "estimator" in signature(scoring).parameters:
                self._scoring = scoring
            else:
                from sklearn.metrics import make_scorer

                self._scoring = make_scorer(scoring)
        else:
            # Scorer names given as str (and anything else non-callable) are
            # resolved, or rejected, by sklearn itself. Previously this case
            # left ``_scoring`` unset and failed with an AttributeError.
            self._scoring = check_scoring(self.estimator, scoring=scoring)
        self.scorer_ = self._scoring

        # Set the optimization direction tag for scorers that wrap a metric.
        if hasattr(self._scoring, "_score_func"):
            metric_sign = _guess_sign_of_sklmetric(self._scoring._score_func)
            # sklearn scorers built with ``greater_is_better=False`` (e.g.,
            # the "neg_*" string scorers) carry ``_sign == -1`` and return the
            # negated metric, which flips the direction of the returned value.
            scorer_sign = getattr(self._scoring, "_sign", 1)
            _sign_str = "higher" if metric_sign * scorer_sign > 0 else "lower"
            self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters,
            as returned by ``estimator.get_params()``.
        """
        return list(self.estimator.get_params())

    def _evaluate(self, params):
        """Evaluate the parameters.

        A clone of ``estimator`` is configured with ``params`` and
        cross-validated on ``X`` and ``y``; the original estimator
        is left unchanged.

        Parameters
        ----------
        params : dict with string keys
            Parameters to evaluate.

        Returns
        -------
        float
            The value of the parameters as per evaluation,
            i.e., the mean test score across cross-validation folds.
        dict
            Additional metadata about the search, with keys
            ``"score_time"`` and ``"fit_time"`` (per-fold timings from
            ``cross_validate``) and ``"n_test_samples"``
            (``_num_samples`` of the full ``X``).
        """
        estimator = clone(self.estimator)
        estimator.set_params(**params)

        cv_results = cross_validate(
            estimator,
            self.X,
            self.y,
            scoring=self._scoring,
            cv=self._cv,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the skbase object.

        ``get_test_params`` is a unified interface point to store
        parameter settings for testing purposes. This function is also
        used in ``create_test_instance`` and ``create_test_instances_and_names``
        to construct test instances.

        ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.

        Each ``dict`` is a parameter configuration for testing,
        and can be used to construct an "interesting" test instance.
        A call to ``cls(**params)`` should
        be valid for all dictionaries ``params`` in the return of ``get_test_params``.

        The ``get_test_params`` need not return fixed lists of dictionaries,
        it can also return dynamic or stochastic parameter settings.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from sklearn.datasets import load_diabetes, load_iris
        from sklearn.metrics import accuracy_score, mean_absolute_error
        from sklearn.model_selection import KFold
        from sklearn.svm import SVC, SVR

        # classification: explicit metric and explicit cv splitter
        X_clf, y_clf = load_iris(return_X_y=True)
        params_classif = {
            "estimator": SVC(),
            "scoring": accuracy_score,
            "cv": KFold(n_splits=3, shuffle=True),
            "X": X_clf,
            "y": y_clf,
        }

        # regression: explicit loss metric and integer cv
        X_reg, y_reg = load_diabetes(return_X_y=True)
        params_regress = {
            "estimator": SVR(),
            "scoring": mean_absolute_error,
            "cv": 2,
            "X": X_reg,
            "y": y_reg,
        }

        # regression: defaults for both scoring and cv
        params_all_default = {
            "estimator": SVR(),
            "X": X_reg,
            "y": y_reg,
        }

        return [params_classif, params_regress, params_all_default]

    @classmethod
    def _get_score_params(cls):
        """Return settings for testing score/evaluate functions. Used in tests only.

        Returns a list, the i-th element should be valid arguments for
        self.evaluate and self.score, of an instance constructed with
        self.get_test_params()[i].

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        score_params_classif = {"C": 1.0, "kernel": "linear"}
        score_params_regress = {"C": 1.0, "kernel": "linear"}
        score_params_defaults = {"C": 1.0, "kernel": "linear"}
        return [score_params_classif, score_params_regress, score_params_defaults]


def _guess_sign_of_sklmetric(scorer):
    """Guess the sign of a sklearn metric scorer.

    Parameters
    ----------
    scorer : callable
        The sklearn metric scorer to guess the sign for.

    Returns
    -------
    int
        1 if higher scores are better, -1 if lower scores are better.
    """
    HIGHER_IS_BETTER = {
        # Classification
        "accuracy_score": True,
        "auc": True,
        "average_precision_score": True,
        "balanced_accuracy_score": True,
        "brier_score_loss": False,
        "class_likelihood_ratios": False,
        "cohen_kappa_score": True,
        "d2_log_loss_score": True,
        "dcg_score": True,
        "f1_score": True,
        "fbeta_score": True,
        "hamming_loss": False,
        "hinge_loss": False,
        "jaccard_score": True,
        "log_loss": False,
        "matthews_corrcoef": True,
        "ndcg_score": True,
        "precision_score": True,
        "recall_score": True,
        "roc_auc_score": True,
        "top_k_accuracy_score": True,
        "zero_one_loss": False,
        # Regression
        "d2_absolute_error_score": True,
        "d2_pinball_score": True,
        "d2_tweedie_score": True,
        "explained_variance_score": True,
        "max_error": False,
        "mean_absolute_error": False,
        "mean_absolute_percentage_error": False,
        "mean_gamma_deviance": False,
        "mean_pinball_loss": False,
        "mean_poisson_deviance": False,
        "mean_squared_error": False,
        "mean_squared_log_error": False,
        "mean_tweedie_deviance": False,
        "median_absolute_error": False,
        "r2_score": True,
        "root_mean_squared_error": False,
        "root_mean_squared_log_error": False,
    }

    scorer_name = getattr(scorer, "__name__", None)

    if hasattr(scorer, "greater_is_better"):
        return 1 if scorer.greater_is_better else -1
    elif scorer_name in HIGHER_IS_BETTER:
        return 1 if HIGHER_IS_BETTER[scorer_name] else -1
    elif scorer_name.endswith("_score"):
        # If the scorer name ends with "_score", we assume higher is better
        return 1
    elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
        # If the scorer name ends with "_loss", we assume lower is better
        return -1
    elif scorer_name.endswith("_error"):
        return -1
    else:
        # If we cannot determine the sign, we assume lower is better
        return -1


1			"""Experiment adapter for sklearn cross-validation experiments."""
2			# copyright: hyperactive developers, MIT License (see LICENSE file)
3
4			from sklearn import clone
5			from sklearn.metrics import check_scoring
6			from sklearn.model_selection import cross_validate
7			from sklearn.utils.validation import _num_samples
8
9			from hyperactive.base import BaseExperiment
10
11
12			class SklearnCvExperiment(BaseExperiment):
13			"""Experiment adapter for sklearn cross-validation experiments.
14
15			This class is used to perform cross-validation experiments using a given
16			sklearn estimator. It allows for hyperparameter tuning and evaluation of
17			the model's performance using cross-validation.
18
19			The score returned is the mean of the cross-validation scores,
20			of applying cross-validation to ``estimator`` with the parameters given in
21			``score`` ``params``.
22
23			The cross-validation performed is specified by the ``cv`` parameter,
24			and the scoring metric is specified by the ``scoring`` parameter.
25			The ``X`` and ``y`` parameters are the input data and target values,
26			which are used in fit/predict cross-validation.
27
28			Parameters
29			----------
30			estimator : sklearn estimator
31			The estimator to be used for the experiment.
32			X : array-like, shape (n_samples, n_features)
33			The input data for the model.
34			y : array-like, shape (n_samples,) or (n_samples, n_outputs)
35			The target values for the model.
36			cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
37			The number of folds or cross-validation strategy to be used.
38			If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).
39			scoring : callable or str, default = accuracy_score or mean_squared_error
40			sklearn scoring function or metric to evaluate the model's performance.
41			Default is determined by the type of estimator:
42			``accuracy_score`` for classifiers, and
43			``mean_squared_error`` for regressors, as per sklearn convention
44			through the default ``score`` method of the estimator.
45
46			Example
47			-------
48			>>> from hyperactive.experiment.integrations import SklearnCvExperiment
49			>>> from sklearn.datasets import load_iris
50			>>> from sklearn.svm import SVC
51			>>> from sklearn.metrics import accuracy_score
52			>>> from sklearn.model_selection import KFold
53			>>>
54			>>> X, y = load_iris(return_X_y=True)
55			>>>
56			>>> sklearn_exp = SklearnCvExperiment(
57			... estimator=SVC(),
58			... scoring=accuracy_score,
59			... cv=KFold(n_splits=3, shuffle=True),
60			... X=X,
61			... y=y,
62			... )
63			>>> params = {"C": 1.0, "kernel": "linear"}
64			>>> score, add_info = sklearn_exp.score(params)
65
66			For default choices of ``scoring`` and ``cv``:
67			>>> sklearn_exp = SklearnCvExperiment(
68			... estimator=SVC(),
69			... X=X,
70			... y=y,
71			... )
72			>>> params = {"C": 1.0, "kernel": "linear"}
73			>>> score, add_info = sklearn_exp.score(params)
74
75			Quick call without metadata return or dictionary:
76			>>> score = sklearn_exp(C=1.0, kernel="linear")
77			"""
78
79			def __init__(self, estimator, X, y, scoring=None, cv=None):
80			self.estimator = estimator
81			self.X = X
82			self.y = y
83			self.scoring = scoring
84			self.cv = cv
85
86			super().__init__()
87
88			if cv is None:
89			from sklearn.model_selection import KFold
90
91			self._cv = KFold(n_splits=3, shuffle=True)
92			elif isinstance(cv, int):
93			from sklearn.model_selection import KFold
94
95			self._cv = KFold(n_splits=cv, shuffle=True)
96			else:
97			self._cv = cv
98
99			# check if scoring is a scorer by checking for "estimator" in signature
100			if scoring is None:
101			self._scoring = check_scoring(self.estimator)
102			# check using inspect.signature for "estimator" in signature
103			elif callable(scoring):
104			from inspect import signature
105
106			if "estimator" in signature(scoring).parameters:
107			self._scoring = scoring
108			else:
109			from sklearn.metrics import make_scorer
110
111			self._scoring = make_scorer(scoring)
112			self.scorer_ = self._scoring
113
114			# Set the sign of the scoring function
115			if hasattr(self._scoring, "_score"):
116			score_func = self._scoring._score_func
117			_sign = _guess_sign_of_sklmetric(score_func)
118			_sign_str = "higher" if _sign == 1 else "lower"
119			self.set_tags(**{"property:higher_or_lower_is_better": _sign_str})
120
121			def _paramnames(self):
122			"""Return the parameter names of the search.
123
124			Returns
125			-------
126			list of str
127			The parameter names of the search parameters.
128			"""
129			return list(self.estimator.get_params().keys())
130
131			def _evaluate(self, params):
132			"""Evaluate the parameters.
133
134			Parameters
135			----------
136			params : dict with string keys
137			Parameters to evaluate.
138
139			Returns
140			-------
141			float
142			The value of the parameters as per evaluation.
143			dict
144			Additional metadata about the search.
145			"""
146			estimator = clone(self.estimator)
147			estimator.set_params(**params)
148
149			cv_results = cross_validate(
150			estimator,
151			self.X,
152			self.y,
153			scoring=self._scoring,
154			cv=self._cv,
155			)
156
157			add_info_d = {
158			"score_time": cv_results["score_time"],
159			"fit_time": cv_results["fit_time"],
160			"n_test_samples": _num_samples(self.X),
161			}
162
163			return cv_results["test_score"].mean(), add_info_d
164
165			@classmethod
166			def get_test_params(cls, parameter_set="default"):
167			"""Return testing parameter settings for the skbase object.
168
169			``get_test_params`` is a unified interface point to store
170			parameter settings for testing purposes. This function is also
171			used in ``create_test_instance`` and ``create_test_instances_and_names``
172			to construct test instances.
173
174			``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.
175
176			Each ``dict`` is a parameter configuration for testing,
177			and can be used to construct an "interesting" test instance.
178			A call to ``cls(**params)`` should
179			be valid for all dictionaries ``params`` in the return of ``get_test_params``.
180
181			The ``get_test_params`` need not return fixed lists of dictionaries,
182			it can also return dynamic or stochastic parameter settings.
183
184			Parameters
185			----------
186			parameter_set : str, default="default"
187			Name of the set of test parameters to return, for use in tests. If no
188			special parameters are defined for a value, will return `"default"` set.
189
190			Returns
191			-------
192			params : dict or list of dict, default = {}
193			Parameters to create testing instances of the class
194			Each dict are parameters to construct an "interesting" test instance, i.e.,
195			`MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
196			`create_test_instance` uses the first (or only) dictionary in `params`
197			"""
198			from sklearn.datasets import load_diabetes, load_iris
199			from sklearn.metrics import accuracy_score, mean_absolute_error
200			from sklearn.model_selection import KFold
201			from sklearn.svm import SVC, SVR
202
203			X, y = load_iris(return_X_y=True)
204			params_classif = {
205			"estimator": SVC(),
206			"scoring": accuracy_score,
207			"cv": KFold(n_splits=3, shuffle=True),
208			"X": X,
209			"y": y,
210			}
211
212			X, y = load_diabetes(return_X_y=True)
213			params_regress = {
214			"estimator": SVR(),
215			"scoring": mean_absolute_error,
216			"cv": 2,
217			"X": X,
218			"y": y,
219			}
220
221			X, y = load_diabetes(return_X_y=True)
222			params_all_default = {
223			"estimator": SVR(),
224			"X": X,
225			"y": y,
226			}
227
228			return [params_classif, params_regress, params_all_default]
229
230			@classmethod
231			def _get_score_params(self):
232			"""Return settings for testing score/evaluate functions. Used in tests only.
233
234			Returns a list, the i-th element should be valid arguments for
235			self.evaluate and self.score, of an instance constructed with
236			self.get_test_params()[i].
237
238			Returns
239			-------
240			list of dict
241			The parameters to be used for scoring.
242			"""
243			score_params_classif = {"C": 1.0, "kernel": "linear"}
244			score_params_regress = {"C": 1.0, "kernel": "linear"}
245			score_params_defaults = {"C": 1.0, "kernel": "linear"}
246			return [score_params_classif, score_params_regress, score_params_defaults]
247
248
249			def _guess_sign_of_sklmetric(scorer):
250			"""Guess the sign of a sklearn metric scorer.
251
252			Parameters
253			----------
254			scorer : callable
255			The sklearn metric scorer to guess the sign for.
256
257			Returns
258			-------
259			int
260			1 if higher scores are better, -1 if lower scores are better.
261			"""
262			HIGHER_IS_BETTER = {
263			# Classification
264			"accuracy_score": True,
265			"auc": True,
266			"average_precision_score": True,
267			"balanced_accuracy_score": True,
268			"brier_score_loss": False,
269			"class_likelihood_ratios": False,
270			"cohen_kappa_score": True,
271			"d2_log_loss_score": True,
272			"dcg_score": True,
273			"f1_score": True,
274			"fbeta_score": True,
275			"hamming_loss": False,
276			"hinge_loss": False,
277			"jaccard_score": True,
278			"log_loss": False,
279			"matthews_corrcoef": True,
280			"ndcg_score": True,
281			"precision_score": True,
282			"recall_score": True,
283			"roc_auc_score": True,
284			"top_k_accuracy_score": True,
285			"zero_one_loss": False,
286			# Regression
287			"d2_absolute_error_score": True,
288			"d2_pinball_score": True,
289			"d2_tweedie_score": True,
290			"explained_variance_score": True,
291			"max_error": False,
292			"mean_absolute_error": False,
293			"mean_absolute_percentage_error": False,
294			"mean_gamma_deviance": False,
295			"mean_pinball_loss": False,
296			"mean_poisson_deviance": False,
297			"mean_squared_error": False,
298			"mean_squared_log_error": False,
299			"mean_tweedie_deviance": False,
300			"median_absolute_error": False,
301			"r2_score": True,
302			"root_mean_squared_error": False,
303			"root_mean_squared_log_error": False,
304			}
305
306			scorer_name = getattr(scorer, "__name__", None)
307
308			if hasattr(scorer, "greater_is_better"):
309			return 1 if scorer.greater_is_better else -1
310			elif scorer_name in HIGHER_IS_BETTER:
311			return 1 if HIGHER_IS_BETTER[scorer_name] else -1
312			elif scorer_name.endswith("_score"):
313			# If the scorer name ends with "_score", we assume higher is better
314			return 1
315			elif scorer_name.endswith("_loss") or scorer_name.endswith("_deviance"):
316			# If the scorer name ends with "_loss", we assume lower is better
317			return -1
318			elif scorer_name.endswith("_error"):
319			return -1
320			else:
321			# If we cannot determine the sign, we assume lower is better
322			return -1
323

SimonBlanke / Hyperactive

SklearnCvExperiment.__init__() B last analyzed 2025-08-16 19:01 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

SklearnCvExperiment.__init__() B
last analyzed 2025-08-16 19:01 UTC