hyperactive.experiment.integrations.sklearn_cv - Code Metrics - Inspection of "V5 API rework - experimental" - SimonBlanke/Hyperactive - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#110)

unknown

created 2025-05-05 15:16 UTC

hyperactive.experiment.integrations.sklearn_cv A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	236
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	10
eloc	78
dl	0
loc	236
rs	10
c	0
b	0
f	0

5 Methods

Rating	Name	Size	Complexity
A	SklearnCvExperiment._score()	33	1
A	SklearnCvExperiment.get_test_params()	64	1
A	SklearnCvExperiment._get_score_params()	16	1
B	SklearnCvExperiment.__init__()	33	6
A	SklearnCvExperiment._paramnames()	9	1

"""Experiment adapter for sklearn cross-validation experiments."""

from sklearn import clone
from sklearn.metrics import check_scoring
from sklearn.model_selection import cross_validate
from sklearn.utils.validation import _num_samples

from hyperactive.base import BaseExperiment

class SklearnCvExperiment(BaseExperiment):
    """Experiment adapter for sklearn cross-validation experiments.

    This class is used to perform cross-validation experiments using a given
    sklearn estimator. It allows for hyperparameter tuning and evaluation of
    the model's performance using cross-validation.

    The score returned is the mean of the cross-validation test scores,
    obtained by cross-validating a clone of ``estimator`` that has been
    configured with the parameters passed to ``score`` as ``params``.

    The cross-validation performed is specified by the ``cv`` parameter,
    and the scoring metric is specified by the ``scoring`` parameter.
    The ``X`` and ``y`` parameters are the input data and target values,
    which are used in fit/predict cross-validation.

    Parameters
    ----------
    estimator : sklearn estimator
        The estimator to be used for the experiment.
    X : array-like, shape (n_samples, n_features)
        The input data for the model.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The target values for the model.
    cv : int or cross-validation generator, default=None
        The number of folds or cross-validation strategy to be used.
        If None, the cross-validation used is KFold(n_splits=3, shuffle=True).
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).
        Any other object is passed to ``cross_validate`` unchanged.
    scoring : str, callable or None, default=None
        Metric used to evaluate the model's performance.

        * None: the default ``score`` method of the estimator is used, as per
          sklearn convention, i.e., accuracy for classifiers and
          R^2 (coefficient of determination) for regressors.
        * str: name of an sklearn scorer, e.g., ``"accuracy"``, resolved via
          ``sklearn.metrics.check_scoring``.
        * callable with an ``estimator`` parameter in its signature:
          used unchanged as a scorer, ``scorer(estimator, X, y)``.
        * any other callable: treated as a metric ``metric(y_true, y_pred)``
          and wrapped by ``sklearn.metrics.make_scorer`` with default settings.

    Raises
    ------
    TypeError
        If ``scoring`` is not None, a str, or a callable.

    Examples
    --------
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from sklearn.metrics import accuracy_score
    >>> from sklearn.model_selection import KFold
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     scoring=accuracy_score,
    ...     cv=KFold(n_splits=3, shuffle=True),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    For default choices of ``scoring`` and ``cv``:

    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    Quick call without metadata return or dictionary:

    >>> score = sklearn_exp(C=1.0, kernel="linear")
    """

    def __init__(self, estimator, X, y, scoring=None, cv=None):
        # Constructor arguments are stored unchanged; the resolved splitter and
        # scorer live on the private ``_cv`` / ``_scoring`` attributes below.
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv

        super().__init__()

        # Resolve ``cv``: None and int both map to a shuffled KFold,
        # anything else is assumed to be a splitter and used as given.
        if cv is None or isinstance(cv, int):
            from sklearn.model_selection import KFold

            n_splits = 3 if cv is None else cv
            self._cv = KFold(n_splits=n_splits, shuffle=True)
        else:
            self._cv = cv

        # Resolve ``scoring`` into a scorer with signature (estimator, X, y).
        if scoring is None or isinstance(scoring, str):
            # None -> estimator's own ``score``; str -> named sklearn scorer.
            self._scoring = check_scoring(self.estimator, scoring=scoring)
        elif callable(scoring):
            from inspect import signature

            # A scorer is told apart from a plain metric function by the
            # presence of an "estimator" parameter in its signature.
            if "estimator" in signature(scoring).parameters:
                self._scoring = scoring
            else:
                from sklearn.metrics import make_scorer

                self._scoring = make_scorer(scoring)
        else:
            # Fail early with a clear message rather than leaving ``_scoring``
            # unset, which would only surface as an AttributeError in ``_score``.
            raise TypeError(
                "scoring must be None, a str, or a callable, "
                f"but found object of type {type(scoring).__name__}"
            )

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters,
            taken from ``estimator.get_params()``.
        """
        return list(self.estimator.get_params())

    def _score(self, params):
        """Score the parameters.

        Parameters
        ----------
        params : dict with string keys
            Parameters to score, set on a clone of ``estimator``.

        Returns
        -------
        float
            The score of the parameters, i.e., the mean cross-validation
            test score.
        dict
            Additional metadata about the search, with keys
            ``"score_time"``, ``"fit_time"`` and ``"n_test_samples"``.
        """
        # Work on a clone so that ``self.estimator`` is never mutated.
        estimator = clone(self.estimator)
        estimator.set_params(**params)

        cv_results = cross_validate(
            estimator,
            self.X,
            self.y,
            scoring=self._scoring,
            cv=self._cv,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            # Total number of samples in X, not the size of a single test fold.
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the skbase object.

        ``get_test_params`` is a unified interface point to store
        parameter settings for testing purposes. This function is also
        used in ``create_test_instance`` and ``create_test_instances_and_names``
        to construct test instances.

        ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.

        Each ``dict`` is a parameter configuration for testing,
        and can be used to construct an "interesting" test instance.
        A call to ``cls(**params)`` should
        be valid for all dictionaries ``params`` in the return of ``get_test_params``.

        The ``get_test_params`` need not return fixed lists of dictionaries,
        it can also return dynamic or stochastic parameter settings.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from sklearn.datasets import load_diabetes, load_iris
        from sklearn.metrics import accuracy_score, mean_absolute_error
        from sklearn.model_selection import KFold
        from sklearn.svm import SVC, SVR

        # Classification: explicit metric function and explicit splitter.
        X_clf, y_clf = load_iris(return_X_y=True)
        params_classif = {
            "estimator": SVC(),
            "scoring": accuracy_score,
            "cv": KFold(n_splits=3, shuffle=True),
            "X": X_clf,
            "y": y_clf,
        }

        # Regression data is loaded once and shared by the two settings below.
        X_reg, y_reg = load_diabetes(return_X_y=True)

        # Regression: explicit metric function and integer ``cv``.
        params_regress = {
            "estimator": SVR(),
            "scoring": mean_absolute_error,
            "cv": 2,
            "X": X_reg,
            "y": y_reg,
        }

        # Regression: defaults for both ``scoring`` and ``cv``.
        params_all_default = {
            "estimator": SVR(),
            "X": X_reg,
            "y": y_reg,
        }

        return [params_classif, params_regress, params_all_default]

    @classmethod
    def _get_score_params(cls):
        """Return settings for the score function.

        Returns a list, the i-th element corresponds to cls.get_test_params()[i].
        It should be a valid call for cls.score.

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        score_params_classif = {"C": 1.0, "kernel": "linear"}
        score_params_regress = {"C": 1.0, "kernel": "linear"}
        score_params_defaults = {"C": 1.0, "kernel": "linear"}
        return [score_params_classif, score_params_regress, score_params_defaults]


1			"""Experiment adapter for sklearn cross-validation experiments."""
2
3			from sklearn import clone
4			from sklearn.metrics import check_scoring
5			from sklearn.model_selection import cross_validate
6			from sklearn.utils.validation import _num_samples
7
8			from hyperactive.base import BaseExperiment
9
class SklearnCvExperiment(BaseExperiment):
    """Experiment adapter for sklearn cross-validation experiments.

    This class is used to perform cross-validation experiments using a given
    sklearn estimator. It allows for hyperparameter tuning and evaluation of
    the model's performance using cross-validation.

    The score returned is the mean of the cross-validation test scores,
    obtained by cross-validating a clone of ``estimator`` that has been
    configured with the parameters passed to ``score`` as ``params``.

    The cross-validation performed is specified by the ``cv`` parameter,
    and the scoring metric is specified by the ``scoring`` parameter.
    The ``X`` and ``y`` parameters are the input data and target values,
    which are used in fit/predict cross-validation.

    Parameters
    ----------
    estimator : sklearn estimator
        The estimator to be used for the experiment.
    X : array-like, shape (n_samples, n_features)
        The input data for the model.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The target values for the model.
    cv : int or cross-validation generator, default=None
        The number of folds or cross-validation strategy to be used.
        If None, the cross-validation used is KFold(n_splits=3, shuffle=True).
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).
        Any other object is passed to ``cross_validate`` unchanged.
    scoring : str, callable or None, default=None
        Metric used to evaluate the model's performance.

        * None: the default ``score`` method of the estimator is used, as per
          sklearn convention, i.e., accuracy for classifiers and
          R^2 (coefficient of determination) for regressors.
        * str: name of an sklearn scorer, e.g., ``"accuracy"``, resolved via
          ``sklearn.metrics.check_scoring``.
        * callable with an ``estimator`` parameter in its signature:
          used unchanged as a scorer, ``scorer(estimator, X, y)``.
        * any other callable: treated as a metric ``metric(y_true, y_pred)``
          and wrapped by ``sklearn.metrics.make_scorer`` with default settings.

    Raises
    ------
    TypeError
        If ``scoring`` is not None, a str, or a callable.

    Examples
    --------
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from sklearn.metrics import accuracy_score
    >>> from sklearn.model_selection import KFold
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     scoring=accuracy_score,
    ...     cv=KFold(n_splits=3, shuffle=True),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    For default choices of ``scoring`` and ``cv``:

    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    Quick call without metadata return or dictionary:

    >>> score = sklearn_exp(C=1.0, kernel="linear")
    """

    def __init__(self, estimator, X, y, scoring=None, cv=None):
        # Constructor arguments are stored unchanged; the resolved splitter and
        # scorer live on the private ``_cv`` / ``_scoring`` attributes below.
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv

        super().__init__()

        # Resolve ``cv``: None and int both map to a shuffled KFold,
        # anything else is assumed to be a splitter and used as given.
        if cv is None or isinstance(cv, int):
            from sklearn.model_selection import KFold

            n_splits = 3 if cv is None else cv
            self._cv = KFold(n_splits=n_splits, shuffle=True)
        else:
            self._cv = cv

        # Resolve ``scoring`` into a scorer with signature (estimator, X, y).
        if scoring is None or isinstance(scoring, str):
            # None -> estimator's own ``score``; str -> named sklearn scorer.
            self._scoring = check_scoring(self.estimator, scoring=scoring)
        elif callable(scoring):
            from inspect import signature

            # A scorer is told apart from a plain metric function by the
            # presence of an "estimator" parameter in its signature.
            if "estimator" in signature(scoring).parameters:
                self._scoring = scoring
            else:
                from sklearn.metrics import make_scorer

                self._scoring = make_scorer(scoring)
        else:
            # Fail early with a clear message rather than leaving ``_scoring``
            # unset, which would only surface as an AttributeError in ``_score``.
            raise TypeError(
                "scoring must be None, a str, or a callable, "
                f"but found object of type {type(scoring).__name__}"
            )

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters,
            taken from ``estimator.get_params()``.
        """
        return list(self.estimator.get_params())

    def _score(self, params):
        """Score the parameters.

        Parameters
        ----------
        params : dict with string keys
            Parameters to score, set on a clone of ``estimator``.

        Returns
        -------
        float
            The score of the parameters, i.e., the mean cross-validation
            test score.
        dict
            Additional metadata about the search, with keys
            ``"score_time"``, ``"fit_time"`` and ``"n_test_samples"``.
        """
        # Work on a clone so that ``self.estimator`` is never mutated.
        estimator = clone(self.estimator)
        estimator.set_params(**params)

        cv_results = cross_validate(
            estimator,
            self.X,
            self.y,
            scoring=self._scoring,
            cv=self._cv,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            # Total number of samples in X, not the size of a single test fold.
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the skbase object.

        ``get_test_params`` is a unified interface point to store
        parameter settings for testing purposes. This function is also
        used in ``create_test_instance`` and ``create_test_instances_and_names``
        to construct test instances.

        ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.

        Each ``dict`` is a parameter configuration for testing,
        and can be used to construct an "interesting" test instance.
        A call to ``cls(**params)`` should
        be valid for all dictionaries ``params`` in the return of ``get_test_params``.

        The ``get_test_params`` need not return fixed lists of dictionaries,
        it can also return dynamic or stochastic parameter settings.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from sklearn.datasets import load_diabetes, load_iris
        from sklearn.metrics import accuracy_score, mean_absolute_error
        from sklearn.model_selection import KFold
        from sklearn.svm import SVC, SVR

        # Classification: explicit metric function and explicit splitter.
        X_clf, y_clf = load_iris(return_X_y=True)
        params_classif = {
            "estimator": SVC(),
            "scoring": accuracy_score,
            "cv": KFold(n_splits=3, shuffle=True),
            "X": X_clf,
            "y": y_clf,
        }

        # Regression data is loaded once and shared by the two settings below.
        X_reg, y_reg = load_diabetes(return_X_y=True)

        # Regression: explicit metric function and integer ``cv``.
        params_regress = {
            "estimator": SVR(),
            "scoring": mean_absolute_error,
            "cv": 2,
            "X": X_reg,
            "y": y_reg,
        }

        # Regression: defaults for both ``scoring`` and ``cv``.
        params_all_default = {
            "estimator": SVR(),
            "X": X_reg,
            "y": y_reg,
        }

        return [params_classif, params_regress, params_all_default]

    @classmethod
    def _get_score_params(cls):
        """Return settings for the score function.

        Returns a list, the i-th element corresponds to cls.get_test_params()[i].
        It should be a valid call for cls.score.

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        score_params_classif = {"C": 1.0, "kernel": "linear"}
        score_params_regress = {"C": 1.0, "kernel": "linear"}
        score_params_defaults = {"C": 1.0, "kernel": "linear"}
        return [score_params_classif, score_params_regress, score_params_defaults]
236

SimonBlanke / Hyperactive

Pull Request — master (#110)

hyperactive.experiment.integrations.sklearn_cv A

Complexity

Size/Duplication

Importance

5 Methods

Duplication Side-by-Side

Filter issues like