SklearnCvExperiment.__init__()   B
last analyzed

Complexity

Conditions 6

Size

Total Lines 34
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 23
dl 0
loc 34
rs 8.3946
c 0
b 0
f 0
cc 6
nop 6
1
"""Experiment adapter for sklearn cross-validation experiments."""
2
# copyright: hyperactive developers, MIT License (see LICENSE file)
3
4
from sklearn import clone
5
from sklearn.metrics import check_scoring
6
from sklearn.model_selection import cross_validate
7
from sklearn.utils.validation import _num_samples
8
9
from hyperactive.base import BaseExperiment
10
11
class SklearnCvExperiment(BaseExperiment):
12
    """Experiment adapter for sklearn cross-validation experiments.
13
14
    This class is used to perform cross-validation experiments using a given
15
    sklearn estimator. It allows for hyperparameter tuning and evaluation of
16
    the model's performance using cross-validation.
17
18
    The score returned is the mean of the cross-validation scores obtained by
    applying cross-validation to ``estimator`` with the parameters passed to
    ``score`` as ``params``.
21
22
    The cross-validation performed is specified by the ``cv`` parameter,
23
    and the scoring metric is specified by the ``scoring`` parameter.
24
    The ``X`` and ``y`` parameters are the input data and target values,
25
    which are used in fit/predict cross-validation.
26
27
    Parameters
28
    ----------
29
    estimator : sklearn estimator
30
        The estimator to be used for the experiment.
31
    X : array-like, shape (n_samples, n_features)
32
        The input data for the model.
33
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
34
        The target values for the model.
35
    cv : int or cross-validation generator, default = KFold(n_splits=3, shuffle=True)
36
        The number of folds or cross-validation strategy to be used.
37
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).
38
    scoring : callable or str, default = accuracy_score or r2_score
        sklearn scoring function or metric to evaluate the model's performance.
        Default is determined by the type of estimator:
        ``accuracy_score`` for classifiers, and
        ``r2_score`` for regressors, as per sklearn convention
        through the default ``score`` method of the estimator.
44
45
    Examples
    --------
47
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
48
    >>> from sklearn.datasets import load_iris
49
    >>> from sklearn.svm import SVC
50
    >>> from sklearn.metrics import accuracy_score
51
    >>> from sklearn.model_selection import KFold
52
    >>>
53
    >>> X, y = load_iris(return_X_y=True)
54
    >>>
55
    >>> sklearn_exp = SklearnCvExperiment(
56
    ...     estimator=SVC(),
57
    ...     scoring=accuracy_score,
58
    ...     cv=KFold(n_splits=3, shuffle=True),
59
    ...     X=X,
60
    ...     y=y,
61
    ... )
62
    >>> params = {"C": 1.0, "kernel": "linear"}
63
    >>> score, add_info = sklearn_exp.score(params)
64
65
    For default choices of ``scoring`` and ``cv``:
66
    >>> sklearn_exp = SklearnCvExperiment(
67
    ...     estimator=SVC(),
68
    ...     X=X,
69
    ...     y=y,
70
    ... )
71
    >>> params = {"C": 1.0, "kernel": "linear"}
72
    >>> score, add_info = sklearn_exp.score(params)
73
74
    Quick call without metadata return or dictionary:
75
    >>> score = sklearn_exp(C=1.0, kernel="linear")
76
    """
77
78
    def __init__(self, estimator, X, y, scoring=None, cv=None):
        """Store the experiment settings and resolve ``cv`` and ``scoring``.

        The constructor arguments are stored unchanged on attributes of the
        same name. The resolved splitter and scorer are stored separately on
        ``_cv`` and ``_scoring``; ``scorer_`` is an alias of ``_scoring``.

        Parameters
        ----------
        estimator : sklearn estimator
            Estimator that is cloned and cross-validated in ``_score``.
        X : array-like
            Input data handed to ``cross_validate``.
        y : array-like
            Target values handed to ``cross_validate``.
        scoring : callable, str or None, default=None
            If callable with a parameter named ``estimator`` in its signature,
            it is used as a scorer as-is. Any other callable is treated as a
            metric and wrapped with ``make_scorer``. ``None`` and strings are
            resolved through ``check_scoring``.
        cv : int, cross-validation generator or None, default=None
            ``None`` resolves to ``KFold(n_splits=3, shuffle=True)``, an int to
            ``KFold(n_splits=cv, shuffle=True)``; anything else is used as-is.
        """
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv

        super().__init__()

        # None and int both map to a shuffled KFold; only the fold count differs.
        if cv is None or isinstance(cv, int):
            from sklearn.model_selection import KFold

            n_splits = 3 if cv is None else cv
            self._cv = KFold(n_splits=n_splits, shuffle=True)
        else:
            self._cv = cv

        if callable(scoring):
            from inspect import signature

            # A scorer is called as scorer(estimator, X, y); a plain metric is
            # called as metric(y_true, y_pred). Tell them apart by looking for
            # an "estimator" parameter in the signature.
            if "estimator" in signature(scoring).parameters:
                self._scoring = scoring
            else:
                from sklearn.metrics import make_scorer

                self._scoring = make_scorer(scoring)
        else:
            # Covers scoring=None (estimator's default scorer) and string
            # scorer names. Previously a string left ``_scoring`` unset and
            # the assignment below raised AttributeError.
            self._scoring = check_scoring(self.estimator, scoring=scoring)
        self.scorer_ = self._scoring
112
113
    def _paramnames(self):
        """Return the names of the parameters that can be searched over.

        Returns
        -------
        list of str
            The keys of ``self.estimator.get_params()``.
        """
        estimator_params = self.estimator.get_params()
        return list(estimator_params)
122
123
    def _score(self, params):
        """Cross-validate the estimator with the given parameters.

        A clone of ``self.estimator`` is configured with ``params`` and passed
        to ``cross_validate`` together with the stored data, scorer and
        splitter.

        Parameters
        ----------
        params : dict with string keys
            Parameter values set on the cloned estimator via ``set_params``.

        Returns
        -------
        float
            Mean of the ``test_score`` entries returned by ``cross_validate``.
        dict
            Metadata with keys ``"score_time"`` and ``"fit_time"`` (taken from
            the ``cross_validate`` result) and ``"n_test_samples"`` (the number
            of samples in ``self.X`` as reported by ``_num_samples``).
        """
        # Work on a clone so the estimator stored on the experiment is untouched.
        candidate = clone(self.estimator)
        candidate.set_params(**params)

        results = cross_validate(
            candidate, self.X, self.y, scoring=self._scoring, cv=self._cv
        )

        metadata = {
            "score_time": results["score_time"],
            "fit_time": results["fit_time"],
            "n_test_samples": _num_samples(self.X),
        }
        mean_test_score = results["test_score"].mean()

        return mean_test_score, metadata
156
157
    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return constructor parameter settings used for testing.

        Every dictionary in the returned list is a valid set of keyword
        arguments for the constructor, i.e., ``cls(**params)`` builds a test
        instance.

        Three settings are returned, in this order:

        1. classification on the iris data with ``SVC``, ``accuracy_score``
           and an explicit ``KFold`` splitter,
        2. regression on the diabetes data with ``SVR``,
           ``mean_absolute_error`` and ``cv=2``,
        3. regression on the diabetes data with ``SVR`` and default
           ``scoring`` and ``cv``.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return. This implementation
            does not read it; the same list is returned for every value.

        Returns
        -------
        params : list of dict
            Parameters to create testing instances of the class.
        """
        from sklearn.datasets import load_diabetes, load_iris
        from sklearn.metrics import accuracy_score, mean_absolute_error
        from sklearn.model_selection import KFold
        from sklearn.svm import SVC, SVR

        X_iris, y_iris = load_iris(return_X_y=True)
        classification = {
            "estimator": SVC(),
            "scoring": accuracy_score,
            "cv": KFold(n_splits=3, shuffle=True),
            "X": X_iris,
            "y": y_iris,
        }

        X_diab, y_diab = load_diabetes(return_X_y=True)
        regression = {
            "estimator": SVR(),
            "scoring": mean_absolute_error,
            "cv": 2,
            "X": X_diab,
            "y": y_diab,
        }

        # Loaded a second time so this fixture holds its own arrays rather than
        # sharing them with the regression fixture above.
        X_diab_default, y_diab_default = load_diabetes(return_X_y=True)
        all_defaults = {
            "estimator": SVR(),
            "X": X_diab_default,
            "y": y_diab_default,
        }

        return [classification, regression, all_defaults]
221
222
    @classmethod
    def _get_score_params(cls):
        """Return settings for testing the score function. Used in tests only.

        Returns a list whose i-th element corresponds to
        ``cls.get_test_params()[i]`` and is intended as the ``params``
        argument of ``score`` for that test instance.

        Returns
        -------
        list of dict
            The parameters to be used for scoring, one dict per test instance.
        """
        # Three separate dict objects, one per test instance.
        score_params_classif = {"C": 1.0, "kernel": "linear"}
        score_params_regress = {"C": 1.0, "kernel": "linear"}
        score_params_defaults = {"C": 1.0, "kernel": "linear"}
        return [score_params_classif, score_params_regress, score_params_defaults]
238