Passed
Pull Request — master (#110)
by
unknown
01:32
created

SklearnCvExperiment.__init__()   A

Complexity

Conditions 3

Size

Total Lines 21
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 15
dl 0
loc 21
rs 9.65
c 0
b 0
f 0
cc 3
nop 6
1
"""Experiment adapter for sklearn cross-validation experiments."""
2
3
from sklearn import clone
4
from sklearn.metrics import check_scoring
5
from sklearn.model_selection import cross_validate
6
from sklearn.utils.validation import _num_samples
7
8
from hyperactive.base import BaseExperiment
9
10
class SklearnCvExperiment(BaseExperiment):
    """Experiment adapter for sklearn cross-validation experiments.

    This class is used to perform cross-validation experiments using a given
    sklearn estimator. It allows for hyperparameter tuning and evaluation of
    the model's performance using cross-validation.

    The score returned is the mean of the cross-validation scores
    obtained by applying cross-validation to ``estimator``, configured with
    the parameters passed as ``params`` to ``score``.

    The cross-validation performed is specified by the ``cv`` parameter,
    and the scoring metric is specified by the ``scoring`` parameter.
    The ``X`` and ``y`` parameters are the input data and target values,
    which are used in fit/predict cross-validation.

    Parameters
    ----------
    estimator : sklearn estimator
        The estimator to be used for the experiment.
    X : array-like, shape (n_samples, n_features)
        The input data for the model.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The target values for the model.
    scoring : callable or str, optional, default = estimator's ``score`` method
        sklearn scorer, scorer name, or plain metric function used to
        evaluate the model's performance.

        * If ``None``, the default ``score`` method of the estimator is used,
          as per sklearn convention: accuracy for classifiers and the
          coefficient of determination (R^2) for regressors.
        * If a callable with an ``estimator`` argument, it is used as an
          sklearn scorer as-is.
        * If any other callable, it is treated as a metric function with
          signature ``metric(y_true, y_pred)`` and wrapped via
          ``sklearn.metrics.make_scorer`` with default settings.
    cv : int or cross-validation generator, optional
        The number of folds or cross-validation strategy to be used.
        If ``None``, defaults to ``KFold(n_splits=3, shuffle=True)``.
        If int, the cross-validation used is ``KFold(n_splits=cv, shuffle=True)``.

    Examples
    --------
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from sklearn.metrics import accuracy_score
    >>> from sklearn.model_selection import KFold
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     scoring=accuracy_score,
    ...     cv=KFold(n_splits=3, shuffle=True),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    For default choices of ``scoring`` and ``cv``:

    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp.score(params)

    Quick call without metadata return or dictionary:

    >>> score = sklearn_exp(C=1.0, kernel="linear")
    """

    def __init__(self, estimator, X, y, scoring=None, cv=None):
        # Constructor arguments are stored unmodified under their own names;
        # resolved/derived objects go to underscore-prefixed attributes.
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv

        super().__init__()

        # Resolve ``cv`` into a splitter object: None and int both map to a
        # shuffled KFold, anything else is assumed to be a ready-made splitter.
        if cv is None or isinstance(cv, int):
            from sklearn.model_selection import KFold

            n_splits = 3 if cv is None else cv
            self._cv = KFold(n_splits=n_splits, shuffle=True)
        else:
            self._cv = cv

        # Resolve ``scoring`` into an sklearn scorer. ``check_scoring`` rejects
        # plain metric functions, so callables lacking an ``estimator``
        # argument are wrapped with ``make_scorer`` first.
        scorer_spec = scoring
        if callable(scoring):
            from inspect import signature

            try:
                is_scorer = "estimator" in signature(scoring).parameters
            except (TypeError, ValueError):
                # Signature not introspectable: hand over to check_scoring as-is.
                is_scorer = True

            if not is_scorer:
                from sklearn.metrics import make_scorer

                scorer_spec = make_scorer(scoring)

        self._scoring = check_scoring(estimator=estimator, scoring=scorer_spec)

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters,
            i.e., the keys of ``estimator.get_params()``.
        """
        return list(self.estimator.get_params().keys())

    def _score(self, params):
        """Score the parameters.

        Clones ``self.estimator``, sets ``params`` on the clone, and runs
        ``cross_validate`` on ``self.X``, ``self.y`` with the resolved
        scorer and cross-validation splitter.

        Parameters
        ----------
        params : dict with string keys
            Parameters to score, passed to ``set_params`` of the estimator.

        Returns
        -------
        float
            The score of the parameters: mean of the per-fold test scores.
        dict
            Additional metadata about the search, with keys
            ``"score_time"``, ``"fit_time"`` (per-fold arrays from
            ``cross_validate``) and ``"n_test_samples"``.
        """
        # Work on a clone so that self.estimator is never mutated.
        estimator = clone(self.estimator)
        estimator.set_params(**params)

        cv_results = cross_validate(
            estimator,
            self.X,
            self.y,
            scoring=self._scoring,
            cv=self._cv,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            # Number of samples in X (total over all folds, not per fold).
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the skbase object.

        ``get_test_params`` is a unified interface point to store
        parameter settings for testing purposes. This function is also
        used in ``create_test_instance`` and ``create_test_instances_and_names``
        to construct test instances.

        ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.

        Each ``dict`` is a parameter configuration for testing,
        and can be used to construct an "interesting" test instance.
        A call to ``cls(**params)`` should
        be valid for all dictionaries ``params`` in the return of ``get_test_params``.

        The ``get_test_params`` need not return fixed lists of dictionaries,
        it can also return dynamic or stochastic parameter settings.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from sklearn.datasets import load_diabetes, load_iris
        from sklearn.metrics import accuracy_score, mean_absolute_error
        from sklearn.model_selection import KFold
        from sklearn.svm import SVC, SVR

        # Classification: explicit metric function and explicit splitter.
        X_iris, y_iris = load_iris(return_X_y=True)
        params_classif = {
            "estimator": SVC(),
            "scoring": accuracy_score,
            "cv": KFold(n_splits=3, shuffle=True),
            "X": X_iris,
            "y": y_iris,
        }

        # Regression: explicit metric function and integer ``cv``.
        X_diab, y_diab = load_diabetes(return_X_y=True)
        params_regress = {
            "estimator": SVR(),
            "scoring": mean_absolute_error,
            "cv": 2,
            "X": X_diab,
            "y": y_diab,
        }

        # Regression relying on the defaults of ``scoring`` and ``cv``.
        params_all_default = {
            "estimator": SVR(),
            "X": X_diab,
            "y": y_diab,
        }

        return [params_classif, params_regress, params_all_default]

    @classmethod
    def _get_score_params(cls):
        """Return settings for the score function.

        Returns a list, the i-th element corresponds to ``cls.get_test_params()[i]``.
        It should be a valid call for ``score``.

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        score_params_classif = {"C": 1.0, "kernel": "linear"}
        score_params_regress = {"C": 1.0, "kernel": "linear"}
        score_params_defaults = {"C": 1.0, "kernel": "linear"}
        return [score_params_classif, score_params_regress, score_params_defaults]
223