Passed
Pull Request — master (#110)
by
unknown
02:58 queued 01:25
created

SklearnCvExperiment.get_test_params()   A

Complexity

Conditions 1

Size

Total Lines 64
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 26
dl 0
loc 64
rs 9.256
c 0
b 0
f 0
cc 1
nop 2

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""Experiment adapter for sklearn cross-validation experiments."""
2
3
from sklearn import clone
4
from sklearn.metrics import check_scoring
5
from sklearn.model_selection import cross_validate
6
from sklearn.utils.validation import _num_samples
7
8
from hyperactive.base import BaseExperiment
9
10
class SklearnCvExperiment(BaseExperiment):
    """Experiment adapter for sklearn cross-validation experiments.

    This class is used to perform cross-validation experiments using a given
    sklearn estimator. It allows for hyperparameter tuning and evaluation of
    the model's performance using cross-validation.

    The score returned is the mean of the cross-validation test scores
    obtained by applying cross-validation to a clone of ``estimator`` with
    the parameters passed to ``_score`` in ``params`` set on it.

    The cross-validation performed is specified by the ``cv`` parameter,
    and the scoring metric is specified by the ``scoring`` parameter.
    The ``X`` and ``y`` parameters are the input data and target values,
    which are used in fit/predict cross-validation.

    Parameters
    ----------
    estimator : sklearn estimator
        The estimator to be used for the experiment.
    X : array-like, shape (n_samples, n_features)
        The input data for the model.
    y : array-like, shape (n_samples,) or (n_samples, n_outputs)
        The target values for the model.
    scoring : callable, str or None, default = None
        sklearn scorer, scorer name, or sklearn metric function used to
        evaluate the model's performance.
        If None, the default ``score`` method of the estimator is used, which
        by sklearn convention is accuracy for classifiers and the coefficient
        of determination (R^2) for regressors.
        A plain sklearn metric function (e.g., ``accuracy_score``) is wrapped
        with ``sklearn.metrics.make_scorer`` using its default settings; for
        loss-type metrics, pass an explicitly configured scorer instead.
    cv : int, cross-validation generator or None, default = None
        The number of folds or cross-validation strategy to be used.
        If None, the cross-validation used is KFold(n_splits=3, shuffle=True).
        If int, the cross-validation used is KFold(n_splits=cv, shuffle=True).

    Example
    -------
    >>> from hyperactive.experiment.integrations import SklearnCvExperiment
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.svm import SVC
    >>> from sklearn.metrics import accuracy_score
    >>> from sklearn.model_selection import KFold
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>>
    >>> sklearn_exp = SklearnCvExperiment(
    ...     estimator=SVC(),
    ...     scoring=accuracy_score,
    ...     cv=KFold(n_splits=3, shuffle=True),
    ...     X=X,
    ...     y=y,
    ... )
    >>> params = {"C": 1.0, "kernel": "linear"}
    >>> score, add_info = sklearn_exp._score(params)
    """

    def __init__(self, estimator, X, y, scoring=None, cv=None):
        # Constructor arguments are stored unmodified; derived objects live
        # in underscore-prefixed attributes (``_cv``, ``_scoring``).
        self.estimator = estimator
        self.X = X
        self.y = y
        self.scoring = scoring
        self.cv = cv

        super().__init__()

        if cv is None or isinstance(cv, int):
            from sklearn.model_selection import KFold

            # None falls back to 3 folds, an int is taken as the fold count.
            n_splits = 3 if cv is None else cv
            self._cv = KFold(n_splits=n_splits, shuffle=True)
        else:
            self._cv = cv

        try:
            self._scoring = check_scoring(estimator=estimator, scoring=scoring)
        except ValueError:
            # check_scoring refuses plain sklearn metric functions such as
            # ``accuracy_score``; those are converted to scorers instead.
            # Any other invalid value (e.g. an unknown scorer name) is
            # re-raised unchanged.
            if not callable(scoring):
                raise
            from sklearn.metrics import make_scorer

            self._scoring = make_scorer(scoring)

    def _paramnames(self):
        """Return the parameter names of the search.

        Returns
        -------
        list of str
            The parameter names of the search parameters, taken from
            ``estimator.get_params()``.
        """
        return list(self.estimator.get_params().keys())

    def _score(self, params):
        """Score the parameters.

        A clone of ``estimator`` is configured with ``params`` and evaluated
        via ``cross_validate`` on ``X`` and ``y``, using the resolved
        cross-validation splitter and scorer.

        Parameters
        ----------
        params : dict with string keys
            Parameters to score.

        Returns
        -------
        float
            The score of the parameters, i.e., the mean test score over
            the cross-validation folds.
        dict
            Additional metadata about the search, with keys
            ``"score_time"``, ``"fit_time"`` and ``"n_test_samples"``.
        """
        # Work on a clone so that the estimator passed by the user is
        # never mutated by ``set_params``.
        estimator = clone(self.estimator)
        estimator.set_params(**params)

        cv_results = cross_validate(
            estimator,
            self.X,
            self.y,
            scoring=self._scoring,
            cv=self._cv,
        )

        add_info_d = {
            "score_time": cv_results["score_time"],
            "fit_time": cv_results["fit_time"],
            # number of samples in the full ``X``, not the size of one fold
            "n_test_samples": _num_samples(self.X),
        }

        return cv_results["test_score"].mean(), add_info_d

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the skbase object.

        ``get_test_params`` is a unified interface point to store
        parameter settings for testing purposes. This function is also
        used in ``create_test_instance`` and ``create_test_instances_and_names``
        to construct test instances.

        ``get_test_params`` should return a single ``dict``, or a ``list`` of ``dict``.

        Each ``dict`` is a parameter configuration for testing,
        and can be used to construct an "interesting" test instance.
        A call to ``cls(**params)`` should
        be valid for all dictionaries ``params`` in the return of ``get_test_params``.

        The ``get_test_params`` need not return fixed lists of dictionaries,
        it can also return dynamic or stochastic parameter settings.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            Currently the same list is returned for every value.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from sklearn.datasets import load_diabetes, load_iris
        from sklearn.metrics import accuracy_score, mean_absolute_error
        from sklearn.model_selection import KFold
        from sklearn.svm import SVC, SVR

        # classification: explicit metric and explicit splitter
        X_iris, y_iris = load_iris(return_X_y=True)
        params_classif = {
            "estimator": SVC(),
            "scoring": accuracy_score,
            "cv": KFold(n_splits=3, shuffle=True),
            "X": X_iris,
            "y": y_iris,
        }

        # regression: the dataset is loaded once and shared by both
        # regression parameter sets
        X_diab, y_diab = load_diabetes(return_X_y=True)
        params_regress = {
            "estimator": SVR(),
            "scoring": mean_absolute_error,
            "cv": 2,
            "X": X_diab,
            "y": y_diab,
        }

        # regression: ``scoring`` and ``cv`` left at their defaults
        params_all_default = {
            "estimator": SVR(),
            "X": X_diab,
            "y": y_diab,
        }

        return [params_classif, params_regress, params_all_default]

    @classmethod
    def _get_score_params(cls):
        """Return settings for the score function.

        Returns a list, the i-th element corresponds to cls.get_test_params()[i].
        It should be a valid call for cls.score.

        Returns
        -------
        list of dict
            The parameters to be used for scoring.
        """
        # One independent dict per test parameter set, so that mutating one
        # entry cannot affect the others.
        score_params_classif = {"C": 1.0, "kernel": "linear"}
        score_params_regress = {"C": 1.0, "kernel": "linear"}
        score_params_defaults = {"C": 1.0, "kernel": "linear"}
        return [score_params_classif, score_params_regress, score_params_defaults]
211