test_classification.make_prediction()   A
last analyzed

Complexity
    Conditions: 4

Size
    Total Lines: 42
    Code Lines: 22

Duplication
    Lines: 0
    Ratio: 0 %

Importance
    Changes: 0

Metric   Value
cc       4
eloc     22
nop      2
dl       0
loc      42
rs       9.352
c        0
b        0
f        0
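As a rough, hedged sketch of where figures like these come from: the exact tool behind this report is not named, but the cyclomatic-complexity and raw line counts can be approximated locally with radon, assuming the usual definitions of those metrics. The file path below is a hypothetical name for the file listed afterwards.

# Approximate local check of the complexity / size figures above using radon
# (assumption: the report uses standard cyclomatic complexity and raw line counts).
from radon.complexity import cc_visit
from radon.raw import analyze

with open("test_classification.py") as fh:  # hypothetical path to the file listed below
    source = fh.read()

for block in cc_visit(source):
    if block.name == "make_prediction":
        # cyclomatic complexity of the function; the header reports cc 4 / 4 conditions
        print(block.name, block.complexity)

raw = analyze(source)
# raw.loc / raw.lloc are comparable to the "Total Lines" / "Code Lines" figures
print(raw.loc, raw.lloc)

The file as analyzed is listed below.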
import numpy as np
import pytest
from sklearn import datasets, svm
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.utils._testing import (
    assert_almost_equal,
    assert_array_almost_equal,
    assert_array_equal,
    assert_no_warnings,
    ignore_warnings,
)
from sklearn.utils.validation import check_random_state

from precision_recall_gain import (
    f1_gain_score,
    fbeta_gain_score,
    precision_gain_score,
    precision_recall_fgain_score_support,
    recall_gain_score,
)


###############################################################################
# Utilities for testing


def make_prediction(dataset=None, binary=False):
    """Make some classification predictions on a toy dataset using an SVC

    If binary is True, restrict to a binary classification problem instead of a
    multiclass classification problem.
    """

    if dataset is None:
        # import some data to play with
        dataset = datasets.load_iris()

    X = dataset.data
    y = dataset.target

    if binary:
        # restrict to a binary classification task
        X, y = X[y < 2], y[y < 2]

    n_samples, n_features = X.shape
    p = np.arange(n_samples)

    rng = check_random_state(37)
    rng.shuffle(p)
    X, y = X[p], y[p]
    half = int(n_samples / 2)

    # add noisy features to make the problem harder and avoid perfect results
    rng = np.random.RandomState(0)
    X = np.c_[X, rng.randn(n_samples, 200 * n_features)]

    # run classifier, get class probabilities and label predictions
    clf = svm.SVC(kernel="linear", probability=True, random_state=0)
    probas_pred = clf.fit(X[:half], y[:half]).predict_proba(X[half:])

    if binary:
        # only interested in probabilities of the positive case
        # XXX: do we really want a special API for the binary case?
        probas_pred = probas_pred[:, 1]

    y_pred = clf.predict(X[half:])
    y_true = y[half:]
    return y_true, y_pred, probas_pred


###############################################################################
# Tests


def test_precision_recall_f1_gain_score_averages():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # binary average
    p, r, f, s = precision_recall_fgain_score_support(y_true, y_pred, average="binary")
    assert_array_almost_equal(p, 0.82, 2)
    assert_array_almost_equal(r, 0.53, 2)
    assert_array_almost_equal(f, 0.68, 2)

    # macro average
    p, r, f, s = precision_recall_fgain_score_support(y_true, y_pred, average="macro")
    assert_array_almost_equal(p, 0.73, 2)
    assert_array_almost_equal(r, 0.70, 2)
    assert_array_almost_equal(f, 0.72, 2)

    # Test Precision Recall and F1 Score for multiclass classification task
    y_true, y_pred, _ = make_prediction(binary=False)

    # weighted average
    p, r, f, s = precision_recall_fgain_score_support(
        y_true, y_pred, average="weighted"
    )
    assert_array_almost_equal(p, 0.25, 2)
    assert_array_almost_equal(r, -1.77, 2)
    assert_array_almost_equal(f, -0.76, 2)


def test_precision_recall_f1_gain_score_class_dist():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # binary average
    p, r, f, s = precision_recall_fgain_score_support(
        y_true, y_pred, average="binary", class_distribution=[0.4, 0.6]
    )
    assert_array_almost_equal(p, 0.74, 2)
    assert_array_almost_equal(r, 0.29, 2)
    assert_array_almost_equal(f, 0.51, 2)

    # macro average
    p, r, f, s = precision_recall_fgain_score_support(
        y_true, y_pred, average="macro", class_distribution=[0.4, 0.6]
    )
    assert_array_almost_equal(p, 0.75, 2)
    assert_array_almost_equal(r, 0.60, 2)
    assert_array_almost_equal(f, 0.67, 2)

    # Test Precision Recall and F1 Score for multiclass classification task
    y_true, y_pred, _ = make_prediction(binary=False)

    # weighted average
    p, r, f, s = precision_recall_fgain_score_support(
        y_true, y_pred, average="weighted", class_distribution=[0.4, 0.2, 0.4]
    )
    assert_array_almost_equal(p, 0.50, 2)
    assert_array_almost_equal(r, -0.04, 2)
    assert_array_almost_equal(f, 0.23, 2)


def test_precision_recall_f1_gain_score_binary():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fgain_score_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.64, 0.82], 2)
    assert_array_almost_equal(r, [0.86, 0.53], 2)
    assert_array_almost_equal(f, [0.75, 0.68], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs, my_assert in [
        ({}, assert_no_warnings),
        ({"average": "binary"}, assert_no_warnings),
    ]:
        ps = my_assert(precision_gain_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(ps, 0.82, 2)

        rs = my_assert(recall_gain_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(rs, 0.53, 2)

        fs = my_assert(f1_gain_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(fs, 0.68, 2)

        beta = 2
        assert_almost_equal(
            my_assert(fbeta_gain_score, y_true, y_pred, beta=beta, **kwargs),
            (ps + ((beta**2) * rs)) / (1 + (beta**2)),
            2,
        )


@ignore_warnings
def test_precision_recall_f_gain_binary_single_class():
    # Test precision, recall and F-scores behave with a single positive or
    # negative class. Such a case may occur with non-stratified cross-validation
    assert 1.0 == precision_gain_score([1, 1], [1, 1])
    assert 1.0 == recall_gain_score([1, 1], [1, 1])
    assert 1.0 == f1_gain_score([1, 1], [1, 1])
    assert 1.0 == fbeta_gain_score([1, 1], [1, 1], beta=0)
    assert 1.0 == f1_gain_score([2, 2], [2, 2], pos_label=2)

    # test case when no positive class present in true or predicted labels
    assert np.isnan(precision_gain_score([2, 2], [2, 2]))
    assert np.isnan(precision_gain_score([-1, -1], [-1, -1]))
    assert np.isnan(recall_gain_score([-1, -1], [-1, -1]))
    assert np.isnan(f1_gain_score([-1, -1], [-1, -1]))
    assert np.isnan(fbeta_gain_score([-1, -1], [-1, -1], beta=float("inf")))
    assert np.isnan(fbeta_gain_score([-1, -1], [-1, -1], beta=1e5))

    # test case when true labels all positive
    assert precision_gain_score([1, 1], [1, 0]) == 1
    assert precision_gain_score([1, 1], [0, 1]) == 1
    assert recall_gain_score([1, 1], [1, 0]) == -np.inf
    assert recall_gain_score([1, 1], [0, 1]) == -np.inf
    assert f1_gain_score([1, 1], [1, 0]) == -np.inf
    assert f1_gain_score([1, 1], [0, 1]) == -np.inf

    # test case when predicted labels all positive
    assert precision_gain_score([1, 0], [1, 1]) == 0
    assert precision_gain_score([0, 1], [1, 1]) == 0
    assert recall_gain_score([1, 0], [1, 1]) == 1
    assert recall_gain_score([0, 1], [1, 1]) == 1
    assert_array_almost_equal(f1_gain_score([1, 0], [1, 1]), 0.5)
    assert_array_almost_equal(f1_gain_score([0, 1], [1, 1]), 0.5)


@ignore_warnings
def test_precision_recall_fgain_score_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad beta
    with pytest.raises(ValueError):
        precision_recall_fgain_score_support(y_true, y_pred, beta=-0.1)

    # Bad pos_label
    with pytest.raises(ValueError):
        precision_recall_fgain_score_support(
            y_true, y_pred, pos_label=2, average="binary"
        )

    # Bad average option 1
    with pytest.raises(ValueError):
        precision_recall_fgain_score_support([0, 1, 2], [1, 2, 0], average="mega")

    # Bad average option 2
    with pytest.raises(ValueError):
        precision_recall_fgain_score_support([0, 1, 2], [1, 2, 0], average="micro")

    # Bad class_distribution dimension
    with pytest.raises(ValueError):
        precision_recall_fgain_score_support(
            [0, 1, 2], [1, 2, 0], class_distribution=[3]
        )

    # Bad class_distribution values
    with pytest.raises(ValueError):
        precision_recall_fgain_score_support(
            [0, 1, 2], [1, 2, 0], class_distribution=[0.4, 0.6, 0.1]
        )


def test_precision_recall_f1_gain_score_multiclass():
    # Test Precision Recall and F1 Score for multiclass classification task
    y_true, y_pred, _ = make_prediction(binary=False)

    # compute scores with default labels introspection
    p, r, f, s = precision_recall_fgain_score_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.9, -0.41, 0.49], 2)
    assert_array_almost_equal(r, [0.88, -5.58, 0.96], 2)
    assert_array_almost_equal(f, [0.89, -2.99, 0.73], 2)
    assert_array_equal(s, [24, 31, 20])

    # averaging tests
    ps = precision_gain_score(y_true, y_pred, average="macro")
    assert_array_almost_equal(ps, 0.33, 2)

    rs = recall_gain_score(y_true, y_pred, average="macro")
    assert_array_almost_equal(rs, -1.25, 2)

    fs = f1_gain_score(y_true, y_pred, average="macro")
    assert_array_almost_equal(fs, -0.46, 2)

    ps = precision_gain_score(y_true, y_pred, average="weighted")
    assert_array_almost_equal(ps, 0.25, 2)

    rs = recall_gain_score(y_true, y_pred, average="weighted")
    assert_array_almost_equal(rs, -1.77, 2)

    fs = f1_gain_score(y_true, y_pred, average="weighted")
    assert_array_almost_equal(fs, -0.76, 2)

    with pytest.raises(ValueError):
        precision_gain_score(y_true, y_pred, average="samples")
    with pytest.raises(ValueError):
        recall_gain_score(y_true, y_pred, average="samples")
    with pytest.raises(ValueError):
        f1_gain_score(y_true, y_pred, average="samples")
    with pytest.raises(ValueError):
        fbeta_gain_score(y_true, y_pred, average="samples", beta=0.5)

    # same prediction but with an explicit label ordering
    p, r, f, s = precision_recall_fgain_score_support(
        y_true, y_pred, labels=[0, 2, 1], average=None
    )
    assert_array_almost_equal(p, [0.9, 0.49, -0.41], 2)
    assert_array_almost_equal(r, [0.88, 0.96, -5.58], 2)
    assert_array_almost_equal(f, [0.89, 0.73, -2.99], 2)
    assert_array_equal(s, [24, 20, 31])


def test_precision_gain_score_docs():
    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 2, 1, 0, 0, 1]
    assert precision_gain_score(y_true, y_pred, average="macro") < -1e14
    assert precision_gain_score(y_true, y_pred, average="weighted") < -1e14

    result = precision_gain_score(y_true, y_pred, average=None)
    assert np.isclose(result[0], 0.75)
    assert np.all(result[1:] < -1e14)

    y_pred = [0, 0, 0, 0, 0, 0]
    with pytest.warns(UndefinedMetricWarning):
        result = precision_gain_score(y_true, y_pred, average=None)
    assert np.isclose(result[0], 0)
    assert np.all(result[1:] < -1e14)

    assert_array_almost_equal(
        precision_gain_score(y_true, y_pred, average=None, zero_division=1),
        [0.0, 1.0, 1.0],
        2,
    )

    # multilabel classification
    y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
    y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]
    # this one is correct
    assert_array_almost_equal(
        precision_gain_score(y_true, y_pred, average=None), [0.5, 1.0, 1.0], 2
    )
    assert_array_almost_equal(
        recall_gain_score(y_true, y_pred, average=None), [1.0, 1.0, -1.0], 2
    )

    # binary classification
    y_pred = [0, 0, 1, 0]
    y_true = [0, 1, 1, 0]
    result = precision_recall_fgain_score_support(y_true, y_pred, average="binary")
    assert_almost_equal(result[:3], [1, 0, 0.5])
    assert result[3] is None


def test_recall_gain_docs():
    y_true = [0, 1, 2, 0, 1, 2]
    y_pred = [0, 2, 1, 0, 0, 1]

    assert recall_gain_score(y_true, y_pred, average="macro") < -1e14
    assert recall_gain_score(y_true, y_pred, average="weighted") < -1e14
    result = recall_gain_score(y_true, y_pred, average=None)
    assert np.isclose(result[0], 1)
    assert np.all(result[1:] < -1e14)

    y_true = [0, 0, 0, 0, 0, 0]

    with pytest.warns((UndefinedMetricWarning, RuntimeWarning)):
        result = recall_gain_score(y_true, y_pred, average=None)
    assert_array_almost_equal(result, [-np.inf, np.nan, np.nan], 2)

    with pytest.warns(RuntimeWarning):
        assert_array_almost_equal(
            recall_gain_score(y_true, y_pred, average=None, zero_division=1),
            [-np.inf, 1.0, 1.0],
            2,
        )
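For context, the asserted values are consistent with the precision-recall-gain construction of Flach and Kull (2015): with pi the proportion of positives, precision gain is (precision - pi) / ((1 - pi) * precision), recall gain is (recall - pi) / ((1 - pi) * recall), and the F-beta gain checked in test_precision_recall_f1_gain_score_binary is the weighted arithmetic mean (p_gain + beta^2 * r_gain) / (1 + beta^2). This is a hedged reconstruction inferred from the assertions above rather than taken from the precision_recall_gain source; the small hand computation below reproduces the [1, 0, 0.5] result asserted at the end of test_precision_gain_score_docs.

# Hand computation of the binary example from test_precision_gain_score_docs,
# assuming the Flach & Kull (2015) precision/recall-gain transformation.
y_true = [0, 1, 1, 0]
y_pred = [0, 0, 1, 0]

tp = sum(t == 1 and p == 1 for t, p in zip(y_true, y_pred))  # 1
fp = sum(t == 0 and p == 1 for t, p in zip(y_true, y_pred))  # 0
fn = sum(t == 1 and p == 0 for t, p in zip(y_true, y_pred))  # 1

precision = tp / (tp + fp)      # 1.0
recall = tp / (tp + fn)         # 0.5
pi = sum(y_true) / len(y_true)  # 0.5, proportion of positives

precision_gain = (precision - pi) / ((1 - pi) * precision)  # 1.0
recall_gain = (recall - pi) / ((1 - pi) * recall)           # 0.0
f1_gain = (precision_gain + recall_gain) / 2                # 0.5

# matches assert_almost_equal(result[:3], [1, 0, 0.5]) in test_precision_gain_score_docs
print(precision_gain, recall_gain, f1_gain)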