precision_recall_gain._classification   A

Complexity

Total Complexity 30

Size/Duplication

Total Lines 1140
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 228
dl 0
loc 1140
rs 10
c 0
b 0
f 0
wmc 30

8 Functions

Rating  Name                                     Duplication  Size  Complexity
A       fbeta_gain_score()                       0            142   1
A       f1_gain_score()                          0            143   1
A       prg_gain_transform()                     0            38    3
A       precision_gain_score()                   0            149   1
F       _precision_recall_fscore_support()       0            281   16
A       _check_valid_class_distribution()        0            18    5
A       recall_gain_score()                      0            151   1
A       precision_recall_fgain_score_support()   0            185   2
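
The public functions mirror scikit-learn's precision/recall/F-score API. A
minimal usage sketch, assuming the module is installed and importable as
precision_recall_gain (the import path used in the doctests below):

from precision_recall_gain import (
    f1_gain_score,
    precision_gain_score,
    precision_recall_fgain_score_support,
    recall_gain_score,
)

y_true = [0, 1, 2, 0, 1, 2, 2]
y_pred = [0, 2, 1, 0, 1, 1, 2]

# Per-class gain scores plus support, then macro-averaged scalars.
print(precision_recall_fgain_score_support(y_true, y_pred, average=None))
print(precision_gain_score(y_true, y_pred, average="macro"))
print(recall_gain_score(y_true, y_pred, average="macro"))
print(f1_gain_score(y_true, y_pred, average="macro"))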
1
"""
2
https://github.com/scikit-learn/scikit-learn/pull/24121
3
"""
4
5
# ruff: noqa: E501
6
import numpy as np
7
from sklearn.metrics._classification import (
8
    _check_set_wise_labels,
9
    _check_zero_division,
10
    _prf_divide,
11
    _warn_prf,
12
    multilabel_confusion_matrix,
13
)
14
from sklearn.utils.multiclass import unique_labels
15
16
17
def _precision_recall_fscore_support(
18
    y_true,
19
    y_pred,
20
    *,
21
    beta=1.0,
22
    labels=None,
23
    pos_label=1,
24
    average=None,
25
    warn_for=("precision", "recall", "f-score"),
26
    sample_weight=None,
27
    zero_division="warn",
28
    return_in_gain_space=False,
29
    class_distribution=None,
30
):
31
    """Compute precision, recall, F-measure and support for each class.
32
33
    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
34
    true positives and ``fp`` the number of false positives. The precision is
35
    intuitively the ability of the classifier not to label a negative sample as
36
    positive.
37
38
    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
39
    true positives and ``fn`` the number of false negatives. The recall is
40
    intuitively the ability of the classifier to find all the positive samples.
41
42
    The F-beta score can be interpreted as a weighted harmonic mean of
43
    the precision and recall, where an F-beta score reaches its best
44
    value at 1 and worst score at 0.
45
46
    The F-beta score weights recall more than precision by a factor of
47
    ``beta``. ``beta == 1.0`` means recall and precision are equally important.
48
49
    The support is the number of occurrences of each class in ``y_true``.
50
51
    If ``pos_label is None`` and in binary classification, this function
52
    returns the average precision, recall and F-measure if ``average``
53
    is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.
54
55
    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.
56
57
    Parameters
58
    ----------
59
    y_true : 1d array-like, or label indicator array / sparse matrix
60
        Ground truth (correct) target values.
61
62
    y_pred : 1d array-like, or label indicator array / sparse matrix
63
        Estimated targets as returned by a classifier.
64
65
    beta : float, default=1.0
66
        The strength of recall versus precision in the F-score.
67
68
    labels : array-like, default=None
69
        The set of labels to include when ``average != 'binary'``, and their
70
        order if ``average is None``. Labels present in the data can be
71
        excluded, for example to calculate a multiclass average ignoring a
72
        majority negative class, while labels not present in the data will
73
        result in 0 components in a macro average. For multilabel targets,
74
        labels are column indices. By default, all labels in ``y_true`` and
75
        ``y_pred`` are used in sorted order.
76
77
    pos_label : str or int, default=1
78
        The class to report if ``average='binary'`` and the data is binary.
79
        If the data are multiclass or multilabel, this will be ignored;
80
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
81
        scores for that label only.
82
83
    average : {'binary', 'micro', 'macro', 'samples', 'weighted'}, \
84
            default=None
85
        If ``None``, the scores for each class are returned. Otherwise, this
86
        determines the type of averaging performed on the data:
87
88
        ``'binary'``:
89
            Only report results for the class specified by ``pos_label``.
90
            This is applicable only if targets (``y_{true,pred}``) are binary.
91
        ``'micro'``:
92
            Calculate metrics globally by counting the total true positives,
93
            false negatives and false positives.
94
        ``'macro'``:
95
            Calculate metrics for each label, and find their unweighted
96
            mean.  This does not take label imbalance into account.
97
        ``'weighted'``:
98
            Calculate metrics for each label, and find their average weighted
99
            by support (the number of true instances for each label). This
100
            alters 'macro' to account for label imbalance; it can result in an
101
            F-score that is not between precision and recall.
102
        ``'samples'``:
103
            Calculate metrics for each instance, and find their average (only
104
            meaningful for multilabel classification where this differs from
105
            :func:`accuracy_score`).
106
107
    warn_for : tuple or set, for internal use
108
        This determines which warnings will be made in the case that this
109
        function is being used to return only one of its metrics.
110
111
    sample_weight : array-like of shape (n_samples,), default=None
112
        Sample weights.
113
114
    zero_division : "warn", 0 or 1, default="warn"
115
        Sets the value to return when there is a zero division:
116
           - recall: when there are no positive labels
117
           - precision: when there are no positive predictions
118
           - f-score: both
119
120
        If set to "warn", this acts as 0, but warnings are also raised.
121
122
    return_in_gain_space : bool, default=False
123
        If True, the Precision Gain, Recall Gain and F-Gain scores are returned.
124
125
    class_distribution : Optional list, default=None
126
        The proportion that each class makes up in the dataset. It's used only
127
        when return_in_gain_space=True. If not provided then it's estimated from
128
        y_true.
129
130
    Returns
131
    -------
132
    precision : float (if average is not None) or array of float, shape =\
133
        [n_unique_labels]
134
        Precision score.
135
136
    recall : float (if average is not None) or array of float, shape =\
137
        [n_unique_labels]
138
        Recall score.
139
140
    fbeta_score : float (if average is not None) or array of float, shape =\
141
        [n_unique_labels]
142
        F-beta score.
143
144
    support : None (if average is not None) or array of int, shape =\
145
        [n_unique_labels]
146
        The number of occurrences of each label in ``y_true``.
147
148
    Notes
149
    -----
150
    When ``true positive + false positive == 0``, precision is undefined.
151
    When ``true positive + false negative == 0``, recall is undefined.
152
    In such cases, by default the metric will be set to 0, as will f-score,
153
    and ``UndefinedMetricWarning`` will be raised. This behavior can be
154
    modified with ``zero_division``.
155
156
    References
157
    ----------
158
    .. [1] `Wikipedia entry for the Precision and recall
159
           <https://en.wikipedia.org/wiki/Precision_and_recall>`_.
160
161
    .. [2] `Wikipedia entry for the F1-score
162
           <https://en.wikipedia.org/wiki/F1_score>`_.
163
164
    .. [3] `Discriminative Methods for Multi-labeled Classification Advances
165
           in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu
166
           Godbole, Sunita Sarawagi
167
           <http://www.godbole.net/shantanu/pubs/multilabelsvm-pakdd04.pdf>`_.
168
169
    Examples
170
    --------
171
    >>> import numpy as np
172
    >>> from sklearn.metrics import precision_recall_fscore_support
173
    >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
174
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
175
    >>> precision_recall_fscore_support(y_true, y_pred, average='macro')
176
    (0.22..., 0.33..., 0.26..., None)
177
    >>> precision_recall_fscore_support(y_true, y_pred, average='micro')
178
    (0.33..., 0.33..., 0.33..., None)
179
    >>> precision_recall_fscore_support(y_true, y_pred, average='weighted')
180
    (0.22..., 0.33..., 0.26..., None)
181
182
    It is possible to compute per-label precisions, recalls, F1-scores and
183
    supports instead of averaging:
184
185
    >>> precision_recall_fscore_support(y_true, y_pred, average=None,
186
    ... labels=['pig', 'dog', 'cat'])
187
    (array([0.        , 0.        , 0.66...]),
188
     array([0., 0., 1.]), array([0. , 0. , 0.8]),
189
     array([2, 2, 2]))
190
    """
191
    _check_zero_division(zero_division)
192
    if beta < 0:
193
        raise ValueError("beta should be >=0 in the F-beta score")
194
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
195
    class_distribution = _check_valid_class_distribution(
196
        class_distribution, y_true, y_pred, average, pos_label
197
    )
198
199
    # Calculate tp_sum, pred_sum, true_sum ###
200
    samplewise = average == "samples"
201
    MCM = multilabel_confusion_matrix(
202
        y_true,
203
        y_pred,
204
        sample_weight=sample_weight,
205
        labels=labels,
206
        samplewise=samplewise,
207
    )
208
    tp_sum = MCM[:, 1, 1]
209
    pred_sum = tp_sum + MCM[:, 0, 1]
210
    true_sum = tp_sum + MCM[:, 1, 0]
211
212
    if average == "micro":
213
        tp_sum = np.array([tp_sum.sum()])
214
        pred_sum = np.array([pred_sum.sum()])
215
        true_sum = np.array([true_sum.sum()])
216
217
    # Finally, we have all our sufficient statistics. Divide! #
218
    beta2 = beta**2
219
220
    # Divide, and on zero-division, set scores and/or warn according to
221
    # zero_division:
222
    precision = _prf_divide(
223
        tp_sum, pred_sum, "precision", "predicted", average, warn_for, zero_division
224
    )
225
    recall = _prf_divide(
226
        tp_sum, true_sum, "recall", "true", average, warn_for, zero_division
227
    )
228
229
    # warn for f-score only if zero_division is warn, it is in warn_for
230
    # and BOTH prec and rec are ill-defined
231
    if zero_division == "warn" and ("f-score",) == warn_for:
232
        if (pred_sum[true_sum == 0] == 0).any():
233
            _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
234
235
    # if tp == 0 F will be 1 only if all predictions are zero, all labels are
236
    # zero, and zero_division=1. In all other cases, 0
237
    if np.isposinf(beta):
238
        f_score = recall
239
    else:
240
        denom = beta2 * precision + recall
241
242
        denom[denom == 0.0] = 1  # avoid division by 0
243
        f_score = (1 + beta2) * precision * recall / denom
244
245
    if return_in_gain_space:
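        # Map each per-class precision/recall/F-score into gain space using that
        # class's prevalence pi: the matching entry of class_distribution when it
        # is supplied, otherwise support / total count taken from the per-class
        # confusion matrix.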
246
        for class_index, (
247
            precision_i,
248
            recall_i,
249
            f_score_i,
250
            true_sum_i,
251
            cm_i,
252
        ) in enumerate(zip(precision, recall, f_score, true_sum, MCM)):
253
            class_proportion = (
254
                (true_sum_i / cm_i.sum())
255
                if class_distribution is None
256
                else class_distribution[class_index]
257
            )
258
            precision[class_index] = prg_gain_transform(
259
                precision_i, pi=class_proportion
260
            )
261
            recall[class_index] = prg_gain_transform(recall_i, pi=class_proportion)
262
            f_score[class_index] = prg_gain_transform(f_score_i, pi=class_proportion)
263
264
    # Average the results
265
    if average == "weighted":
266
        weights = true_sum
267
        if weights.sum() == 0:
268
            zero_division_value = np.float64(1.0)
269
            if zero_division in ["warn", 0]:
270
                zero_division_value = np.float64(0.0)
271
            # precision is zero_division if there are no positive predictions
272
            # recall is zero_division if there are no positive labels
273
            # fscore is zero_division if all labels AND predictions are
274
            # negative
275
            if pred_sum.sum() == 0:
276
                return (
277
                    zero_division_value,
278
                    zero_division_value,
279
                    zero_division_value,
280
                    None,
281
                )
282
            else:
283
                return (np.float64(0.0), zero_division_value, np.float64(0.0), None)
284
285
    elif average == "samples":
286
        weights = sample_weight
287
    else:
288
        weights = None
289
290
    if average is not None:
291
        assert average != "binary" or len(precision) == 1
292
        precision = np.average(precision, weights=weights)
293
        recall = np.average(recall, weights=weights)
294
        f_score = np.average(f_score, weights=weights)
295
        true_sum = None  # return no support
296
297
    return precision, recall, f_score, true_sum
298
299
300
def _check_valid_class_distribution(
301
    class_distribution, y_true, y_pred, average, pos_label
302
):
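    """Validate an optional user-supplied class distribution.

    Illustrative (hypothetical) values: with classes ``[0, 1, 2]``,
    ``class_distribution=[0.5, 0.3, 0.2]`` is accepted, ``[0.5, 0.6]`` raises a
    ValueError because its length does not match the number of classes, and
    ``[0.5, 0.3, 0.3]`` raises because the values do not sum to 1. For
    ``average='binary'`` only the entry for ``pos_label`` is kept.
    """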
303
    if class_distribution:
304
        classes = unique_labels(y_true, y_pred).tolist()
305
        num_classes = len(classes)
306
        if len(class_distribution) != num_classes:
307
            raise ValueError(
308
                "Class distribution must have the same length as the number of classes"
309
                f" - {num_classes}."
310
            )
311
        if not np.isclose(sum(class_distribution), 1.0):
312
            raise ValueError("Class distribution values do not sum to 1.")
313
314
        if average == "binary":
315
            class_distribution = [class_distribution[classes.index(pos_label)]]
316
317
    return class_distribution
318
319
320
def f1_gain_score(
321
    y_true,
322
    y_pred,
323
    *,
324
    labels=None,
325
    pos_label=1,
326
    average="binary",
327
    sample_weight=None,
328
    zero_division="warn",
329
    class_distribution=None,
330
):
331
    """Compute the F1 Gain score, also known as balanced F-Gain score or
332
    F-Gain measure.
333
334
    The F1 Gain score can be interpreted as an arithmetic mean of the precision
335
    gain and recall gain, where an F1 Gain score reaches its best value at 1 and
336
    worst score at -Inf. The relative contributions of precision gain and recall
337
    gain to the F1 Gain score are equal. The formula for the F1 Gain score is::
338
339
        F1_Gain = (precision_gain + recall_gain) / 2
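
    For instance, a precision gain of 0.4 and a recall gain of 0.6 give an
    F1 Gain score of 0.5.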
340
341
    In the multi-class and multi-label case, this is the average of the F1 Gain
342
    score of each class with weighting depending on the ``average`` parameter.
343
    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.
344
345
    Parameters
346
    ----------
347
    y_true : 1d array-like, or label indicator array / sparse matrix
348
        Ground truth (correct) target values.
349
350
    y_pred : 1d array-like, or label indicator array / sparse matrix
351
        Estimated targets as returned by a classifier.
352
353
    labels : array-like, default=None
354
        The set of labels to include when ``average != 'binary'``, and their
355
        order if ``average is None``. Labels present in the data can be
356
        excluded, for example to calculate a multiclass average ignoring a
357
        majority negative class, while labels not present in the data will
358
        result in 0 components in a macro average. For multilabel targets,
359
        labels are column indices. By default, all labels in ``y_true`` and
360
        ``y_pred`` are used in sorted order.
361
362
        .. versionchanged:: 0.17
363
           Parameter `labels` improved for multiclass problem.
364
365
    pos_label : str or int, default=1
366
        The class to report if ``average='binary'`` and the data is binary.
367
        If the data are multiclass or multilabel, this will be ignored;
368
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
369
        scores for that label only.
370
371
    average : {'macro', 'weighted', 'binary'} or None, \
372
            default='binary'
373
        This parameter is required for multiclass/multilabel targets.
374
        If ``None``, the scores for each class are returned. Otherwise, this
375
        determines the type of averaging performed on the data:
376
377
        ``'binary'``:
378
            Only report results for the class specified by ``pos_label``.
379
            This is applicable only if targets (``y_{true,pred}``) are binary.
380
        ``'macro'``:
381
            Calculate metrics for each label, and find their unweighted
382
            mean.  This does not take label imbalance into account.
383
        ``'weighted'``:
384
            Calculate metrics for each label, and find their average weighted
385
            by support (the number of true instances for each label). This
386
            alters 'macro' to account for label imbalance; it can result in an
387
            F-score that is not between precision and recall.
388
389
    sample_weight : array-like of shape (n_samples,), default=None
390
        Sample weights.
391
392
    zero_division : "warn", 0 or 1, default="warn"
393
        Sets the value to return when there is a zero division, i.e. when all
394
        predictions and labels are negative. If set to "warn", this acts as 0,
395
        but warnings are also raised.
396
397
    class_distribution : Optional list, default=None
398
        The proportion that each class makes up in the dataset. If not
399
        provided then it's estimated from y_true.
400
401
    Returns
402
    -------
403
    f1_gain_score : float or array of float, shape = [n_unique_labels]
404
        F1 Gain score of the positive class in binary classification or weighted
405
        average of the F1 Gain scores of each class for the multiclass task.
406
407
    See Also
408
    --------
409
    fbeta_gain_score : Compute the F-Gain beta score.
410
    precision_recall_fgain_score_support : Compute the precision gain, recall
411
        gain, F-Gain score, and support.
412
    jaccard_score : Compute the Jaccard similarity coefficient score.
413
    multilabel_confusion_matrix : Compute a confusion matrix for each class or
414
        sample.
415
416
    Notes
417
    -----
418
    When ``true positive + false positive == 0``, precision is undefined.
419
    When ``true positive + false negative == 0``, recall is undefined.
420
    In such cases, by default the metric will be set to 0, as will f-score,
421
    and ``UndefinedMetricWarning`` will be raised. This behavior can be
422
    modified with ``zero_division``.
423
424
    References
425
    ----------
426
    .. [1] `Precision-Recall-Gain Curves: PR Analysis Done Right (2015) by
427
            Peter A. Flach and Meelis Kull
428
           <https://papers.nips.cc/paper/2015/file/33e8075e9970de0cfea955afd4644bb2-Paper.pdf>`_.
429
    .. [2] `Wikipedia entry for the F1-score
430
           <https://en.wikipedia.org/wiki/F1_score>`_.
431
432
    Examples
433
    --------
434
    >>> from precision_recall_gain import f1_gain_score
435
    >>> y_true = [0, 1, 2, 0, 1, 2, 2]
436
    >>> y_pred = [0, 2, 1, 0, 1, 1, 2]
437
    >>> f1_gain_score(y_true, y_pred, average='macro')
438
    0.42...
439
    >>> f1_gain_score(y_true, y_pred, average='weighted')
440
    0.34...
441
    >>> f1_gain_score(y_true, y_pred, average=None)
442
    array([ 1.   ,  0.4  , -0.125])
443
    >>> y_true = [0, 0, 0, 0, 0, 0]
444
    >>> y_pred = [0, 0, 0, 0, 0, 0]
445
    >>> f1_gain_score(y_true, y_pred, zero_division=1)
446
    1.0
447
    >>> # multilabel classification
448
    >>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
449
    >>> y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]
450
    >>> f1_gain_score(y_true, y_pred, average=None)
451
    array([0.75, 1.  , 0.  ])
452
    """
453
    return fbeta_gain_score(
454
        y_true,
455
        y_pred,
456
        beta=1,
457
        labels=labels,
458
        pos_label=pos_label,
459
        average=average,
460
        sample_weight=sample_weight,
461
        zero_division=zero_division,
462
        class_distribution=class_distribution,
463
    )
464
465
466
def fbeta_gain_score(
467
    y_true,
468
    y_pred,
469
    *,
470
    beta,
471
    labels=None,
472
    pos_label=1,
473
    average="binary",
474
    sample_weight=None,
475
    zero_division="warn",
476
    class_distribution=None,
477
):
478
    """Compute the F-Gain beta score.
479
480
    The F-Gain beta score is the weighted arithmetic mean of precision gain
481
    and recall gain, reaching its optimal value at 1 and its worst value at
482
    -Inf.
483
484
    The `beta` parameter determines the weight of recall gain in the combined
485
    score. ``beta < 1`` lends more weight to precision, while ``beta > 1``
486
    favors recall (``beta -> 0`` considers only precision, ``beta -> +inf``
487
    only recall).
488
489
    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.
490
491
    Parameters
492
    ----------
493
    y_true : 1d array-like, or label indicator array / sparse matrix
494
        Ground truth (correct) target values.
495
496
    y_pred : 1d array-like, or label indicator array / sparse matrix
497
        Estimated targets as returned by a classifier.
498
499
    beta : float
500
        Determines the weight of recall in the combined score.
501
502
    labels : array-like, default=None
503
        The set of labels to include when ``average != 'binary'``, and their
504
        order if ``average is None``. Labels present in the data can be
505
        excluded, for example to calculate a multiclass average ignoring a
506
        majority negative class, while labels not present in the data will
507
        result in 0 components in a macro average. For multilabel targets,
508
        labels are column indices. By default, all labels in ``y_true`` and
509
        ``y_pred`` are used in sorted order.
510
511
        .. versionchanged:: 0.17
512
           Parameter `labels` improved for multiclass problem.
513
514
    pos_label : str or int, default=1
515
        The class to report if ``average='binary'`` and the data is binary.
516
        If the data are multiclass or multilabel, this will be ignored;
517
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
518
        scores for that label only.
519
520
    average : {'macro', 'weighted', 'binary'} or None, \
521
            default='binary'
522
        This parameter is required for multiclass/multilabel targets.
523
        If ``None``, the scores for each class are returned. Otherwise, this
524
        determines the type of averaging performed on the data:
525
526
        ``'binary'``:
527
            Only report results for the class specified by ``pos_label``.
528
            This is applicable only if targets (``y_{true,pred}``) are binary.
529
        ``'macro'``:
530
            Calculate metrics for each label, and find their unweighted
531
            mean.  This does not take label imbalance into account.
532
        ``'weighted'``:
533
            Calculate metrics for each label, and find their average weighted
534
            by support (the number of true instances for each label). This
535
            alters 'macro' to account for label imbalance; it can result in an
536
            F-score that is not between precision and recall.
537
538
    sample_weight : array-like of shape (n_samples,), default=None
539
        Sample weights.
540
541
    zero_division : "warn", 0 or 1, default="warn"
542
        Sets the value to return when there is a zero division, i.e. when all
543
        predictions and labels are negative. If set to "warn", this acts as 0,
544
        but warnings are also raised.
545
546
    class_distribution : Optional list, default=None
547
        The proportion that each class makes up in the dataset. If not
548
        provided then it's estimated from y_true.
549
550
    Returns
551
    -------
552
    fgain_beta_score : float (if average is not None) or array of float, shape =\
553
        [n_unique_labels]
554
        F-Gain beta score of the positive class in binary classification or weighted
555
        average of the F-Gain beta score of each class for the multiclass task.
556
557
    See Also
558
    --------
559
    precision_recall_fgain_score_support : Compute the precision gain, recall
560
        gain, F-Gain score, and support.
561
    multilabel_confusion_matrix : Compute a confusion matrix for each class or
562
        sample.
563
564
    Notes
565
    -----
566
    When ``true positive + false positive == 0`` or
567
    ``true positive + false negative == 0``, f-score returns 0 and raises
568
    ``UndefinedMetricWarning``. This behavior can be
569
    modified with ``zero_division``.
570
571
    References
572
    ----------
573
    .. [1] `Precision-Recall-Gain Curves: PR Analysis Done Right (2015) by
574
            Peter A. Flach and Meelis Kull
575
           <https://papers.nips.cc/paper/2015/file/33e8075e9970de0cfea955afd4644bb2-Paper.pdf>`_.
576
    .. [2] R. Baeza-Yates and B. Ribeiro-Neto (2011).
577
           Modern Information Retrieval. Addison Wesley, pp. 327-328.
578
579
    .. [3] `Wikipedia entry for the F1-score
580
           <https://en.wikipedia.org/wiki/F1_score>`_.
581
582
    Examples
583
    --------
584
    >>> from precision_recall_gain import fbeta_gain_score
585
    >>> y_true = [0, 1, 2, 0, 1, 2, 2]
586
    >>> y_pred = [0, 2, 1, 0, 1, 1, 2]
587
    >>> fbeta_gain_score(y_true, y_pred, average='macro', beta=0.5)
588
    0.45...
589
    >>> fbeta_gain_score(y_true, y_pred, average='weighted', beta=0.5)
590
    0.40...
591
    >>> fbeta_gain_score(y_true, y_pred, average=None, beta=0.5)
592
    array([1.  , 0.28, 0.1 ])
593
    """
594
595
    _, _, f, _ = precision_recall_fgain_score_support(
596
        y_true,
597
        y_pred,
598
        beta=beta,
599
        labels=labels,
600
        pos_label=pos_label,
601
        average=average,
602
        warn_for=("f-score",),
603
        sample_weight=sample_weight,
604
        zero_division=zero_division,
605
        class_distribution=class_distribution,
606
    )
607
    return f
608
609
610
def precision_recall_fgain_score_support(
611
    y_true,
612
    y_pred,
613
    *,
614
    class_distribution=None,
615
    beta=1.0,
616
    labels=None,
617
    pos_label=1,
618
    average=None,
619
    warn_for=("precision", "recall", "f-score"),
620
    sample_weight=None,
621
    zero_division="warn",
622
):
623
    """Compute precision gain, recall gain, F-Gain measure and support for each
624
    class.
625
626
    All three measures are derived by applying the following transform to their
627
    respective vanilla metric values.
628
629
        f(x) = (x - pi) / ((1 - pi) * x)
630
631
            pi = proportion of positives
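
    For example, with ``pi = 0.5`` a metric value of 0.75 maps to
    ``(0.75 - 0.5) / ((1 - 0.5) * 0.75) = 2/3``; a value equal to ``pi`` maps to
    0 and a value of 1 maps to 1.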
632
633
    The vanilla metrics prior to transformation are defined as follows:
634
635
        The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number
636
        of true positives and ``fp`` the number of false positives. The
637
        precision is intuitively the ability of the classifier not to label a
638
        negative sample as positive.
639
640
        The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
641
        true positives and ``fn`` the number of false negatives. The recall is
642
        intuitively the ability of the classifier to find all the positive
643
        samples.
644
645
        The F-beta score can be interpreted as a weighted harmonic mean of the
646
        precision and recall, where an F-beta score reaches its best value at 1
647
        and worst score at 0.
648
649
        The F-beta score weights recall more than precision by a factor of
650
        ``beta``. ``beta == 1.0`` means recall and precision are equally
651
        important.
652
653
    The support is the number of occurrences of each class in ``y_true``.
654
655
    If ``pos_label is None`` and in binary classification, this function returns
656
    the average precision gain, recall gain and F-gain measure if ``average`` is
657
    one of ``'macro'`` or ``'weighted'``.
658
659
    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.
660
661
    Parameters
662
    ----------
663
    y_true : 1d array-like, or label indicator array / sparse matrix
664
        Ground truth (correct) target values.
665
666
    y_pred : 1d array-like, or label indicator array / sparse matrix
667
        Estimated targets as returned by a classifier.
668
669
    beta : float, default=1.0
670
        The strength of recall versus precision in the F-score.
671
672
    labels : array-like, default=None
673
        The set of labels to include when ``average != 'binary'``, and their
674
        order if ``average is None``. Labels present in the data can be
675
        excluded, for example to calculate a multiclass average ignoring a
676
        majority negative class, while labels not present in the data will
677
        result in 0 components in a macro average. For multilabel targets,
678
        labels are column indices. By default, all labels in ``y_true`` and
679
        ``y_pred`` are used in sorted order.
680
681
    pos_label : str or int, default=1
682
        The class to report if ``average='binary'`` and the data is binary.
683
        If the data are multiclass or multilabel, this will be ignored;
684
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
685
        scores for that label only.
686
687
    average : {'binary', 'macro', 'weighted'}, \
688
            default=None
689
        If ``None``, the scores for each class are returned. Otherwise, this
690
        determines the type of averaging performed on the data:
691
692
        ``'binary'``:
693
            Only report results for the class specified by ``pos_label``.
694
            This is applicable only if targets (``y_{true,pred}``) are binary.
695
        ``'macro'``:
696
            Calculate metrics for each label, and find their unweighted
697
            mean.  This does not take label imbalance into account.
698
        ``'weighted'``:
699
            Calculate metrics for each label, and find their average weighted
700
            by support (the number of true instances for each label). This
701
            alters 'macro' to account for label imbalance; it can result in an
702
            F-score that is not between precision and recall.
703
704
    warn_for : tuple or set, for internal use
705
        This determines which warnings will be made in the case that this
706
        function is being used to return only one of its metrics.
707
708
    sample_weight : array-like of shape (n_samples,), default=None
709
        Sample weights.
710
711
    zero_division : "warn", 0 or 1, default="warn"
712
        Sets the value to return when there is a zero division:
713
           - recall: when there are no positive labels
714
           - precision: when there are no positive predictions
715
           - f-score: both
716
717
        If set to "warn", this acts as 0, but warnings are also raised.
718
719
    class_distribution : Optional list, default=None
720
        The proportion that each class makes up in the dataset. If not
721
        provided then it's estimated from y_true.
722
723
    Returns
724
    -------
725
    precision_gain : float (if average is not None) or array of float, shape =\
726
        [n_unique_labels]
727
        Precision Gain score.
728
729
    recall_gain : float (if average is not None) or array of float, shape =\
730
        [n_unique_labels]
731
        Recall Gain score.
732
733
    f_gain_beta_score : float (if average is not None) or array of float, shape =\
734
        [n_unique_labels]
735
        F-beta Gain score.
736
737
    support : None (if average is not None) or array of int, shape =\
738
        [n_unique_labels]
739
        The number of occurrences of each label in ``y_true``.
740
741
    Notes
742
    -----
743
    When ``true positive + false positive == 0``, precision is undefined.
744
    When ``true positive + false negative == 0``, recall is undefined.
745
    In such cases, by default the metric will be set to 0, as will f-score,
746
    and ``UndefinedMetricWarning`` will be raised. This behavior can be
747
    modified with ``zero_division``.
748
749
    References
750
    ----------
751
    .. [1] `Precision-Recall-Gain Curves: PR Analysis Done Right (2015) by Peter
752
            A. Flach and Meelis Kull
753
           <https://papers.nips.cc/paper/2015/file/33e8075e9970de0cfea955afd4644bb2-Paper.pdf>`_.
754
    .. [2] `Wikipedia entry for the Precision and recall
755
           <https://en.wikipedia.org/wiki/Precision_and_recall>`_.
756
757
    .. [3] `Wikipedia entry for the F1-score
758
           <https://en.wikipedia.org/wiki/F1_score>`_.
759
760
    .. [4] `Discriminative Methods for Multi-labeled Classification Advances in
761
           Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu
762
           Godbole, Sunita Sarawagi
763
           <http://www.godbole.net/shantanu/pubs/multilabelsvm-pakdd04.pdf>`_.
764
765
    Examples
766
    --------
767
    >>> import numpy as np
768
    >>> from precision_recall_gain import precision_recall_fgain_score_support
769
    >>> y_true = np.array(['cat', 'dog', 'pig', 'dog', 'cat', 'pig', 'pig'])
770
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'dog', 'cat', 'dog', 'pig'])
771
772
    It is possible to compute per-label precisions, recalls, F1-scores and
773
    supports instead of averaging:
774
775
    >>> precision_recall_fgain_score_support(y_true, y_pred, average=None,
776
    ... labels=['pig', 'dog', 'cat'])
777
    (array([0.25, 0.2 , 1.  ]), array([-0.5,  0.6,  1. ]), array([-0.125,  0.4  ,  1.   ]), array([3, 2, 2]))
778
    """
779
    average_options = (None, "binary", "macro", "weighted")
780
    if average not in average_options:
781
        raise ValueError("average has to be one of " + str(average_options))
782
783
    return _precision_recall_fscore_support(
784
        y_true=y_true,
785
        y_pred=y_pred,
786
        beta=beta,
787
        labels=labels,
788
        pos_label=pos_label,
789
        average=average,
790
        warn_for=warn_for,
791
        sample_weight=sample_weight,
792
        zero_division=zero_division,
793
        return_in_gain_space=True,
794
        class_distribution=class_distribution,
795
    )
796
797
798
def precision_gain_score(
799
    y_true,
800
    y_pred,
801
    *,
802
    labels=None,
803
    pos_label=1,
804
    average="binary",
805
    sample_weight=None,
806
    zero_division="warn",
807
    class_distribution=None,
808
):
809
    """Compute the precision Gain.
810
811
    The metric is derived by applying the following transform to precision:
812
813
        f(x) = (x - pi) / ((1 - pi) * x)
814
815
            pi = proportion of positives
816
817
    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
818
    true positives and ``fp`` the number of false positives. The precision is
819
    intuitively the ability of the classifier not to label as positive a sample
820
    that is negative.
821
822
    The best value is 1 and the worst value is -Inf.
823
824
    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.
825
826
    Parameters
827
    ----------
828
    y_true : 1d array-like, or label indicator array / sparse matrix
829
        Ground truth (correct) target values.
830
831
    y_pred : 1d array-like, or label indicator array / sparse matrix
832
        Estimated targets as returned by a classifier.
833
834
    labels : array-like, default=None
835
        The set of labels to include when ``average != 'binary'``, and their
836
        order if ``average is None``. Labels present in the data can be
837
        excluded, for example to calculate a multiclass average ignoring a
838
        majority negative class, while labels not present in the data will
839
        result in 0 components in a macro average. For multilabel targets,
840
        labels are column indices. By default, all labels in ``y_true`` and
841
        ``y_pred`` are used in sorted order.
842
843
        .. versionchanged:: 0.17
844
           Parameter `labels` improved for multiclass problem.
845
846
    pos_label : str or int, default=1
847
        The class to report if ``average='binary'`` and the data is binary.
848
        If the data are multiclass or multilabel, this will be ignored;
849
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
850
        scores for that label only.
851
852
    average : {'macro', 'weighted', 'binary'} or None, \
853
            default='binary'
854
        This parameter is required for multiclass/multilabel targets.
855
        If ``None``, the scores for each class are returned. Otherwise, this
856
        determines the type of averaging performed on the data:
857
858
        ``'binary'``:
859
            Only report results for the class specified by ``pos_label``.
860
            This is applicable only if targets (``y_{true,pred}``) are binary.
861
        ``'macro'``:
862
            Calculate metrics for each label, and find their unweighted
863
            mean.  This does not take label imbalance into account.
864
        ``'weighted'``:
865
            Calculate metrics for each label, and find their average weighted
866
            by support (the number of true instances for each label). This
867
            alters 'macro' to account for label imbalance; it can result in an
868
            F-score that is not between precision and recall.
869
870
    sample_weight : array-like of shape (n_samples,), default=None
871
        Sample weights.
872
873
    zero_division : "warn", 0 or 1, default="warn"
874
        Sets the value to return when there is a zero division. If set to
875
        "warn", this acts as 0, but warnings are also raised.
876
877
    class_distribution : Optional list, default=None
878
        The proportion that each class makes up in the dataset. If not
879
        provided then it's estimated from y_true.
880
881
    Returns
882
    -------
883
    precision_gain : float (if average is not None) or array of float of shape \
884
                (n_unique_labels,)
885
        Precision Gain of the positive class in binary classification or weighted
886
        average of the precision gain of each class for the multiclass task.
887
888
    See Also
889
    --------
890
    precision_recall_fgain_score_support : Compute precision gain, recall gain,
891
        F-Gain measure and support for each class.
892
    recall_gain_score : Compute the recall gain, the gain-space transform of
893
        recall, ``tp / (tp + fn)``.
894
    PrecisionRecallDisplay.from_estimator : Plot precision-recall curve given
895
        an estimator and some data.
896
    PrecisionRecallDisplay.from_predictions : Plot precision-recall curve given
897
        binary class predictions.
898
    multilabel_confusion_matrix : Compute a confusion matrix for each class or
899
        sample.
900
901
    Notes
902
    -----
903
    When ``true positive + false positive == 0``, precision returns 0 and
904
    raises ``UndefinedMetricWarning``. This behavior can be
905
    modified with ``zero_division``.
906
907
    References
908
    ----------
909
    .. [1] `Precision-Recall-Gain Curves: PR Analysis Done Right (2015) by Peter
910
            A. Flach and Meelis Kull
911
           <https://papers.nips.cc/paper/2015/file/33e8075e9970de0cfea955afd4644bb2-Paper.pdf>`_.
912
913
    Examples
914
    --------
915
    >>> from precision_recall_gain import precision_gain_score
916
    >>> y_true = [0, 1, 2, 0, 1, 2]
917
    >>> y_pred = [0, 2, 1, 0, 0, 1]
918
    >>> int(precision_gain_score(y_true, y_pred, average='macro'))
919
    -333333333333333
920
    >>> int(precision_gain_score(y_true, y_pred, average='weighted'))
921
    -333333333333333
922
    >>> precision_gain_score(y_true, y_pred, average=None)
923
    array([ 7.5e-01, -5.0e+14, -5.0e+14])
924
    >>> y_pred = [0, 0, 0, 0, 0, 0]
925
    >>> precision_gain_score(y_true, y_pred, average=None)
926
    array([ 0.e+00, -5.e+14, -5.e+14])
927
    >>> precision_gain_score(y_true, y_pred, average=None, zero_division=1)
928
    array([0., 1., 1.])
929
    >>> # multilabel classification
930
    >>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
931
    >>> y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]
932
    >>> precision_gain_score(y_true, y_pred, average=None)
933
    array([0.5, 1. , 1. ])
934
    """
935
    p, _, _, _ = precision_recall_fgain_score_support(
936
        y_true,
937
        y_pred,
938
        labels=labels,
939
        pos_label=pos_label,
940
        average=average,
941
        warn_for=("precision",),
942
        sample_weight=sample_weight,
943
        zero_division=zero_division,
944
        class_distribution=class_distribution,
945
    )
946
    return p
947
948
949
def recall_gain_score(
950
    y_true,
951
    y_pred,
952
    *,
953
    labels=None,
954
    pos_label=1,
955
    average="binary",
956
    sample_weight=None,
957
    zero_division="warn",
958
    class_distribution=None,
959
):
960
    """Compute the recall Gain.
961
962
    The metric is derived by applying the following transform to recall:
963
964
        f(x) = (x - pi) / ((1 - pi) * x)
965
966
            pi = proportion of positives
967
968
    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
969
    true positives and ``fn`` the number of false negatives. The recall is
970
    intuitively the ability of the classifier to find all the positive samples.
971
972
    The best value is 1 and the worst value is -Inf.
973
974
    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.
975
976
    Parameters
977
    ----------
978
    y_true : 1d array-like, or label indicator array / sparse matrix
979
        Ground truth (correct) target values.
980
981
    y_pred : 1d array-like, or label indicator array / sparse matrix
982
        Estimated targets as returned by a classifier.
983
984
    labels : array-like, default=None
985
        The set of labels to include when ``average != 'binary'``, and their
986
        order if ``average is None``. Labels present in the data can be
987
        excluded, for example to calculate a multiclass average ignoring a
988
        majority negative class, while labels not present in the data will
989
        result in 0 components in a macro average. For multilabel targets,
990
        labels are column indices. By default, all labels in ``y_true`` and
991
        ``y_pred`` are used in sorted order.
992
993
        .. versionchanged:: 0.17
994
           Parameter `labels` improved for multiclass problem.
995
996
    pos_label : str or int, default=1
997
        The class to report if ``average='binary'`` and the data is binary.
998
        If the data are multiclass or multilabel, this will be ignored;
999
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
1000
        scores for that label only.
1001
1002
    average : {'macro', 'weighted', 'binary'} or None, \
1003
            default='binary'
1004
        This parameter is required for multiclass/multilabel targets.
1005
        If ``None``, the scores for each class are returned. Otherwise, this
1006
        determines the type of averaging performed on the data:
1007
1008
        ``'binary'``:
1009
            Only report results for the class specified by ``pos_label``.
1010
            This is applicable only if targets (``y_{true,pred}``) are binary.
1011
        ``'macro'``:
1012
            Calculate metrics for each label, and find their unweighted
1013
            mean.  This does not take label imbalance into account.
1014
        ``'weighted'``:
1015
            Calculate metrics for each label, and find their average weighted
1016
            by support (the number of true instances for each label). This
1017
            alters 'macro' to account for label imbalance; it can result in an
1018
            F-score that is not between precision and recall. Weighted recall
1019
            is equal to accuracy.
1020
1021
    sample_weight : array-like of shape (n_samples,), default=None
1022
        Sample weights.
1023
1024
    zero_division : "warn", 0 or 1, default="warn"
1025
        Sets the value to return when there is a zero division. If set to
1026
        "warn", this acts as 0, but warnings are also raised.
1027
1028
    class_distribution : Optional list, default=None
1029
        The proportion that each class makes up in the dataset. If not
1030
        provided then it's estimated from y_true.
1031
1032
    Returns
1033
    -------
1034
    recall_gain : float (if average is not None) or array of float of shape \
1035
             (n_unique_labels,)
1036
        Recall Gain of the positive class in binary classification or weighted
1037
        average of the recall gain of each class for the multiclass task.
1038
1039
    See Also
1040
    --------
1041
    precision_recall_fgain_score_support : Compute precision gain, recall gain,
1042
        F-Gain measure and support for each class.
1043
    precision_gain_score : Compute the precision gain, the gain-space transform of
1044
        precision, ``tp / (tp + fp)``.
1045
    balanced_accuracy_score : Compute balanced accuracy to deal with imbalanced
1046
        datasets.
1047
    multilabel_confusion_matrix : Compute a confusion matrix for each class or
1048
        sample.
1049
    PrecisionRecallDisplay.from_estimator : Plot precision-recall curve given
1050
        an estimator and some data.
1051
    PrecisionRecallDisplay.from_predictions : Plot precision-recall curve given
1052
        binary class predictions.
1053
1054
    Notes
1055
    -----
1056
    When ``true positive + false negative == 0``, recall returns 0 and raises
1057
    ``UndefinedMetricWarning``. This behavior can be modified with
1058
    ``zero_division``.
1059
1060
    References
1061
    ----------
1062
    .. [1] `Precision-Recall-Gain Curves: PR Analysis Done Right (2015) by Peter
1063
            A. Flach and Meelis Kull
1064
           <https://papers.nips.cc/paper/2015/file/33e8075e9970de0cfea955afd4644bb2-Paper.pdf>`_.
1065
1066
    Examples
1067
    --------
1068
    >>> from precision_recall_gain import recall_gain_score
1069
    >>> y_true = [0, 1, 2, 0, 1, 2]
1070
    >>> y_pred = [0, 2, 1, 0, 0, 1]
1071
    >>> int(recall_gain_score(y_true, y_pred, average='macro'))
1072
    -333333333333333
1073
    >>> int(recall_gain_score(y_true, y_pred, average='weighted'))
1074
    -333333333333333
1075
    >>> recall_gain_score(y_true, y_pred, average=None)
1076
    array([ 1.e+00, -5.e+14, -5.e+14])
1077
    >>> y_true = [0, 0, 0, 0, 0, 0]
1078
    >>> recall_gain_score(y_true, y_pred, average=None)
1079
    array([-inf,  nan,  nan])
1080
    >>> recall_gain_score(y_true, y_pred, average=None, zero_division=1)
1081
    array([-inf,   1.,   1.])
1082
    >>> # multilabel classification
1083
    >>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
1084
    >>> y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]
1085
    >>> recall_gain_score(y_true, y_pred, average=None)
1086
    array([ 1.,  1., -1.])
1087
    """
1088
    _, r, _, _ = precision_recall_fgain_score_support(
1089
        y_true,
1090
        y_pred,
1091
        labels=labels,
1092
        pos_label=pos_label,
1093
        average=average,
1094
        warn_for=("recall",),
1095
        sample_weight=sample_weight,
1096
        zero_division=zero_division,
1097
        class_distribution=class_distribution,
1098
    )
1099
    return r
1100
1101
1102
def prg_gain_transform(x, *, pi):
1103
    """Transfrom from Precision Recall space into Precision Recall Gain space.
1104
1105
    Parameters
1106
    ----------
1107
    x : scalar or 1d array-like
1108
        The metric, either precision, recall or F-score to be transformed into
1109
        PRG space.
1110
    pi : scalar
1111
        The proportion of datapoints belonging to the positive class in the
1112
        dataset.
1113
1114
    Returns
1115
    -------
1116
    x' : scalar or 1d array-like
1117
        The transformed metric in PRG space.
1118
1119
    References
1120
    ----------
1121
    .. [1] `Precision-Recall-Gain Curves: PR Analysis Done Right (2015) by Peter
1122
            A. Flach and Meelis Kull
1123
           <https://papers.nips.cc/paper/2015/file/33e8075e9970de0cfea955afd4644bb2-Paper.pdf>`_.
1124
    """
1125
    if x == pi == 1:
1126
        return 1
1127
    elif x == pi == 0:
1128
        # if no positive class in true or predicted labels, return NaN
1129
        return np.nan
1130
    # note: if x == 0, then the metric value is -Inf
1131
    # and if x<pi, then the metric value is negative
1132
    # for our purposes we will add a small value to x
1133
    # to avoid division by zero and so that the metric
1134
    # value is not NaN if one of the classes has a precision
1135
    # or recall of 0
1136
    x = min(1, x + 1e-15)
1137
    # we have to also adjust pi for cases when pi is 0
1138
    pi = min(1, pi + 1e-15)
1139
    return (x - pi) / ((1 - pi) * x)
1140
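
For a quick sanity check, the gain scores can be reproduced by applying
prg_gain_transform to the corresponding vanilla scikit-learn metrics. A minimal
sketch, assuming prg_gain_transform and the gain scorers are importable from
the precision_recall_gain package (the import path used in the doctests above):

import numpy as np
from sklearn.metrics import precision_score, recall_score
from precision_recall_gain import (
    precision_gain_score,
    prg_gain_transform,
    recall_gain_score,
)

y_true = [0, 1, 1, 0, 1, 1]
y_pred = [0, 1, 0, 1, 1, 1]

# Prevalence of the positive class (pi), estimated from y_true just as the
# scorers do by default when class_distribution is not given.
pi = np.mean(np.asarray(y_true) == 1)

# Transforming the vanilla scores by hand should agree with the gain scorers,
# up to the small epsilon added inside prg_gain_transform.
print(prg_gain_transform(precision_score(y_true, y_pred), pi=pi),
      precision_gain_score(y_true, y_pred))
print(prg_gain_transform(recall_score(y_true, y_pred), pi=pi),
      recall_gain_score(y_true, y_pred))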