Completed
Push — master ( 3ac297...afe14d )
by Chris
16:40 queued 07:25
created

ConfusionTable.test_pos_pop()   A

Complexity

Conditions 1

Size

Total Lines 16
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 16
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
r"""abydos.stats._confusion_table.
20
21
This includes the ConfusionTable object, which includes members capable of
22
calculating the following data based on a confusion table:
23
24
    - population counts
25
    - precision, recall, specificity, negative predictive value, fall-out,
26
      false discovery rate, accuracy, balanced accuracy, informedness,
27
      and markedness
28
    - various means of the precision & recall, including: arithmetic,
29
      geometric, harmonic, quadratic, logarithmic, contraharmonic,
30
      identric (exponential), & Hölder (power/generalized) means
31
    - :math:`F_{\beta}`-scores, :math:`E`-scores, :math:`G`-measures, along
32
      with special functions for :math:`F_{1}`, :math:`F_{0.5}`, &
33
      :math:`F_{2}` scores
34
    - significance & Matthews correlation coefficient calculation
35
"""
36
37 1
from __future__ import (
38
    absolute_import,
39
    division,
40
    print_function,
41
    unicode_literals,
42
)
43
44 1
import math
45
46 1
from ._mean import (
47
    aghmean,
48
    agmean,
49
    amean,
50
    cmean,
51
    ghmean,
52
    gmean,
53
    heronian_mean,
54
    hmean,
55
    hoelder_mean,
56
    imean,
57
    lehmer_mean,
58
    qmean,
59
    seiffert_mean,
60
)
61
62 1
__all__ = ['ConfusionTable']
63
64
65 1
class ConfusionTable(object):
66
    """ConfusionTable object.
67
68
    This object is initialized by passing either four integers (or a tuple of
69
    four integers) representing the squares of a confusion table:
70
    true positives, true negatives, false positives, and false negatives
71
72
    The object possesses methods for the calculation of various statistics
73
    based on the confusion table.
74
    """
75
76 1
    _tp, _tn, _fp, _fn = 0, 0, 0, 0
77
78 1
    def __init__(self, tp=0, tn=0, fp=0, fn=0):
79
        """Initialize ConfusionTable.
80
81
        Parameters
82
        ----------
83
        tp : int or a tuple, list, or dict
84
            True positives; If a tuple or list is supplied, it must include 4
85
            values in the order [tp, tn, fp, fn]. If a dict is supplied, it
86
            must have 4 keys, namely 'tp', 'tn', 'fp', & 'fn'.
87
        tn : int
88
            True negatives
89
        fp : int
90
            False positives
91
        fn : int
92
            False negatives
93
94
        Raises
95
        ------
96
        AttributeError
97
            ConfusionTable requires a 4-tuple when being created from a tuple.
98
99
        Examples
100
        --------
101
        >>> ct = ConfusionTable(120, 60, 20, 30)
102
        >>> ct == ConfusionTable((120, 60, 20, 30))
103
        True
104
        >>> ct == ConfusionTable([120, 60, 20, 30])
105
        True
106
        >>> ct == ConfusionTable({'tp': 120, 'tn': 60, 'fp': 20, 'fn': 30})
107
        True
108
109
        """
110 1
        if isinstance(tp, (tuple, list)):
111 1
            if len(tp) == 4:
112 1
                self._tp = tp[0]
113 1
                self._tn = tp[1]
114 1
                self._fp = tp[2]
115 1
                self._fn = tp[3]
116
            else:
117 1
                raise AttributeError(
118
                    'ConfusionTable requires a 4-tuple when being created '
119
                    + 'from a tuple.'
120
                )
121 1
        elif isinstance(tp, dict):
122 1
            if 'tp' in tp:
123 1
                self._tp = tp['tp']
124 1
            if 'tn' in tp:
125 1
                self._tn = tp['tn']
126 1
            if 'fp' in tp:
127 1
                self._fp = tp['fp']
128 1
            if 'fn' in tp:
129 1
                self._fn = tp['fn']
130
        else:
131 1
            self._tp = tp
132 1
            self._tn = tn
133 1
            self._fp = fp
134 1
            self._fn = fn
135
136 1
    def __eq__(self, other):
137
        """Perform eqality (==) comparison.
138
139
        Compares a ConfusionTable to another ConfusionTable or its equivalent
140
        in the form of a tuple, list, or dict.
141
142
        Parameters
143
        ----------
144
        other : ConfusionTable
145
            Another ConfusionTable object to compare to
146
147
        Returns
148
        -------
149
        bool
150
            True if two ConfusionTables are the same object or all four of
151
            their attributes are equal
152
153
        Examples
154
        --------
155
        >>> ct1 = ConfusionTable(120, 60, 20, 30)
156
        >>> ct2 = ConfusionTable(120, 60, 20, 30)
157
        >>> ct3 = ConfusionTable(60, 30, 10, 15)
158
159
        >>> ct1 == ct2
160
        True
161
        >>> ct1 == ct3
162
        False
163
164
        >>> ct1 != ct2
165
        False
166
        >>> ct1 != ct3
167
        True
168
169
        """
170 1
        if isinstance(other, ConfusionTable):
171 1
            if id(self) == id(other):
172 1
                return True
173 1
            if (
174
                self._tp == other.true_pos()
175
                and self._tn == other.true_neg()
176
                and self._fp == other.false_pos()
177
                and self._fn == other.false_neg()
178
            ):
179 1
                return True
180 1
        elif isinstance(other, (tuple, list)):
181 1
            if (
182
                self._tp == other[0]
183
                and self._tn == other[1]
184
                and self._fp == other[2]
185
                and self._fn == other[3]
186
            ):
187 1
                return True
188 1
        elif isinstance(other, dict):
189 1
            if (
190
                self._tp == other['tp']
191
                and self._tn == other['tn']
192
                and self._fp == other['fp']
193
                and self._fn == other['fn']
194
            ):
195 1
                return True
196 1
        return False
197
198 1
    def __str__(self):
199
        """Cast to str.
200
201
        Returns
202
        -------
203
        str
204
            A human-readable version of the confusion table
205
206
        Example
207
        -------
208
        >>> ct = ConfusionTable(120, 60, 20, 30)
209
        >>> str(ct)
210
        'tp:120, tn:60, fp:20, fn:30'
211
212
        """
213 1
        return 'tp:{}, tn:{}, fp:{}, fn:{}'.format(
214
            self._tp, self._tn, self._fp, self._fn
215
        )
216
217 1
    def to_tuple(self):
218
        """Cast to tuple.
219
220
        Returns
221
        -------
222
        tuple
223
            The confusion table as a 4-tuple (tp, tn, fp, fn)
224
225
        Example
226
        -------
227
        >>> ct = ConfusionTable(120, 60, 20, 30)
228
        >>> ct.to_tuple()
229
        (120, 60, 20, 30)
230
231
        """
232 1
        return self._tp, self._tn, self._fp, self._fn
233
234 1
    def to_dict(self):
235
        """Cast to dict.
236
237
        Returns
238
        -------
239
        dict
240
            The confusion table as a dict
241
242
        Example
243
        -------
244
        >>> ct = ConfusionTable(120, 60, 20, 30)
245
        >>> import pprint
246
        >>> pprint.pprint(ct.to_dict())
247
        {'fn': 30, 'fp': 20, 'tn': 60, 'tp': 120}
248
249
        """
250 1
        return {'tp': self._tp, 'tn': self._tn, 'fp': self._fp, 'fn': self._fn}
251
252 1
    def true_pos(self):
253
        """Return true positives.
254
255
        Returns
256
        -------
257
        int
258
            The true positives of the confusion table
259
260
        Example
261
        -------
262
        >>> ct = ConfusionTable(120, 60, 20, 30)
263
        >>> ct.true_pos()
264
        120
265
266
        """
267 1
        return self._tp
268
269 1
    def true_neg(self):
270
        """Return true negatives.
271
272
        Returns
273
        -------
274
        int
275
            The true negatives of the confusion table
276
277
        Example
278
        -------
279
        >>> ct = ConfusionTable(120, 60, 20, 30)
280
        >>> ct.true_neg()
281
        60
282
283
        """
284 1
        return self._tn
285
286 1
    def false_pos(self):
287
        """Return false positives.
288
289
        Returns
290
        -------
291
        int
292
            The false positives of the confusion table
293
294
        Example
295
        -------
296
        >>> ct = ConfusionTable(120, 60, 20, 30)
297
        >>> ct.false_pos()
298
        20
299
300
        """
301 1
        return self._fp
302
303 1
    def false_neg(self):
304
        """Return false negatives.
305
306
        Returns
307
        -------
308
        int
309
            The false negatives of the confusion table
310
311
        Example
312
        -------
313
        >>> ct = ConfusionTable(120, 60, 20, 30)
314
        >>> ct.false_neg()
315
        30
316
317
        """
318 1
        return self._fn
319
320 1
    def correct_pop(self):
321
        """Return correct population.
322
323
        Returns
324
        -------
325
        int
326
            The correct population of the confusion table
327
328
        Example
329
        -------
330
        >>> ct = ConfusionTable(120, 60, 20, 30)
331
        >>> ct.correct_pop()
332
        180
333
334
        """
335 1
        return self._tp + self._tn
336
337 1
    def error_pop(self):
338
        """Return error population.
339
340
        Returns
341
        -------
342
        int
343
            The error population of the confusion table
344
345
        Example
346
        -------
347
        >>> ct = ConfusionTable(120, 60, 20, 30)
348
        >>> ct.error_pop()
349
        50
350
351
        """
352 1
        return self._fp + self._fn
353
354 1
    def test_pos_pop(self):
355
        """Return test positive population.
356
357
        Returns
358
        -------
359
        int
360
            The test positive population of the confusion table
361
362
        Example
363
        -------
364
        >>> ct = ConfusionTable(120, 60, 20, 30)
365
        >>> ct.test_pos_pop()
366
        140
367
368
        """
369 1
        return self._tp + self._fp
370
371 1
    def test_neg_pop(self):
372
        """Return test negative population.
373
374
        Returns
375
        -------
376
        int
377
            The test negative population of the confusion table
378
379
        Example
380
        -------
381
        >>> ct = ConfusionTable(120, 60, 20, 30)
382
        >>> ct.test_neg_pop()
383
        90
384
385
        """
386 1
        return self._tn + self._fn
387
388 1
    def cond_pos_pop(self):
389
        """Return condition positive population.
390
391
        Returns
392
        -------
393
        int
394
            The condition positive population of the confusion table
395
396
        Example
397
        -------
398
        >>> ct = ConfusionTable(120, 60, 20, 30)
399
        >>> ct.cond_pos_pop()
400
        150
401
402
        """
403 1
        return self._tp + self._fn
404
405 1
    def cond_neg_pop(self):
406
        """Return condition negative population.
407
408
        Returns
409
        -------
410
        int
411
            The condition negative population of the confusion table
412
413
        Example
414
        -------
415
        >>> ct = ConfusionTable(120, 60, 20, 30)
416
        >>> ct.cond_neg_pop()
417
        80
418
419
        """
420 1
        return self._fp + self._tn
421
422 1
    def population(self):
423
        """Return population, N.
424
425
        Returns
426
        -------
427
        int
428
            The population (N) of the confusion table
429
430
        Example
431
        -------
432
        >>> ct = ConfusionTable(120, 60, 20, 30)
433
        >>> ct.population()
434
        230
435
436
        """
437 1
        return self._tp + self._tn + self._fp + self._fn
438
439 1
    def precision(self):
440
        r"""Return precision.
441
442
        Precision is defined as :math:`\frac{tp}{tp + fp}`
443
444
        AKA positive predictive value (PPV)
445
446
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
447
448
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Precision
449
450
        Returns
451
        -------
452
        float
453
            The precision of the confusion table
454
455
        Example
456
        -------
457
        >>> ct = ConfusionTable(120, 60, 20, 30)
458
        >>> ct.precision()
459
        0.8571428571428571
460
461
        """
462 1
        if self._tp + self._fp == 0:
463 1
            return float('NaN')
464 1
        return self._tp / (self._tp + self._fp)
465
466 1
    def precision_gain(self):
467
        r"""Return gain in precision.
468
469
        The gain in precision is defined as:
470
        :math:`G(precision) = \frac{precision}{random~ precision}`
471
472
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
473
474
        Returns
475
        -------
476
        float
477
            The gain in precision of the confusion table
478
479
        Example
480
        -------
481
        >>> ct = ConfusionTable(120, 60, 20, 30)
482
        >>> ct.precision_gain()
483
        1.3142857142857143
484
485
        """
486 1
        if self.population() == 0:
487 1
            return float('NaN')
488 1
        random_precision = self.cond_pos_pop() / self.population()
489 1
        return self.precision() / random_precision
490
491 1
    def recall(self):
492
        r"""Return recall.
493
494
        Recall is defined as :math:`\frac{tp}{tp + fn}`
495
496
        AKA sensitivity
497
498
        AKA true positive rate (TPR)
499
500
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
501
502
        Cf. https://en.wikipedia.org/wiki/Sensitivity_(test)
503
504
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Recall
505
506
        Returns
507
        -------
508
        float
509
            The recall of the confusion table
510
511
        Example
512
        -------
513
        >>> ct = ConfusionTable(120, 60, 20, 30)
514
        >>> ct.recall()
515
        0.8
516
517
        """
518 1
        if self._tp + self._fn == 0:
519 1
            return float('NaN')
520 1
        return self._tp / (self._tp + self._fn)
521
522 1
    def specificity(self):
523
        r"""Return specificity.
524
525
        Specificity is defined as :math:`\frac{tn}{tn + fp}`
526
527
        AKA true negative rate (TNR)
528
529
        Cf. https://en.wikipedia.org/wiki/Specificity_(tests)
530
531
        Returns
532
        -------
533
        float
534
            The specificity of the confusion table
535
536
        Example
537
        -------
538
        >>> ct = ConfusionTable(120, 60, 20, 30)
539
        >>> ct.specificity()
540
        0.75
541
542
        """
543 1
        if self._tn + self._fp == 0:
544 1
            return float('NaN')
545 1
        return self._tn / (self._tn + self._fp)
546
547 1
    def npv(self):
548
        r"""Return negative predictive value (NPV).
549
550
        NPV is defined as :math:`\frac{tn}{tn + fn}`
551
552
        Cf. https://en.wikipedia.org/wiki/Negative_predictive_value
553
554
        Returns
555
        -------
556
        float
557
            The negative predictive value of the confusion table
558
559
        Example
560
        -------
561
        >>> ct = ConfusionTable(120, 60, 20, 30)
562
        >>> ct.npv()
563
        0.6666666666666666
564
565
        """
566 1
        if self._tn + self._fn == 0:
567 1
            return float('NaN')
568 1
        return self._tn / (self._tn + self._fn)
569
570 1
    def fallout(self):
571
        r"""Return fall-out.
572
573
        Fall-out is defined as :math:`\frac{fp}{fp + tn}`
574
575
        AKA false positive rate (FPR)
576
577
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Fall-out
578
579
        Returns
580
        -------
581
        float
582
            The fall-out of the confusion table
583
584
        Example
585
        -------
586
        >>> ct = ConfusionTable(120, 60, 20, 30)
587
        >>> ct.fallout()
588
        0.25
589
590
        """
591 1
        if self._fp + self._tn == 0:
592 1
            return float('NaN')
593 1
        return self._fp / (self._fp + self._tn)
594
595 1
    def fdr(self):
596
        r"""Return false discovery rate (FDR).
597
598
        False discovery rate is defined as :math:`\frac{fp}{fp + tp}`
599
600
        Cf. https://en.wikipedia.org/wiki/False_discovery_rate
601
602
        Returns
603
        -------
604
        float
605
            The false discovery rate of the confusion table
606
607
        Example
608
        -------
609
        >>> ct = ConfusionTable(120, 60, 20, 30)
610
        >>> ct.fdr()
611
        0.14285714285714285
612
613
        """
614 1
        if self._fp + self._tp == 0:
615 1
            return float('NaN')
616 1
        return self._fp / (self._fp + self._tp)
617
618 1
    def accuracy(self):
619
        r"""Return accuracy.
620
621
        Accuracy is defined as :math:`\frac{tp + tn}{population}`
622
623
        Cf. https://en.wikipedia.org/wiki/Accuracy
624
625
        Returns
626
        -------
627
        float
628
            The accuracy of the confusion table
629
630
        Example
631
        -------
632
        >>> ct = ConfusionTable(120, 60, 20, 30)
633
        >>> ct.accuracy()
634
        0.782608695652174
635
636
        """
637 1
        if self.population() == 0:
638 1
            return float('NaN')
639 1
        return (self._tp + self._tn) / self.population()
640
641 1
    def accuracy_gain(self):
642
        r"""Return gain in accuracy.
643
644
        The gain in accuracy is defined as:
645
        :math:`G(accuracy) = \frac{accuracy}{random~ accuracy}`
646
647
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
648
649
        Returns
650
        -------
651
        float
652
            The gain in accuracy of the confusion table
653
654
        Example
655
        -------
656
        >>> ct = ConfusionTable(120, 60, 20, 30)
657
        >>> ct.accuracy_gain()
658
        1.4325259515570934
659
660
        """
661 1
        if self.population() == 0:
662 1
            return float('NaN')
663 1
        random_accuracy = (self.cond_pos_pop() / self.population()) ** 2 + (
664
            self.cond_neg_pop() / self.population()
665
        ) ** 2
666 1
        return self.accuracy() / random_accuracy
667
668 1
    def balanced_accuracy(self):
669
        r"""Return balanced accuracy.
670
671
        Balanced accuracy is defined as
672
        :math:`\frac{sensitivity + specificity}{2}`
673
674
        Cf. https://en.wikipedia.org/wiki/Accuracy
675
676
        Returns
677
        -------
678
        float
679
            The balanced accuracy of the confusion table
680
681
        Example
682
        -------
683
        >>> ct = ConfusionTable(120, 60, 20, 30)
684
        >>> ct.balanced_accuracy()
685
        0.775
686
687
        """
688 1
        return 0.5 * (self.recall() + self.specificity())
689
690 1
    def informedness(self):
691
        """Return informedness.
692
693
        Informedness is defined as :math:`sensitivity + specificity - 1`.
694
695
        AKA Youden's J statistic (:cite:`Youden:1950`)
696
697
        AKA DeltaP'
698
699
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
700
701
        Returns
702
        -------
703
        float
704
            The informedness of the confusion table
705
706
        Example
707
        -------
708
        >>> ct = ConfusionTable(120, 60, 20, 30)
709
        >>> ct.informedness()
710
        0.55
711
712
        """
713 1
        return self.recall() + self.specificity() - 1
714
715 1
    def markedness(self):
716
        """Return markedness.
717
718
        Markedness is defined as :math:`precision + npv - 1`
719
720
        Returns
721
        -------
722
        float
723
            The markedness of the confusion table
724
725
        Example
726
        -------
727
        >>> ct = ConfusionTable(120, 60, 20, 30)
728
        >>> ct.markedness()
729
        0.5238095238095237
730
731
        """
732 1
        return self.precision() + self.npv() - 1
733
734 1
    def pr_amean(self):
        r"""Return arithmetic mean of precision & recall.

        The arithmetic mean of precision and recall is defined as:
        :math:`\frac{precision + recall}{2}`

        Cf. https://en.wikipedia.org/wiki/Arithmetic_mean

        Returns
        -------
        float
            The arithmetic mean of the confusion table's precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_amean()
        0.8285714285714285

        """
        pr_pair = (self.precision(), self.recall())
        return amean(pr_pair)
755
756 1
    def pr_gmean(self):
        r"""Compute the geometric mean of precision & recall.

        The geometric mean of precision and recall is defined as:
        :math:`\sqrt{precision \cdot recall}`

        Cf. https://en.wikipedia.org/wiki/Geometric_mean

        Returns
        -------
        float
            The result of gmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_gmean()
        0.828078671210825

        """
        pr_pair = (self.precision(), self.recall())
        return gmean(pr_pair)
777
778 1
    def pr_hmean(self):
        r"""Compute the harmonic mean of precision & recall.

        The harmonic mean of precision and recall is defined as:
        :math:`\frac{2 \cdot precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/Harmonic_mean

        Returns
        -------
        float
            The result of hmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hmean()
        0.8275862068965516

        """
        pr_pair = (self.precision(), self.recall())
        return hmean(pr_pair)
799
800 1
    def pr_qmean(self):
        r"""Compute the quadratic mean of precision & recall.

        The quadratic mean of precision and recall is defined as:
        :math:`\sqrt{\frac{precision^{2} + recall^{2}}{2}}`

        Cf. https://en.wikipedia.org/wiki/Quadratic_mean

        Returns
        -------
        float
            The result of qmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_qmean()
        0.8290638930598233

        """
        pr_pair = (self.precision(), self.recall())
        return qmean(pr_pair)
821
822 1
    def pr_cmean(self):
        r"""Compute the contraharmonic mean of precision & recall.

        The contraharmonic mean is:
        :math:`\frac{precision^{2} + recall^{2}}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/Contraharmonic_mean

        Returns
        -------
        float
            The result of cmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_cmean()
        0.8295566502463055

        """
        pr_pair = (self.precision(), self.recall())
        return cmean(pr_pair)
843
844 1
    def pr_lmean(self):
845
        r"""Return logarithmic mean of precision & recall.
846
847
        The logarithmic mean is:
848
        0 if either precision or recall is 0,
849
        the precision if they are equal,
850
        otherwise :math:`\frac{precision - recall}
851
        {ln(precision) - ln(recall)}`
852
853
        Cf. https://en.wikipedia.org/wiki/Logarithmic_mean
854
855
        Returns
856
        -------
857
        float
858
            The logarithmic mean of the confusion table's precision & recall
859
860
        Example
861
        -------
862
        >>> ct = ConfusionTable(120, 60, 20, 30)
863
        >>> ct.pr_lmean()
864
        0.8282429171492667
865
866
        """
867 1
        precision = self.precision()
868 1
        recall = self.recall()
869 1
        if not precision or not recall:
870 1
            return 0.0
871 1
        elif precision == recall:
872 1
            return precision
873 1
        return (precision - recall) / (math.log(precision) - math.log(recall))
874
875 1
    def pr_imean(self):
        r"""Compute the identric (exponential) mean of precision & recall.

        The identric mean is:
        precision if precision = recall,
        otherwise :math:`\frac{1}{e} \cdot
        \sqrt[precision - recall]{\frac{precision^{precision}}
        {recall^{recall}}}`

        Cf. https://en.wikipedia.org/wiki/Identric_mean

        Returns
        -------
        float
            The result of imean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_imean()
        0.8284071826325543

        """
        pr_pair = (self.precision(), self.recall())
        return imean(pr_pair)
899
900 1
    def pr_seiffert_mean(self):
        r"""Compute Seiffert's mean of precision & recall.

        Seiffert's mean of precision and recall is:
        :math:`\frac{precision - recall}{4 \cdot arctan
        \sqrt{\frac{precision}{recall}} - \pi}`

        It is defined in :cite:`Seiffert:1993`.

        Returns
        -------
        float
            The result of seiffert_mean applied to the (precision, recall)
            pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_seiffert_mean()
        0.8284071696048312

        """
        pr_pair = (self.precision(), self.recall())
        return seiffert_mean(pr_pair)
922
923 1
    def pr_lehmer_mean(self, exp=2.0):
        r"""Compute the Lehmer mean of precision & recall.

        The Lehmer mean is:
        :math:`\frac{precision^{exp} + recall^{exp}}
        {precision^{exp-1} + recall^{exp-1}}`

        Cf. https://en.wikipedia.org/wiki/Lehmer_mean

        Parameters
        ----------
        exp : float
            The exponent of the Lehmer mean, passed through to lehmer_mean

        Returns
        -------
        float
            The result of lehmer_mean applied to the (precision, recall)
            pair with the given exponent

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_lehmer_mean()
        0.8295566502463055

        """
        pr_pair = (self.precision(), self.recall())
        return lehmer_mean(pr_pair, exp)
951
952 1
    def pr_heronian_mean(self):
        r"""Compute the Heronian mean of precision & recall.

        The Heronian mean of precision and recall is defined as:
        :math:`\frac{precision + \sqrt{precision \cdot recall} + recall}{3}`

        Cf. https://en.wikipedia.org/wiki/Heronian_mean

        Returns
        -------
        float
            The result of heronian_mean applied to the (precision, recall)
            pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_heronian_mean()
        0.8284071761178939

        """
        pr_pair = (self.precision(), self.recall())
        return heronian_mean(pr_pair)
973
974 1
    def pr_hoelder_mean(self, exp=2):
        r"""Return Hölder (power/generalized) mean of precision & recall.

        The power mean of precision and recall is defined as:
        :math:`\sqrt[exp]{\frac{1}{2} \cdot
        (precision^{exp} + recall^{exp})}`
        for :math:`exp \ne 0`, and the geometric mean for :math:`exp = 0`

        Cf. https://en.wikipedia.org/wiki/Generalized_mean

        Parameters
        ----------
        exp : float
            The exponent of the Hölder mean, passed through to hoelder_mean

        Returns
        -------
        float
            The Hölder mean for the given exponent of the confusion table's
            precision & recall

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hoelder_mean()
        0.8290638930598233

        """
        pr_pair = (self.precision(), self.recall())
        return hoelder_mean(pr_pair, exp)
1003
1004 1
    def pr_agmean(self):
        """Compute the arithmetic-geometric mean of precision & recall.

        Iterates between arithmetic & geometric means until they converge to
        a single value (rounded to 12 digits)

        Cf. https://en.wikipedia.org/wiki/Arithmetic-geometric_mean

        Returns
        -------
        float
            The result of agmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_agmean()
        0.8283250315702829

        """
        pr_pair = (self.precision(), self.recall())
        return agmean(pr_pair)
1026
1027 1
    def pr_ghmean(self):
        """Compute the geometric-harmonic mean of precision & recall.

        Iterates between geometric & harmonic means until they converge to
        a single value (rounded to 12 digits)

        Cf. https://en.wikipedia.org/wiki/Geometric-harmonic_mean

        Returns
        -------
        float
            The result of ghmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_ghmean()
        0.8278323841238441

        """
        pr_pair = (self.precision(), self.recall())
        return ghmean(pr_pair)
1049
1050 1
    def pr_aghmean(self):
        """Compute the arithmetic-geometric-harmonic mean of precision & recall.

        Iterates over arithmetic, geometric, & harmonic means until they
        converge to a single value (rounded to 12 digits), following the
        method described in :cite:`Raissouli:2009`.

        Returns
        -------
        float
            The result of aghmean applied to the (precision, recall) pair

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_aghmean()
        0.8280786712108288

        """
        pr_pair = (self.precision(), self.recall())
        return aghmean(pr_pair)
1071
1072 1
    def fbeta_score(self, beta=1.0):
        r"""Return :math:`F_{\beta}` score.

        :math:`F_{\beta}` for a positive real value :math:`\beta` "measures
        the effectiveness of retrieval with respect to a user who
        attaches :math:`\beta` times as much importance to recall as
        precision" (van Rijsbergen 1979)

        :math:`F_{\beta}` score is defined as:
        :math:`(1 + \beta^2) \cdot \frac{precision \cdot recall}
        {((\beta^2 \cdot precision) + recall)}`

        When precision and recall are both 0, the denominator above is 0;
        in that case 0.0 is returned, which is the value given by the
        equivalent count-based form
        :math:`\frac{(1 + \beta^2) \cdot tp}
        {(1 + \beta^2) \cdot tp + \beta^2 \cdot fn + fp}`.

        Cf. https://en.wikipedia.org/wiki/F1_score

        Parameters
        ----------
        beta : float
            The :math:`\beta` parameter in the above formula

        Returns
        -------
        float
            The :math:`F_{\beta}` of the confusion table

        Raises
        ------
        AttributeError
            Beta must be a positive real value

        Examples
        --------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fbeta_score()
        0.8275862068965518
        >>> ct.fbeta_score(beta=0.1)
        0.8565371024734982

        """
        if beta <= 0:
            raise AttributeError('Beta must be a positive real value.')
        precision = self.precision()
        recall = self.recall()
        beta_squared = beta ** 2
        denominator = (beta_squared * precision) + recall
        # Guard the division: with beta > 0 this is only 0 when precision
        # and recall are both 0. A NaN denominator compares unequal to 0 and
        # so still propagates through the division below.
        if denominator == 0:
            return 0.0
        return (1 + beta_squared) * precision * recall / denominator
1120
1121 1
    def f2_score(self):
        """Return :math:`F_{2}`.

        Compared with the :math:`F_{1}` score, the :math:`F_{2}` score
        weights recall more heavily than precision. It is the
        :math:`F_{\\beta}` score evaluated at :math:`\\beta = 2`.

        Cf. https://en.wikipedia.org/wiki/F1_score

        Returns
        -------
        float
            The :math:`F_{2}` of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f2_score()
        0.8108108108108109

        """
        f_two = self.fbeta_score(2.0)
        return f_two
1142
1143 1
    def fhalf_score(self):
        """Return :math:`F_{0.5}` score.

        Compared with the :math:`F_{1}` score, the :math:`F_{0.5}` score
        weights precision more heavily than recall. It is the
        :math:`F_{\\beta}` score evaluated at :math:`\\beta = 0.5`.

        Cf. https://en.wikipedia.org/wiki/F1_score

        Returns
        -------
        float
            The :math:`F_{0.5}` score of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fhalf_score()
        0.8450704225352114

        """
        f_half = self.fbeta_score(0.5)
        return f_half
1164
1165 1
    def e_score(self, beta=1):
        r"""Return :math:`E`-score.

        Van Rijsbergen's effectiveness measure is the complement of the
        :math:`F_{\beta}` score:
        :math:`E=1-F_{\beta}`.

        Cf. https://en.wikipedia.org/wiki/Information_retrieval#F-measure

        Parameters
        ----------
        beta : float
            The :math:`\beta` parameter in the above formula

        Returns
        -------
        float
            The :math:`E`-score of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.e_score()
        0.17241379310344818

        """
        f_beta = self.fbeta_score(beta)
        return 1 - f_beta
1191
1192 1
    def f1_score(self):
        r"""Return :math:`F_{1}` score.

        The :math:`F_{1}` score is obtained as the harmonic mean of
        precision and recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        Returns
        -------
        float
            The :math:`F_{1}` of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f1_score()
        0.8275862068965516

        """
        harmonic_mean = self.pr_hmean()
        return harmonic_mean
1213
1214 1
    def f_measure(self):
        r"""Return :math:`F`-measure.

        The :math:`F`-measure is obtained as the harmonic mean of precision
        and recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        Returns
        -------
        float
            The :math:`F`-measure of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f_measure()
        0.8275862068965516

        """
        harmonic_mean = self.pr_hmean()
        return harmonic_mean
1235
1236 1
    def g_measure(self):
        r"""Return G-measure.

        The :math:`G`-measure is obtained as the geometric mean of precision
        and recall:
        :math:`\sqrt{precision \cdot recall}`

        This is identical to the Fowlkes–Mallows (FM) index for two
        clusters.

        Cf. https://en.wikipedia.org/wiki/F1_score#G-measure

        Cf. https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index

        Returns
        -------
        float
            The :math:`G`-measure of the confusion table

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.g_measure()
        0.828078671210825

        """
        geometric_mean = self.pr_gmean()
        return geometric_mean
1262
1263 1
    def mcc(self):
        r"""Return Matthews correlation coefficient (MCC).

        The Matthews correlation coefficient is defined in
        :cite:`Matthews:1975` as:
        :math:`\frac{(tp \cdot tn) - (fp \cdot fn)}
        {\sqrt{(tp + fp)(tp + fn)(tn + fp)(tn + fn)}}`

        This is equivalent to the geometric mean of informedness and
        markedness, defined above.

        Cf. https://en.wikipedia.org/wiki/Matthews_correlation_coefficient

        Returns
        -------
        float
            The Matthews correlation coefficient of the confusion table, or
            NaN if any of the four marginal sums is 0

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.mcc()
        0.5367450401216932

        """
        true_pos, true_neg = self._tp, self._tn
        false_pos, false_neg = self._fp, self._fn

        # Product of the four marginal sums; its square root is the
        # denominator of the coefficient.
        marginal_product = (
            (true_pos + false_pos)
            * (true_pos + false_neg)
            * (true_neg + false_pos)
            * (true_neg + false_neg)
        )
        if marginal_product == 0:
            # The coefficient is undefined when a marginal sum is empty.
            return float('NaN')

        numerator = (true_pos * true_neg) - (false_pos * false_neg)
        return numerator / math.sqrt(marginal_product)
1303
1304 1
    def significance(self):
        r"""Return the significance, :math:`\chi^{2}`.

        Significance is defined as:
        :math:`\chi^{2} =
        \frac{(tp \cdot tn - fp \cdot fn)^{2} (tp + tn + fp + fn)}
        {(tp + fp)(tp + fn)(tn + fp)(tn + fn)}`

        Also: :math:`\chi^{2} = MCC^{2} \cdot n`

        Cf. https://en.wikipedia.org/wiki/Pearson%27s_chi-square_test

        Returns
        -------
        float
            The significance of the confusion table, or NaN if any of the
            four marginal sums is 0

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.significance()
        66.26190476190476

        """
        true_pos, true_neg = self._tp, self._tn
        false_pos, false_neg = self._fp, self._fn

        # Product of the four marginal sums (the denominator of the formula).
        marginal_product = (
            (true_pos + false_pos)
            * (true_pos + false_neg)
            * (true_neg + false_pos)
            * (true_neg + false_neg)
        )
        if marginal_product == 0:
            # The statistic is undefined when a marginal sum is empty.
            return float('NaN')

        numerator = (true_pos * true_neg - false_pos * false_neg) ** 2 * (
            true_pos + true_neg + false_pos + false_neg
        )
        return numerator / marginal_product
1346
1347 1
    def kappa_statistic(self):
        r"""Return κ statistic.

        The κ statistic is defined as:
        :math:`\kappa = \frac{accuracy - random~ accuracy}
        {1 - random~ accuracy}`

        The κ statistic compares the performance of the classifier relative to
        the performance of a random classifier. :math:`\kappa` = 0 indicates
        performance identical to random. :math:`\kappa` = 1 indicates perfect
        predictive success. :math:`\kappa` = -1 indicates perfect predictive
        failure.

        The statistic is undefined, and NaN is returned, when the population
        is 0 or when the random accuracy is exactly 1 (for example, when
        every sample is a true positive), since the denominator of the
        formula above is then 0.

        Returns
        -------
        float
            The κ statistic of the confusion table, or NaN if it is undefined

        Example
        -------
        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.kappa_statistic()
        0.5344129554655871

        """
        population = self.population()
        if population == 0:
            return float('NaN')
        # Agreement expected by chance, from the products of the marginal
        # sums for the negative and the positive class.
        random_accuracy = (
            (self._tn + self._fp) * (self._tn + self._fn)
            + (self._fn + self._tp) * (self._fp + self._tp)
        ) / population ** 2
        if random_accuracy == 1:
            # 1 - random_accuracy would be 0; avoid a ZeroDivisionError and
            # report the statistic as undefined instead.
            return float('NaN')
        return (self.accuracy() - random_accuracy) / (1 - random_accuracy)
1379
1380
1381
if __name__ == '__main__':
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()
1385