Test Failed
Push — master ( 64abe2...a464fa )
by Chris
04:02 queued 11s
created

ConfusionTable.test_pos_pop()   A

Complexity

Conditions 1

Size

Total Lines 11
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 1
dl 0
loc 11
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
r"""abydos.stats.confusion_table.
20
21
This includes the ConfusionTable object, which includes members capable of
22
calculating the following data based on a confusion table:
23
24
    - population counts
25
    - precision, recall, specificity, negative predictive value, fall-out,
26
      false discovery rate, accuracy, balanced accuracy, informedness,
27
      and markedness
28
    - various means of the precision & recall, including: arithmetic,
29
      geometric, harmonic, quadratic, logarithmic, contraharmonic,
30
      identric (exponential), & Hölder (power/generalized) means
31
    - :math:`F_{\beta}`-scores, :math:`E`-scores, :math:`G`-measures, along
32
      with special functions for :math:`F_{1}`, :math:`F_{0.5}`, &
33
      :math:`F_{2}` scores
34
    - significance & Matthews correlation coefficient calculation
35
"""
36
37
from __future__ import division, unicode_literals
38
39
import math
40
41
from .mean import aghmean, agmean, amean, cmean, ghmean, gmean, \
42
    heronian_mean, hmean, hoelder_mean, imean, lehmer_mean, qmean, \
43
    seiffert_mean
44
45
__all__ = ['ConfusionTable']
46
47
48
class ConfusionTable(object):
49
    """ConfusionTable object.
50
51
    This object is initialized by passing either four integers (or a tuple of
52
    four integers) representing the squares of a confusion table:
53
    true positives, true negatives, false positives, and false negatives
54
55
    The object possesses methods for the calculation of various statistics
56
    based on the confusion table.
57
    """
58
59
    # Class-level defaults for the four confusion-table cells (true positives,
    # true negatives, false positives, false negatives).  __init__ assigns
    # instance attributes of the same names, which shadow these defaults.
    _tp, _tn, _fp, _fn = 0, 0, 0, 0
60
61
    def __init__(self, tp=0, tn=0, fp=0, fn=0):
62
        """Initialize ConfusionTable.
63
64
        :param int tp: true positives (or a tuple, list, or dict); If a tuple
65
            or list is supplied, it must include 4 values in the order [tp, tn,
66
            fp, fn]. If a dict is supplied, it must have 4 keys, namely 'tp',
67
            'tn', 'fp', & 'fn'.
68
        :param int tn: true negatives
69
        :param int fp: false positives
70
        :param int fn: false negatives
71
72
        >>> ct = ConfusionTable(120, 60, 20, 30)
73
        >>> ct == ConfusionTable((120, 60, 20, 30))
74
        True
75
        >>> ct == ConfusionTable([120, 60, 20, 30])
76
        True
77
        >>> ct == ConfusionTable({'tp': 120, 'tn': 60, 'fp': 20, 'fn': 30})
78
        True
79
        """
80
        if isinstance(tp, (tuple, list)):
81
            if len(tp) == 4:
82
                self._tp = tp[0]
83
                self._tn = tp[1]
84
                self._fp = tp[2]
85
                self._fn = tp[3]
86
            else:
87
                raise AttributeError('ConfusionTable requires a 4-tuple ' +
88
                                     'when being created from a tuple.')
89
        elif isinstance(tp, dict):
90
            if 'tp' in tp:
91
                self._tp = tp['tp']
92
            if 'tn' in tp:
93
                self._tn = tp['tn']
94
            if 'fp' in tp:
95
                self._fp = tp['fp']
96
            if 'fn' in tp:
97
                self._fn = tp['fn']
98
        else:
99
            self._tp = tp
100
            self._tn = tn
101
            self._fp = fp
102
            self._fn = fn
103
104
    def __eq__(self, other):
105
        """Perform eqality (==) comparison.
106
107
        Compares a ConfusionTable to another ConfusionTable or its equivalent
108
        in the form of a tuple, list, or dict.
109
110
        :returns: True if two ConfusionTables are the same object or all four
111
        of their attributes are equal
112
        :rtype: bool
113
114
        >>> ct1 = ConfusionTable(120, 60, 20, 30)
115
        >>> ct2 = ConfusionTable(120, 60, 20, 30)
116
        >>> ct3 = ConfusionTable(60, 30, 10, 15)
117
118
        >>> ct1 == ct2
119
        True
120
        >>> ct1 == ct3
121
        False
122
123
        >>> ct1 != ct2
124
        False
125
        >>> ct1 != ct3
126
        True
127
        """
128
        if isinstance(other, ConfusionTable):
129
            if id(self) == id(other):
130
                return True
131
            if ((self._tp == other.true_pos() and
132
                 self._tn == other.true_neg() and
133
                 self._fp == other.false_pos() and
134
                 self._fn == other.false_neg())):
135
                return True
136
        elif isinstance(other, (tuple, list)):
137
            if ((self._tp == other[0] and self._tn == other[1] and
138
                 self._fp == other[2] and self._fn == other[3])):
139
                return True
140
        elif isinstance(other, dict):
141
            if ((self._tp == other['tp'] and self._tn == other['tn'] and
142
                 self._fp == other['fp'] and self._fn == other['fn'])):
143
                return True
144
        return False
145
146
    def __str__(self):
147
        """Cast to str.
148
149
        :returns: a human-readable version of the confusion table
150
        :rtype: str
151
152
        >>> ct = ConfusionTable(120, 60, 20, 30)
153
        >>> str(ct)
154
        'tp:120, tn:60, fp:20, fn:30'
155
        """
156
        return ('tp:' + str(self._tp) + ', tn:' + str(self._tn) + ', fp:' +
157
                str(self._fp) + ', fn:' + str(self._fn))
158
159
    def to_tuple(self):
160
        """Cast to tuple.
161
162
        :returns: the confusion table as a 4-tuple (tp, tn, fp, fn)
163
        :rtype: tuple
164
165
        >>> ct = ConfusionTable(120, 60, 20, 30)
166
        >>> ct.to_tuple()
167
        (120, 60, 20, 30)
168
        """
169
        return self._tp, self._tn, self._fp, self._fn
170
171
    def to_dict(self):
172
        """Cast to dict.
173
174
        :returns: the confusion table as a dict
175
        :rtype: dict
176
177
        >>> ct = ConfusionTable(120, 60, 20, 30)
178
        >>> import pprint
179
        >>> pprint.pprint(ct.to_dict())
180
        {'fn': 30, 'fp': 20, 'tn': 60, 'tp': 120}
181
        """
182
        return {'tp': self._tp, 'tn': self._tn,
183
                'fp': self._fp, 'fn': self._fn}
184
185
    def true_pos(self):
186
        """Return true positives.
187
188
        :returns: the true positives of the confusion table
189
        :rtype: int
190
191
        >>> ct = ConfusionTable(120, 60, 20, 30)
192
        >>> ct.true_pos()
193
        120
194
        """
195
        return self._tp
196
197
    def true_neg(self):
198
        """Return true negatives.
199
200
        :returns: the true negatives of the confusion table
201
        :rtype: int
202
203
        >>> ct = ConfusionTable(120, 60, 20, 30)
204
        >>> ct.true_neg()
205
        60
206
        """
207
        return self._tn
208
209
    def false_pos(self):
210
        """Return false positives.
211
212
        :returns: the false positives of the confusion table
213
        :rtype: int
214
215
        >>> ct = ConfusionTable(120, 60, 20, 30)
216
        >>> ct.false_pos()
217
        20
218
        """
219
        return self._fp
220
221
    def false_neg(self):
222
        """Return false negatives.
223
224
        :returns: the false negatives of the confusion table
225
        :rtype: int
226
227
        >>> ct = ConfusionTable(120, 60, 20, 30)
228
        >>> ct.false_neg()
229
        30
230
        """
231
        return self._fn
232
233
    def correct_pop(self):
234
        """Return correct population.
235
236
        :returns: the correct population of the confusion table
237
        :rtype: int
238
239
        >>> ct = ConfusionTable(120, 60, 20, 30)
240
        >>> ct.correct_pop()
241
        180
242
        """
243
        return self._tp + self._tn
244
245
    def error_pop(self):
246
        """Return error population.
247
248
        :returns: The error population of the confusion table
249
        :rtype: int
250
251
        >>> ct = ConfusionTable(120, 60, 20, 30)
252
        >>> ct.error_pop()
253
        50
254
        """
255
        return self._fp + self._fn
256
257
    def test_pos_pop(self):
258
        """Return test positive population.
259
260
        :returns: The test positive population of the confusion table
261
        :rtype: int
262
263
        >>> ct = ConfusionTable(120, 60, 20, 30)
264
        >>> ct.test_pos_pop()
265
        140
266
        """
267
        return self._tp + self._fp
268
269
    def test_neg_pop(self):
270
        """Return test negative population.
271
272
        :returns: The test negative population of the confusion table
273
        :rtype: int
274
275
        >>> ct = ConfusionTable(120, 60, 20, 30)
276
        >>> ct.test_neg_pop()
277
        90
278
        """
279
        return self._tn + self._fn
280
281
    def cond_pos_pop(self):
282
        """Return condition positive population.
283
284
        :returns: The condition positive population of the confusion table
285
        :rtype: int
286
287
        >>> ct = ConfusionTable(120, 60, 20, 30)
288
        >>> ct.cond_pos_pop()
289
        150
290
        """
291
        return self._tp + self._fn
292
293
    def cond_neg_pop(self):
294
        """Return condition negative population.
295
296
        :returns: The condition negative population of the confusion table
297
        :rtype: int
298
299
        >>> ct = ConfusionTable(120, 60, 20, 30)
300
        >>> ct.cond_neg_pop()
301
        80
302
        """
303
        return self._fp + self._tn
304
305
    def population(self):
306
        """Return population, N.
307
308
        :returns: The population (N) of the confusion table
309
        :rtype: int
310
311
        >>> ct = ConfusionTable(120, 60, 20, 30)
312
        >>> ct.population()
313
        230
314
        """
315
        return self._tp + self._tn + self._fp + self._fn
316
317
    def precision(self):
318
        r"""Return precision.
319
320
        Precision is defined as :math:`\frac{tp}{tp + fp}`
321
322
        AKA positive predictive value (PPV)
323
324
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
325
326
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Precision
327
328
        :returns: The precision of the confusion table
329
        :rtype: float
330
331
        >>> ct = ConfusionTable(120, 60, 20, 30)
332
        >>> ct.precision()
333
        0.8571428571428571
334
        """
335
        if self._tp + self._fp == 0:
336
            return float('NaN')
337
        return self._tp / (self._tp + self._fp)
338
339
    def precision_gain(self):
340
        r"""Return gain in precision.
341
342
        The gain in precision is defined as:
343
        :math:`G(precision) = \frac{precision}{random~ precision}`
344
345
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
346
347
        :returns: The gain in precision of the confusion table
348
        :rtype: float
349
350
        >>> ct = ConfusionTable(120, 60, 20, 30)
351
        >>> ct.precision_gain()
352
        1.3142857142857143
353
        """
354
        if self.population() == 0:
355
            return float('NaN')
356
        random_precision = self.cond_pos_pop()/self.population()
357
        return self.precision()/random_precision
358
359
    def recall(self):
360
        r"""Return recall.
361
362
        Recall is defined as :math:`\frac{tp}{tp + fn}`
363
364
        AKA sensitivity
365
366
        AKA true positive rate (TPR)
367
368
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
369
370
        Cf. https://en.wikipedia.org/wiki/Sensitivity_(test)
371
372
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Recall
373
374
        :returns: The recall of the confusion table
375
        :rtype: float
376
377
        >>> ct = ConfusionTable(120, 60, 20, 30)
378
        >>> ct.recall()
379
        0.8
380
        """
381
        if self._tp + self._fn == 0:
382
            return float('NaN')
383
        return self._tp / (self._tp + self._fn)
384
385
    def specificity(self):
386
        r"""Return specificity.
387
388
        Specificity is defined as :math:`\frac{tn}{tn + fp}`
389
390
        AKA true negative rate (TNR)
391
392
        Cf. https://en.wikipedia.org/wiki/Specificity_(tests)
393
394
        :returns: The specificity of the confusion table
395
        :rtype: float
396
397
        >>> ct = ConfusionTable(120, 60, 20, 30)
398
        >>> ct.specificity()
399
        0.75
400
        """
401
        if self._tn + self._fp == 0:
402
            return float('NaN')
403
        return self._tn / (self._tn + self._fp)
404
405
    def npv(self):
406
        r"""Return negative predictive value (NPV).
407
408
        NPV is defined as :math:`\frac{tn}{tn + fn}`
409
410
        Cf. https://en.wikipedia.org/wiki/Negative_predictive_value
411
412
        :returns: The negative predictive value of the confusion table
413
        :rtype: float
414
415
        >>> ct = ConfusionTable(120, 60, 20, 30)
416
        >>> ct.npv()
417
        0.6666666666666666
418
        """
419
        if self._tn + self._fn == 0:
420
            return float('NaN')
421
        return self._tn / (self._tn + self._fn)
422
423
    def fallout(self):
424
        r"""Return fall-out.
425
426
        Fall-out is defined as :math:`\frac{fp}{fp + tn}`
427
428
        AKA false positive rate (FPR)
429
430
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Fall-out
431
432
        :returns: The fall-out of the confusion table
433
        :rtype: float
434
435
        >>> ct = ConfusionTable(120, 60, 20, 30)
436
        >>> ct.fallout()
437
        0.25
438
        """
439
        if self._fp + self._tn == 0:
440
            return float('NaN')
441
        return self._fp / (self._fp + self._tn)
442
443
    def fdr(self):
444
        r"""Return false discovery rate (FDR).
445
446
        False discovery rate is defined as :math:`\frac{fp}{fp + tp}`
447
448
        Cf. https://en.wikipedia.org/wiki/False_discovery_rate
449
450
        :returns: The false discovery rate of the confusion table
451
        :rtype: float
452
453
        >>> ct = ConfusionTable(120, 60, 20, 30)
454
        >>> ct.fdr()
455
        0.14285714285714285
456
        """
457
        if self._fp + self._tp == 0:
458
            return float('NaN')
459
        return self._fp / (self._fp + self._tp)
460
461
    def accuracy(self):
462
        r"""Return accuracy.
463
464
        Accuracy is defined as :math:`\frac{tp + tn}{population}`
465
466
        Cf. https://en.wikipedia.org/wiki/Accuracy
467
468
        :returns: The accuracy of the confusion table
469
        :rtype: float
470
471
        >>> ct = ConfusionTable(120, 60, 20, 30)
472
        >>> ct.accuracy()
473
        0.782608695652174
474
        """
475
        if self.population() == 0:
476
            return float('NaN')
477
        return (self._tp + self._tn) / self.population()
478
479
    def accuracy_gain(self):
480
        r"""Return gain in accuracy.
481
482
        The gain in accuracy is defined as:
483
        :math:`G(accuracy) = \frac{accuracy}{random~ accuracy}`
484
485
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
486
487
        :returns: The gain in accuracy of the confusion table
488
        :rtype: float
489
490
        >>> ct = ConfusionTable(120, 60, 20, 30)
491
        >>> ct.accuracy_gain()
492
        1.4325259515570934
493
        """
494
        if self.population() == 0:
495
            return float('NaN')
496
        random_accuracy = ((self.cond_pos_pop()/self.population())**2 +
497
                           (self.cond_neg_pop()/self.population())**2)
498
        return self.accuracy()/random_accuracy
499
500
    def balanced_accuracy(self):
501
        r"""Return balanced accuracy.
502
503
        Balanced accuracy is defined as
504
        :math:`\frac{sensitivity + specificity}{2}`
505
506
        Cf. https://en.wikipedia.org/wiki/Accuracy
507
508
        :returns: The balanced accuracy of the confusion table
509
        :rtype: float
510
511
        >>> ct = ConfusionTable(120, 60, 20, 30)
512
        >>> ct.balanced_accuracy()
513
        0.775
514
        """
515
        return 0.5 * (self.recall() + self.specificity())
516
517
    def informedness(self):
518
        """Return informedness.
519
520
        Informedness is defined as :math:`sensitivity + specificity - 1`.
521
522
        AKA Youden's J statistic
523
524
        AKA DeltaP'
525
526
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
527
528
        Cf.
529
        http://dspace.flinders.edu.au/xmlui/bitstream/handle/2328/27165/Powers%20Evaluation.pdf
530
531
        :returns: The informedness of the confusion table
532
        :rtype: float
533
534
        >>> ct = ConfusionTable(120, 60, 20, 30)
535
        >>> ct.informedness()
536
        0.55
537
        """
538
        return self.recall() + self.specificity() - 1
539
540
    def markedness(self):
541
        """Return markedness.
542
543
        Markedness is defined as :math:`precision + npv - 1`
544
545
        AKA DeltaP
546
547
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
548
549
        Cf.
550
        http://dspace.flinders.edu.au/xmlui/bitstream/handle/2328/27165/Powers%20Evaluation.pdf
551
552
        :returns: The markedness of the confusion table
553
        :rtype: float
554
555
        >>> ct = ConfusionTable(120, 60, 20, 30)
556
        >>> ct.markedness()
557
        0.5238095238095237
558
        """
559
        return self.precision() + self.npv() - 1
560
561
    def pr_amean(self):
        r"""Return arithmetic mean of precision & recall.

        The arithmetic mean of precision and recall is defined as:
        :math:`\frac{precision + recall}{2}`

        (The docstring previously stated the product rather than the sum,
        which is not the arithmetic mean; the code has always computed the
        sum-based mean via :py:func:`amean`.)

        Cf. https://en.wikipedia.org/wiki/Arithmetic_mean

        :returns: The arithmetic mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_amean()
        0.8285714285714285
        """
        return amean((self.precision(), self.recall()))
578
579
    def pr_gmean(self):
        r"""Return geometric mean of precision & recall.

        The geometric mean of precision and recall is defined as:
        :math:`\sqrt{precision \cdot recall}`

        Cf. https://en.wikipedia.org/wiki/Geometric_mean

        :returns: The geometric mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_gmean()
        0.828078671210825
        """
        pr_pair = (self.precision(), self.recall())
        return gmean(pr_pair)
596
597
    def pr_hmean(self):
        r"""Return harmonic mean of precision & recall.

        The harmonic mean of precision and recall is defined as:
        :math:`\frac{2 \cdot precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/Harmonic_mean

        :returns: The harmonic mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hmean()
        0.8275862068965516
        """
        pr_pair = (self.precision(), self.recall())
        return hmean(pr_pair)
613
614
    def pr_qmean(self):
        r"""Return quadratic mean of precision & recall.

        The quadratic mean of precision and recall is defined as:
        :math:`\sqrt{\frac{precision^{2} + recall^{2}}{2}}`

        Cf. https://en.wikipedia.org/wiki/Quadratic_mean

        :returns: The quadratic mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_qmean()
        0.8290638930598233
        """
        pr_pair = (self.precision(), self.recall())
        return qmean(pr_pair)
631
632
    def pr_cmean(self):
        r"""Return contraharmonic mean of precision & recall.

        The contraharmonic mean is:
        :math:`\frac{precision^{2} + recall^{2}}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/Contraharmonic_mean

        :returns: The contraharmonic mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_cmean()
        0.8295566502463055
        """
        pr_pair = (self.precision(), self.recall())
        return cmean(pr_pair)
649
650
    def pr_lmean(self):
651
        r"""Return logarithmic mean of precision & recall.
652
653
        The logarithmic mean is:
654
        0 if either precision or recall is 0,
655
        the precision if they are equal,
656
        otherwise :math:`\frac{precision - recall}
657
        {ln(precision) - ln(recall)}`
658
659
        Cf. https://en.wikipedia.org/wiki/Logarithmic_mean
660
661
        :returns: The logarithmic mean of the confusion table's precision &
662
            recall
663
        :rtype: float
664
665
        >>> ct = ConfusionTable(120, 60, 20, 30)
666
        >>> ct.pr_lmean()
667
        0.8282429171492667
668
        """
669
        precision = self.precision()
670
        recall = self.recall()
671
        if not precision or not recall:
672
            return 0.0
673
        elif precision == recall:
674
            return precision
675
        return ((precision - recall) /
676
                (math.log(precision) - math.log(recall)))
677
678
    def pr_imean(self):
        r"""Return identric (exponential) mean of precision & recall.

        The identric mean is:
        precision if precision = recall,
        otherwise :math:`\frac{1}{e} \cdot
        \sqrt[precision - recall]{\frac{precision^{precision}}
        {recall^{recall}}}`

        Cf. https://en.wikipedia.org/wiki/Identric_mean

        :returns: The identric mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_imean()
        0.8284071826325543
        """
        pr_pair = (self.precision(), self.recall())
        return imean(pr_pair)
697
698
    def pr_seiffert_mean(self):
        r"""Return Seiffert's mean of precision & recall.

        Seiffert's mean of precision and recall is:
        :math:`\frac{precision - recall}{4 \cdot arctan
        \sqrt{\frac{precision}{recall}} - \pi}`

        Cf. http://www.helsinki.fi/~hasto/pp/miaPreprint.pdf

        :returns: Seiffert's mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_seiffert_mean()
        0.8284071696048312
        """
        pr_pair = (self.precision(), self.recall())
        return seiffert_mean(pr_pair)
715
716
    def pr_lehmer_mean(self, exp=2.0):
        r"""Return Lehmer mean of precision & recall.

        The Lehmer mean is:
        :math:`\frac{precision^{exp} + recall^{exp}}
        {precision^{exp-1} + recall^{exp-1}}`

        Cf. https://en.wikipedia.org/wiki/Lehmer_mean

        :param float exp: The exponent of the Lehmer mean
        :returns: The Lehmer mean for the given exponent of the confusion
            table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_lehmer_mean()
        0.8295566502463055
        """
        pr_pair = (self.precision(), self.recall())
        return lehmer_mean(pr_pair, exp)
735
736
    def pr_heronian_mean(self):
        r"""Return Heronian mean of precision & recall.

        The Heronian mean of precision and recall is defined as:
        :math:`\frac{precision + \sqrt{precision \cdot recall} + recall}{3}`

        Cf. https://en.wikipedia.org/wiki/Heronian_mean

        :returns: The Heronian mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_heronian_mean()
        0.8284071761178939
        """
        pr_pair = (self.precision(), self.recall())
        return heronian_mean(pr_pair)
752
753
    def pr_hoelder_mean(self, exp=2):
        r"""Return Hölder (power/generalized) mean of precision & recall.

        The power mean of precision and recall is defined as:
        :math:`\left(\frac{precision^{exp} + recall^{exp}}{2}
        \right)^{\frac{1}{exp}}`
        for :math:`exp \ne 0`, and the geometric mean for :math:`exp = 0`

        (The docstring previously gave
        :math:`\frac{1}{2} \cdot \sqrt[exp]{precision^{exp} + recall^{exp}}`,
        which halves the root instead of averaging inside it and is not the
        generalized mean; the code has always delegated to
        :py:func:`hoelder_mean`.)

        Cf. https://en.wikipedia.org/wiki/Generalized_mean

        :param float exp: The exponent of the Hölder mean
        :returns: The Hölder mean for the given exponent of the confusion
            table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hoelder_mean()
        0.8290638930598233
        """
        return hoelder_mean((self.precision(), self.recall()), exp)
773
774
    def pr_agmean(self):
        """Return arithmetic-geometric mean of precision & recall.

        Iterates between arithmetic & geometric means until they converge to
        a single value (rounded to 12 digits)

        Cf. https://en.wikipedia.org/wiki/Arithmetic-geometric_mean

        :returns: The arithmetic-geometric mean of the confusion table's
            precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_agmean()
        0.8283250315702829
        """
        pr_pair = (self.precision(), self.recall())
        return agmean(pr_pair)
791
792
    def pr_ghmean(self):
        """Return geometric-harmonic mean of precision & recall.

        Iterates between geometric & harmonic means until they converge to
        a single value (rounded to 12 digits)

        Cf. https://en.wikipedia.org/wiki/Geometric-harmonic_mean

        :returns: The geometric-harmonic mean of the confusion table's
            precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_ghmean()
        0.8278323841238441
        """
        pr_pair = (self.precision(), self.recall())
        return ghmean(pr_pair)
809
810
    def pr_aghmean(self):
        """Return arithmetic-geometric-harmonic mean of precision & recall.

        Iterates over arithmetic, geometric, & harmonic means until they
        converge to a single value (rounded to 12 digits), following the
        method described by Raïssouli, Leazizi, & Chergui:
        http://www.emis.de/journals/JIPAM/images/014_08_JIPAM/014_08.pdf

        :returns: The arithmetic-geometric-harmonic mean of the confusion
            table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_aghmean()
        0.8280786712108288
        """
        pr_pair = (self.precision(), self.recall())
        return aghmean(pr_pair)
827
828
    def fbeta_score(self, beta=1.0):
829
        r"""Return :math:`F_{\beta}` score.
830
831
        :math:`F_{\beta}` for a positive real value :math:`\beta` "measures
832
        the effectiveness of retrieval with respect to a user who
833
        attaches :math:`\beta` times as much importance to recall as
834
        precision" (van Rijsbergen 1979)
835
836
        :math:`F_{\beta}` score is defined as:
837
        :math:`(1 + \beta^2) \cdot \frac{precision \cdot recall}
838
        {((\beta^2 \cdot precision) + recall)}`
839
840
        Cf. https://en.wikipedia.org/wiki/F1_score
841
842
        :params float beta: The :math:`\beta` parameter in the above formula
843
        :returns: The :math:`F_{\beta}` of the confusion table
844
        :rtype: float
845
846
        >>> ct = ConfusionTable(120, 60, 20, 30)
847
        >>> ct.fbeta_score()
848
        0.8275862068965518
849
        >>> ct.fbeta_score(beta=0.1)
850
        0.8565371024734982
851
        """
852
        if beta <= 0:
853
            raise AttributeError('Beta must be a positive real value.')
854
        precision = self.precision()
855
        recall = self.recall()
856
        return ((1 + beta**2) *
857
                precision * recall / ((beta**2 * precision) + recall))
858
859
    def f2_score(self):
860
        """Return :math:`F_{2}`.
861
862
        The :math:`F_{2}` score emphasizes recall over precision in comparison
863
        to the :math:`F_{1}` score
864
865
        Cf. https://en.wikipedia.org/wiki/F1_score
866
867
        :returns: The :math:`F_{2}` of the confusion table
868
        :rtype: float
869
870
        >>> ct = ConfusionTable(120, 60, 20, 30)
871
        >>> ct.f2_score()
872
        0.8108108108108109
873
        """
874
        return self.fbeta_score(2.0)
875
876
    def fhalf_score(self):
877
        """Return :math:`F_{0.5}` score.
878
879
        The :math:`F_{0.5}` score emphasizes precision over recall in
880
        comparison to the :math:`F_{1}` score
881
882
        Cf. https://en.wikipedia.org/wiki/F1_score
883
884
        :returns: The :math:`F_{0.5}` score of the confusion table
885
        :rtype: float
886
887
        >>> ct = ConfusionTable(120, 60, 20, 30)
888
        >>> ct.fhalf_score()
889
        0.8450704225352114
890
        """
891
        return self.fbeta_score(0.5)
892
893
    def e_score(self, beta=1):
894
        r"""Return :math:`E`-score.
895
896
        This is Van Rijsbergen's effectiveness measure:
897
        :math:`E=1-F_{\beta}`.
898
899
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#F-measure
900
901
        :param float beta: The :math:`\beta` parameter in the above formula
902
        :returns: The :math:`E`-score of the confusion table
903
        :rtype: float
904
905
        >>> ct = ConfusionTable(120, 60, 20, 30)
906
        >>> ct.e_score()
907
        0.17241379310344818
908
        """
909
        return 1-self.fbeta_score(beta)
910
911
    def f1_score(self):
        r"""Return :math:`F_{1}` score.

        :math:`F_{1}` score is the harmonic mean of precision and recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        :returns: The :math:`F_{1}` of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f1_score()
        0.8275862068965516
        """
        # Identical to the harmonic mean of precision & recall.
        return self.pr_hmean()
927
928
    def f_measure(self):
        r"""Return :math:`F`-measure.

        :math:`F`-measure is the harmonic mean of precision and recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        :returns: The :math:`F`-measure of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f_measure()
        0.8275862068965516
        """
        # Identical to the harmonic mean of precision & recall.
        return self.pr_hmean()
944
945
    def g_measure(self):
        r"""Return G-measure.

        :math:`G`-measure is the geometric mean of precision and recall:
        :math:`\sqrt{precision \cdot recall}`

        This is identical to the Fowlkes–Mallows (FM) index for two
        clusters.

        Cf. https://en.wikipedia.org/wiki/F1_score#G-measure

        Cf. https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index

        :returns: The :math:`G`-measure of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.g_measure()
        0.828078671210825
        """
        # G is exactly the geometric mean of precision & recall.
        return self.pr_gmean()
    def mcc(self):
        r"""Return Matthews correlation coefficient (MCC).

        The Matthews correlation coefficient is defined as:
        :math:`\frac{(tp \cdot tn) - (fp \cdot fn)}
        {\sqrt{(tp + fp)(tp + fn)(tn + fp)(tn + fn)}}`

        This is equivalent to the geometric mean of informedness and
        markedness, defined above.

        Cf. https://en.wikipedia.org/wiki/Matthews_correlation_coefficient

        :returns: The Matthews correlation coefficient of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.mcc()
        0.5367450401216932
        """
        # The product of the four marginal sums; MCC is undefined (NaN)
        # whenever any marginal is zero.
        denom = ((self._tp + self._fp) * (self._tp + self._fn) *
                 (self._tn + self._fp) * (self._tn + self._fn))
        if denom == 0:
            return float('NaN')
        return (((self._tp * self._tn) - (self._fp * self._fn)) /
                math.sqrt(denom))
    def significance(self):
        r"""Return the significance, :math:`\chi^{2}`.

        Significance is defined as:
        :math:`\chi^{2} =
        \frac{(tp \cdot tn - fp \cdot fn)^{2} (tp + tn + fp + fn)}
        {(tp + fp)(tp + fn)(tn + fp)(tn + fn)}`

        Also: :math:`\chi^{2} = MCC^{2} \cdot n`

        Cf. https://en.wikipedia.org/wiki/Pearson%27s_chi-square_test

        :returns: The significance of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.significance()
        66.26190476190476
        """
        # The product of the four marginal sums; chi^2 is undefined (NaN)
        # whenever any marginal is zero.
        denom = ((self._tp + self._fp) * (self._tp + self._fn) *
                 (self._tn + self._fp) * (self._tn + self._fn))
        if denom == 0:
            return float('NaN')
        return (((self._tp * self._tn - self._fp * self._fn)**2 *
                 (self._tp + self._tn + self._fp + self._fn)) /
                denom)
    def kappa_statistic(self):
        r"""Return κ statistic.

        The κ statistic is defined as:
        :math:`\kappa = \frac{accuracy - random~ accuracy}
        {1 - random~ accuracy}`

        The κ statistic compares the performance of the classifier relative to
        the performance of a random classifier. κ = 0 indicates performance
        identical to random. κ = 1 indicates perfect predictive success.
        κ = -1 indicates perfect predictive failure.

        :returns: The κ statistic of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.kappa_statistic()
        0.5344129554655871
        """
        # Undefined for an empty table.
        if self.population() == 0:
            return float('NaN')
        # Expected accuracy of a classifier that guesses at random with
        # the same marginal distribution.
        random_accuracy = (((self._tn + self._fp) *
                            (self._tn + self._fn) +
                            (self._fn + self._tp) *
                            (self._fp + self._tp)) /
                           self.population()**2)
        return (self.accuracy() - random_accuracy) / (1 - random_accuracy)
if __name__ == '__main__':
    # Run the module's doctest examples when executed as a script.
    import doctest
    doctest.testmod()