Completed
Branch master (78a222)
by Chris
14:36
created

ConfusionTable.pr_amean()   A

Complexity

Conditions 1

Size

Total Lines 17
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 17
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nop 1
crap 1
1
# -*- coding: utf-8 -*-
0 ignored issues
show
coding-style introduced by
Too many lines in module (1106/1000)
Loading history...
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
r"""abydos.stats._confusion_table.
20
21
This includes the ConfusionTable object, which includes members capable of
22
calculating the following data based on a confusion table:
23
24
    - population counts
25
    - precision, recall, specificity, negative predictive value, fall-out,
26
      false discovery rate, accuracy, balanced accuracy, informedness,
27
      and markedness
28
    - various means of the precision & recall, including: arithmetic,
29
      geometric, harmonic, quadratic, logarithmic, contraharmonic,
30
      identric (exponential), & Hölder (power/generalized) means
31
    - :math:`F_{\beta}`-scores, :math:`E`-scores, :math:`G`-measures, along
32
      with special functions for :math:`F_{1}`, :math:`F_{0.5}`, &
33
      :math:`F_{2}` scores
34
    - significance & Matthews correlation coefficient calculation
35
"""
36
37 1
from __future__ import division, unicode_literals
38
39 1
import math
40
41 1
from ._mean import (
42
    aghmean,
43
    agmean,
44
    amean,
45
    cmean,
46
    ghmean,
47
    gmean,
48
    heronian_mean,
49
    hmean,
50
    hoelder_mean,
51
    imean,
52
    lehmer_mean,
53
    qmean,
54
    seiffert_mean,
55
)
56
57 1
__all__ = ['ConfusionTable']
58
59
60 1
class ConfusionTable(object):
0 ignored issues
show
Unused Code introduced by
The variable __class__ seems to be unused.
Loading history...
best-practice introduced by
Too many public methods (49/20)
Loading history...
61
    """ConfusionTable object.
62
63
    This object is initialized by passing either four integers (or a tuple of
64
    four integers) representing the squares of a confusion table:
65
    true positives, true negatives, false positives, and false negatives
66
67
    The object possesses methods for the calculation of various statistics
68
    based on the confusion table.
69
    """
70
71 1
    _tp, _tn, _fp, _fn = 0, 0, 0, 0
72
73 1
    def __init__(self, tp=0, tn=0, fp=0, fn=0):
74
        """Initialize ConfusionTable.
75
76
        :param int tp: true positives (or a tuple, list, or dict); If a tuple
77
            or list is supplied, it must include 4 values in the order [tp, tn,
78
            fp, fn]. If a dict is supplied, it must have 4 keys, namely 'tp',
79
            'tn', 'fp', & 'fn'.
80
        :param int tn: true negatives
81
        :param int fp: false positives
82
        :param int fn: false negatives
83
84
        >>> ct = ConfusionTable(120, 60, 20, 30)
85
        >>> ct == ConfusionTable((120, 60, 20, 30))
86
        True
87
        >>> ct == ConfusionTable([120, 60, 20, 30])
88
        True
89
        >>> ct == ConfusionTable({'tp': 120, 'tn': 60, 'fp': 20, 'fn': 30})
90
        True
91
        """
92 1
        if isinstance(tp, (tuple, list)):
93 1
            if len(tp) == 4:
94 1
                self._tp = tp[0]
95 1
                self._tn = tp[1]
96 1
                self._fp = tp[2]
97 1
                self._fn = tp[3]
98
            else:
99 1
                raise AttributeError(
100
                    'ConfusionTable requires a 4-tuple '
101
                    + 'when being created from a tuple.'
102
                )
103 1
        elif isinstance(tp, dict):
104 1
            if 'tp' in tp:
105 1
                self._tp = tp['tp']
106 1
            if 'tn' in tp:
107 1
                self._tn = tp['tn']
108 1
            if 'fp' in tp:
109 1
                self._fp = tp['fp']
110 1
            if 'fn' in tp:
111 1
                self._fn = tp['fn']
112
        else:
113 1
            self._tp = tp
114 1
            self._tn = tn
115 1
            self._fp = fp
116 1
            self._fn = fn
117
118 1
    def __eq__(self, other):
119
        """Perform eqality (==) comparison.
120
121
        Compares a ConfusionTable to another ConfusionTable or its equivalent
122
        in the form of a tuple, list, or dict.
123
124
        :returns: True if two ConfusionTables are the same object or all four
125
        of their attributes are equal
126
        :rtype: bool
127
128
        >>> ct1 = ConfusionTable(120, 60, 20, 30)
129
        >>> ct2 = ConfusionTable(120, 60, 20, 30)
130
        >>> ct3 = ConfusionTable(60, 30, 10, 15)
131
132
        >>> ct1 == ct2
133
        True
134
        >>> ct1 == ct3
135
        False
136
137
        >>> ct1 != ct2
138
        False
139
        >>> ct1 != ct3
140
        True
141
        """
142 1
        if isinstance(other, ConfusionTable):
143 1
            if id(self) == id(other):
144 1
                return True
145 1
            if (
146
                self._tp == other.true_pos()
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
147
                and self._tn == other.true_neg()
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
148
                and self._fp == other.false_pos()
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
149
                and self._fn == other.false_neg()
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
150
            ):
151 1
                return True
152 1
        elif isinstance(other, (tuple, list)):
153 1
            if (
154
                self._tp == other[0]
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
155
                and self._tn == other[1]
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
156
                and self._fp == other[2]
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
157
                and self._fn == other[3]
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
158
            ):
159 1
                return True
160 1
        elif isinstance(other, dict):
161 1
            if (
162
                self._tp == other['tp']
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
163
                and self._tn == other['tn']
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
164
                and self._fp == other['fp']
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
165
                and self._fn == other['fn']
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
166
            ):
167 1
                return True
168 1
        return False
169
170 1
    def __str__(self):
171
        """Cast to str.
172
173
        :returns: a human-readable version of the confusion table
174
        :rtype: str
175
176
        >>> ct = ConfusionTable(120, 60, 20, 30)
177
        >>> str(ct)
178
        'tp:120, tn:60, fp:20, fn:30'
179
        """
180 1
        return (
181
            'tp:'
182
            + str(self._tp)
183
            + ', tn:'
184
            + str(self._tn)
185
            + ', fp:'
186
            + str(self._fp)
187
            + ', fn:'
188
            + str(self._fn)
189
        )
190
191 1
    def to_tuple(self):
192
        """Cast to tuple.
193
194
        :returns: the confusion table as a 4-tuple (tp, tn, fp, fn)
195
        :rtype: tuple
196
197
        >>> ct = ConfusionTable(120, 60, 20, 30)
198
        >>> ct.to_tuple()
199
        (120, 60, 20, 30)
200
        """
201 1
        return self._tp, self._tn, self._fp, self._fn
202
203 1
    def to_dict(self):
204
        """Cast to dict.
205
206
        :returns: the confusion table as a dict
207
        :rtype: dict
208
209
        >>> ct = ConfusionTable(120, 60, 20, 30)
210
        >>> import pprint
211
        >>> pprint.pprint(ct.to_dict())
212
        {'fn': 30, 'fp': 20, 'tn': 60, 'tp': 120}
213
        """
214 1
        return {'tp': self._tp, 'tn': self._tn, 'fp': self._fp, 'fn': self._fn}
215
216 1
    def true_pos(self):
217
        """Return true positives.
218
219
        :returns: the true positives of the confusion table
220
        :rtype: int
221
222
        >>> ct = ConfusionTable(120, 60, 20, 30)
223
        >>> ct.true_pos()
224
        120
225
        """
226 1
        return self._tp
227
228 1
    def true_neg(self):
229
        """Return true negatives.
230
231
        :returns: the true negatives of the confusion table
232
        :rtype: int
233
234
        >>> ct = ConfusionTable(120, 60, 20, 30)
235
        >>> ct.true_neg()
236
        60
237
        """
238 1
        return self._tn
239
240 1
    def false_pos(self):
241
        """Return false positives.
242
243
        :returns: the false positives of the confusion table
244
        :rtype: int
245
246
        >>> ct = ConfusionTable(120, 60, 20, 30)
247
        >>> ct.false_pos()
248
        20
249
        """
250 1
        return self._fp
251
252 1
    def false_neg(self):
253
        """Return false negatives.
254
255
        :returns: the false negatives of the confusion table
256
        :rtype: int
257
258
        >>> ct = ConfusionTable(120, 60, 20, 30)
259
        >>> ct.false_neg()
260
        30
261
        """
262 1
        return self._fn
263
264 1
    def correct_pop(self):
265
        """Return correct population.
266
267
        :returns: the correct population of the confusion table
268
        :rtype: int
269
270
        >>> ct = ConfusionTable(120, 60, 20, 30)
271
        >>> ct.correct_pop()
272
        180
273
        """
274 1
        return self._tp + self._tn
275
276 1
    def error_pop(self):
277
        """Return error population.
278
279
        :returns: The error population of the confusion table
280
        :rtype: int
281
282
        >>> ct = ConfusionTable(120, 60, 20, 30)
283
        >>> ct.error_pop()
284
        50
285
        """
286 1
        return self._fp + self._fn
287
288 1
    def test_pos_pop(self):
289
        """Return test positive population.
290
291
        :returns: The test positive population of the confusion table
292
        :rtype: int
293
294
        >>> ct = ConfusionTable(120, 60, 20, 30)
295
        >>> ct.test_pos_pop()
296
        140
297
        """
298 1
        return self._tp + self._fp
299
300 1
    def test_neg_pop(self):
301
        """Return test negative population.
302
303
        :returns: The test negative population of the confusion table
304
        :rtype: int
305
306
        >>> ct = ConfusionTable(120, 60, 20, 30)
307
        >>> ct.test_neg_pop()
308
        90
309
        """
310 1
        return self._tn + self._fn
311
312 1
    def cond_pos_pop(self):
313
        """Return condition positive population.
314
315
        :returns: The condition positive population of the confusion table
316
        :rtype: int
317
318
        >>> ct = ConfusionTable(120, 60, 20, 30)
319
        >>> ct.cond_pos_pop()
320
        150
321
        """
322 1
        return self._tp + self._fn
323
324 1
    def cond_neg_pop(self):
325
        """Return condition negative population.
326
327
        :returns: The condition negative population of the confusion table
328
        :rtype: int
329
330
        >>> ct = ConfusionTable(120, 60, 20, 30)
331
        >>> ct.cond_neg_pop()
332
        80
333
        """
334 1
        return self._fp + self._tn
335
336 1
    def population(self):
337
        """Return population, N.
338
339
        :returns: The population (N) of the confusion table
340
        :rtype: int
341
342
        >>> ct = ConfusionTable(120, 60, 20, 30)
343
        >>> ct.population()
344
        230
345
        """
346 1
        return self._tp + self._tn + self._fp + self._fn
347
348 1
    def precision(self):
349
        r"""Return precision.
350
351
        Precision is defined as :math:`\frac{tp}{tp + fp}`
352
353
        AKA positive predictive value (PPV)
354
355
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
356
357
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Precision
358
359
        :returns: The precision of the confusion table
360
        :rtype: float
361
362
        >>> ct = ConfusionTable(120, 60, 20, 30)
363
        >>> ct.precision()
364
        0.8571428571428571
365
        """
366 1
        if self._tp + self._fp == 0:
367 1
            return float('NaN')
368 1
        return self._tp / (self._tp + self._fp)
369
370 1
    def precision_gain(self):
371
        r"""Return gain in precision.
372
373
        The gain in precision is defined as:
374
        :math:`G(precision) = \frac{precision}{random~ precision}`
375
376
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
377
378
        :returns: The gain in precision of the confusion table
379
        :rtype: float
380
381
        >>> ct = ConfusionTable(120, 60, 20, 30)
382
        >>> ct.precision_gain()
383
        1.3142857142857143
384
        """
385 1
        if self.population() == 0:
386 1
            return float('NaN')
387 1
        random_precision = self.cond_pos_pop() / self.population()
388 1
        return self.precision() / random_precision
389
390 1
    def recall(self):
391
        r"""Return recall.
392
393
        Recall is defined as :math:`\frac{tp}{tp + fn}`
394
395
        AKA sensitivity
396
397
        AKA true positive rate (TPR)
398
399
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
400
401
        Cf. https://en.wikipedia.org/wiki/Sensitivity_(test)
402
403
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Recall
404
405
        :returns: The recall of the confusion table
406
        :rtype: float
407
408
        >>> ct = ConfusionTable(120, 60, 20, 30)
409
        >>> ct.recall()
410
        0.8
411
        """
412 1
        if self._tp + self._fn == 0:
413 1
            return float('NaN')
414 1
        return self._tp / (self._tp + self._fn)
415
416 1
    def specificity(self):
417
        r"""Return specificity.
418
419
        Specificity is defined as :math:`\frac{tn}{tn + fp}`
420
421
        AKA true negative rate (TNR)
422
423
        Cf. https://en.wikipedia.org/wiki/Specificity_(tests)
424
425
        :returns: The specificity of the confusion table
426
        :rtype: float
427
428
        >>> ct = ConfusionTable(120, 60, 20, 30)
429
        >>> ct.specificity()
430
        0.75
431
        """
432 1
        if self._tn + self._fp == 0:
433 1
            return float('NaN')
434 1
        return self._tn / (self._tn + self._fp)
435
436 1
    def npv(self):
437
        r"""Return negative predictive value (NPV).
438
439
        NPV is defined as :math:`\frac{tn}{tn + fn}`
440
441
        Cf. https://en.wikipedia.org/wiki/Negative_predictive_value
442
443
        :returns: The negative predictive value of the confusion table
444
        :rtype: float
445
446
        >>> ct = ConfusionTable(120, 60, 20, 30)
447
        >>> ct.npv()
448
        0.6666666666666666
449
        """
450 1
        if self._tn + self._fn == 0:
451 1
            return float('NaN')
452 1
        return self._tn / (self._tn + self._fn)
453
454 1
    def fallout(self):
455
        r"""Return fall-out.
456
457
        Fall-out is defined as :math:`\frac{fp}{fp + tn}`
458
459
        AKA false positive rate (FPR)
460
461
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Fall-out
462
463
        :returns: The fall-out of the confusion table
464
        :rtype: float
465
466
        >>> ct = ConfusionTable(120, 60, 20, 30)
467
        >>> ct.fallout()
468
        0.25
469
        """
470 1
        if self._fp + self._tn == 0:
471 1
            return float('NaN')
472 1
        return self._fp / (self._fp + self._tn)
473
474 1
    def fdr(self):
475
        r"""Return false discovery rate (FDR).
476
477
        False discovery rate is defined as :math:`\frac{fp}{fp + tp}`
478
479
        Cf. https://en.wikipedia.org/wiki/False_discovery_rate
480
481
        :returns: The false discovery rate of the confusion table
482
        :rtype: float
483
484
        >>> ct = ConfusionTable(120, 60, 20, 30)
485
        >>> ct.fdr()
486
        0.14285714285714285
487
        """
488 1
        if self._fp + self._tp == 0:
489 1
            return float('NaN')
490 1
        return self._fp / (self._fp + self._tp)
491
492 1
    def accuracy(self):
493
        r"""Return accuracy.
494
495
        Accuracy is defined as :math:`\frac{tp + tn}{population}`
496
497
        Cf. https://en.wikipedia.org/wiki/Accuracy
498
499
        :returns: The accuracy of the confusion table
500
        :rtype: float
501
502
        >>> ct = ConfusionTable(120, 60, 20, 30)
503
        >>> ct.accuracy()
504
        0.782608695652174
505
        """
506 1
        if self.population() == 0:
507 1
            return float('NaN')
508 1
        return (self._tp + self._tn) / self.population()
509
510 1
    def accuracy_gain(self):
511
        r"""Return gain in accuracy.
512
513
        The gain in accuracy is defined as:
514
        :math:`G(accuracy) = \frac{accuracy}{random~ accuracy}`
515
516
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
517
518
        :returns: The gain in accuracy of the confusion table
519
        :rtype: float
520
521
        >>> ct = ConfusionTable(120, 60, 20, 30)
522
        >>> ct.accuracy_gain()
523
        1.4325259515570934
524
        """
525 1
        if self.population() == 0:
526 1
            return float('NaN')
527 1
        random_accuracy = (self.cond_pos_pop() / self.population()) ** 2 + (
528
            self.cond_neg_pop() / self.population()
529
        ) ** 2
530 1
        return self.accuracy() / random_accuracy
531
532 1
    def balanced_accuracy(self):
533
        r"""Return balanced accuracy.
534
535
        Balanced accuracy is defined as
536
        :math:`\frac{sensitivity + specificity}{2}`
537
538
        Cf. https://en.wikipedia.org/wiki/Accuracy
539
540
        :returns: The balanced accuracy of the confusion table
541
        :rtype: float
542
543
        >>> ct = ConfusionTable(120, 60, 20, 30)
544
        >>> ct.balanced_accuracy()
545
        0.775
546
        """
547 1
        return 0.5 * (self.recall() + self.specificity())
548
549 1
    def informedness(self):
550
        """Return informedness.
551
552
        Informedness is defined as :math:`sensitivity + specificity - 1`.
553
554
        AKA Youden's J statistic
555
556
        AKA DeltaP'
557
558
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
559
560
        Cf.
561
        http://dspace.flinders.edu.au/xmlui/bitstream/handle/2328/27165/Powers%20Evaluation.pdf
562
563
        :returns: The informedness of the confusion table
564
        :rtype: float
565
566
        >>> ct = ConfusionTable(120, 60, 20, 30)
567
        >>> ct.informedness()
568
        0.55
569
        """
570 1
        return self.recall() + self.specificity() - 1
571
572 1
    def markedness(self):
573
        """Return markedness.
574
575
        Markedness is defined as :math:`precision + npv - 1`
576
577
        AKA DeltaP
578
579
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
580
581
        Cf.
582
        http://dspace.flinders.edu.au/xmlui/bitstream/handle/2328/27165/Powers%20Evaluation.pdf
583
584
        :returns: The markedness of the confusion table
585
        :rtype: float
586
587
        >>> ct = ConfusionTable(120, 60, 20, 30)
588
        >>> ct.markedness()
589
        0.5238095238095237
590
        """
591 1
        return self.precision() + self.npv() - 1
592
593 1
    def pr_amean(self):
        r"""Return arithmetic mean of precision & recall.

        The arithmetic mean of precision and recall is defined as:
        :math:`\frac{precision + recall}{2}`

        Cf. https://en.wikipedia.org/wiki/Arithmetic_mean

        :returns: The arithmetic mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_amean()
        0.8285714285714285
        """
        return amean((self.precision(), self.recall()))
610
611 1
    def pr_gmean(self):
        r"""Return geometric mean of precision & recall.

        The geometric mean of precision and recall is defined as:
        :math:`\sqrt{precision \cdot recall}`

        Cf. https://en.wikipedia.org/wiki/Geometric_mean

        :returns: The geometric mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_gmean()
        0.828078671210825
        """
        prec, rec = self.precision(), self.recall()
        return gmean((prec, rec))
628
629 1
    def pr_hmean(self):
        r"""Return harmonic mean of precision & recall.

        The harmonic mean of precision and recall is defined as:
        :math:`\frac{2 \cdot precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/Harmonic_mean

        :returns: The harmonic mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hmean()
        0.8275862068965516
        """
        prec, rec = self.precision(), self.recall()
        return hmean((prec, rec))
645
646 1
    def pr_qmean(self):
        r"""Return quadratic mean of precision & recall.

        The quadratic mean of precision and recall is defined as:
        :math:`\sqrt{\frac{precision^{2} + recall^{2}}{2}}`

        Cf. https://en.wikipedia.org/wiki/Quadratic_mean

        :returns: The quadratic mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_qmean()
        0.8290638930598233
        """
        prec, rec = self.precision(), self.recall()
        return qmean((prec, rec))
663
664 1
    def pr_cmean(self):
        r"""Return contraharmonic mean of precision & recall.

        The contraharmonic mean is:
        :math:`\frac{precision^{2} + recall^{2}}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/Contraharmonic_mean

        :returns: The contraharmonic mean of the confusion table's precision &
            recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_cmean()
        0.8295566502463055
        """
        prec, rec = self.precision(), self.recall()
        return cmean((prec, rec))
681
682 1
    def pr_lmean(self):
683
        r"""Return logarithmic mean of precision & recall.
684
685
        The logarithmic mean is:
686
        0 if either precision or recall is 0,
687
        the precision if they are equal,
688
        otherwise :math:`\frac{precision - recall}
689
        {ln(precision) - ln(recall)}`
690
691
        Cf. https://en.wikipedia.org/wiki/Logarithmic_mean
692
693
        :returns: The logarithmic mean of the confusion table's precision &
694
            recall
695
        :rtype: float
696
697
        >>> ct = ConfusionTable(120, 60, 20, 30)
698
        >>> ct.pr_lmean()
699
        0.8282429171492667
700
        """
701 1
        precision = self.precision()
702 1
        recall = self.recall()
703 1
        if not precision or not recall:
704 1
            return 0.0
705 1
        elif precision == recall:
706 1
            return precision
707 1
        return (precision - recall) / (math.log(precision) - math.log(recall))
708
709 1
    def pr_imean(self):
        r"""Return identric (exponential) mean of precision & recall.

        The identric mean is:
        precision if precision = recall,
        otherwise :math:`\frac{1}{e} \cdot
        \sqrt[precision - recall]{\frac{precision^{precision}}
        {recall^{recall}}}`

        Cf. https://en.wikipedia.org/wiki/Identric_mean

        :returns: The identric mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_imean()
        0.8284071826325543
        """
        prec, rec = self.precision(), self.recall()
        return imean((prec, rec))
728
729 1
    def pr_seiffert_mean(self):
        r"""Return Seiffert's mean of precision & recall.

        Seiffert's mean of precision and recall is:
        :math:`\frac{precision - recall}{4 \cdot arctan
        \sqrt{\frac{precision}{recall}} - \pi}`

        Cf. http://www.helsinki.fi/~hasto/pp/miaPreprint.pdf

        :returns: Seiffert's mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_seiffert_mean()
        0.8284071696048312
        """
        return seiffert_mean((self.precision(), self.recall()))
746
747 1
    def pr_lehmer_mean(self, exp=2.0):
        r"""Return Lehmer mean of precision & recall.

        The Lehmer mean is:
        :math:`\frac{precision^{exp} + recall^{exp}}
        {precision^{exp-1} + recall^{exp-1}}`

        Cf. https://en.wikipedia.org/wiki/Lehmer_mean

        :param float exp: The exponent of the Lehmer mean
        :returns: The Lehmer mean for the given exponent of the confusion
            table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_lehmer_mean()
        0.8295566502463055
        """
        prec, rec = self.precision(), self.recall()
        return lehmer_mean((prec, rec), exp)
766
767 1
    def pr_heronian_mean(self):
        r"""Return Heronian mean of precision & recall.

        The Heronian mean of precision and recall is defined as:
        :math:`\frac{precision + \sqrt{precision \cdot recall} + recall}{3}`

        Cf. https://en.wikipedia.org/wiki/Heronian_mean

        :returns: The Heronian mean of the confusion table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_heronian_mean()
        0.8284071761178939
        """
        prec, rec = self.precision(), self.recall()
        return heronian_mean((prec, rec))
783
784 1
    def pr_hoelder_mean(self, exp=2):
        r"""Return Hölder (power/generalized) mean of precision & recall.

        The power mean of precision and recall is defined as:
        :math:`\left(\frac{precision^{exp} + recall^{exp}}{2}
        \right)^{\frac{1}{exp}}`
        for :math:`exp \ne 0`, and the geometric mean for :math:`exp = 0`

        Cf. https://en.wikipedia.org/wiki/Generalized_mean

        :param float exp: The exponent of the Hölder mean
        :returns: The Hölder mean for the given exponent of the confusion
            table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_hoelder_mean()
        0.8290638930598233
        """
        return hoelder_mean((self.precision(), self.recall()), exp)
804
805 1
    def pr_agmean(self):
        """Return arithmetic-geometric mean of precision & recall.

        Iterates between arithmetic & geometric means until they converge to
        a single value (rounded to 12 digits)

        Cf. https://en.wikipedia.org/wiki/Arithmetic-geometric_mean

        :returns: The arithmetic-geometric mean of the confusion table's
            precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_agmean()
        0.8283250315702829
        """
        prec, rec = self.precision(), self.recall()
        return agmean((prec, rec))
822
823 1
    def pr_ghmean(self):
        """Return geometric-harmonic mean of precision & recall.

        Iterates between geometric & harmonic means until they converge to
        a single value (rounded to 12 digits)

        Cf. https://en.wikipedia.org/wiki/Geometric-harmonic_mean

        :returns: The geometric-harmonic mean of the confusion table's
            precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_ghmean()
        0.8278323841238441
        """
        prec, rec = self.precision(), self.recall()
        return ghmean((prec, rec))
840
841 1
    def pr_aghmean(self):
        """Return arithmetic-geometric-harmonic mean of precision & recall.

        Iterates over arithmetic, geometric, & harmonic means until they
        converge to a single value (rounded to 12 digits), following the
        method described by Raïssouli, Leazizi, & Chergui:
        http://www.emis.de/journals/JIPAM/images/014_08_JIPAM/014_08.pdf

        :returns: The arithmetic-geometric-harmonic mean of the confusion
            table's precision & recall
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.pr_aghmean()
        0.8280786712108288
        """
        prec, rec = self.precision(), self.recall()
        return aghmean((prec, rec))
858
859 1
    def fbeta_score(self, beta=1.0):
        r"""Return :math:`F_{\beta}` score.

        For a positive real value :math:`\beta`, :math:`F_{\beta}` "measures
        the effectiveness of retrieval with respect to a user who
        attaches :math:`\beta` times as much importance to recall as
        precision" (van Rijsbergen 1979)

        :math:`F_{\beta}` score is defined as:
        :math:`(1 + \beta^2) \cdot \frac{precision \cdot recall}
        {((\beta^2 \cdot precision) + recall)}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        :params float beta: The :math:`\beta` parameter in the above formula
        :returns: The :math:`F_{\beta}` of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fbeta_score()
        0.8275862068965518
        >>> ct.fbeta_score(beta=0.1)
        0.8565371024734982
        """
        # Only positive real beta values are meaningful in the formula.
        if beta <= 0:
            raise AttributeError('Beta must be a positive real value.')
        prec = self.precision()
        rec = self.recall()
        beta_sq = beta ** 2
        return (1 + beta_sq) * prec * rec / (beta_sq * prec + rec)
    def f2_score(self):
        """Return :math:`F_{2}`.

        Relative to :math:`F_{1}`, the :math:`F_{2}` score weights recall
        more heavily than precision.

        Cf. https://en.wikipedia.org/wiki/F1_score

        :returns: The :math:`F_{2}` of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f2_score()
        0.8108108108108109
        """
        # F_2 is simply the general F_beta score with beta fixed at 2.
        return self.fbeta_score(beta=2.0)
    def fhalf_score(self):
        """Return :math:`F_{0.5}` score.

        Relative to :math:`F_{1}`, the :math:`F_{0.5}` score weights
        precision more heavily than recall.

        Cf. https://en.wikipedia.org/wiki/F1_score

        :returns: The :math:`F_{0.5}` score of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.fhalf_score()
        0.8450704225352114
        """
        # F_0.5 is simply the general F_beta score with beta fixed at 0.5.
        return self.fbeta_score(beta=0.5)
    def e_score(self, beta=1):
        r"""Return :math:`E`-score.

        This is Van Rijsbergen's effectiveness measure, the complement of
        the :math:`F_{\beta}` score: :math:`E=1-F_{\beta}`.

        Cf. https://en.wikipedia.org/wiki/Information_retrieval#F-measure

        :param float beta: The :math:`\beta` parameter in the above formula
        :returns: The :math:`E`-score of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.e_score()
        0.17241379310344818
        """
        fbeta = self.fbeta_score(beta)
        return 1 - fbeta
    def f1_score(self):
        r"""Return :math:`F_{1}` score.

        The :math:`F_{1}` score is the harmonic mean of precision & recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        :returns: The :math:`F_{1}` of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f1_score()
        0.8275862068965516
        """
        # The harmonic mean of precision & recall is exactly F_1.
        return self.pr_hmean()
    def f_measure(self):
        r"""Return :math:`F`-measure.

        The :math:`F`-measure is the harmonic mean of precision & recall:
        :math:`2 \cdot \frac{precision \cdot recall}{precision + recall}`

        Cf. https://en.wikipedia.org/wiki/F1_score

        :returns: The :math:`F`-measure of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.f_measure()
        0.8275862068965516
        """
        # Identical to f1_score: the harmonic mean of precision & recall.
        return self.pr_hmean()
    def g_measure(self):
        r"""Return G-measure.

        The :math:`G`-measure is the geometric mean of precision & recall:
        :math:`\sqrt{precision \cdot recall}`

        For two clusters this coincides with the Fowlkes–Mallows (FM)
        index.

        Cf. https://en.wikipedia.org/wiki/F1_score#G-measure

        Cf. https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index

        :returns: The :math:`G`-measure of the confusion table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.g_measure()
        0.828078671210825
        """
        # The geometric mean of precision & recall is exactly the G-measure.
        return self.pr_gmean()
    def mcc(self):
1003
        r"""Return Matthews correlation coefficient (MCC).
1004
1005
        The Matthews correlation coefficient is defined as:
1006
        :math:`\frac{(tp \cdot tn) - (fp \cdot fn)}
1007
        {\sqrt{(tp + fp)(tp + fn)(tn + fp)(tn + fn)}}`
1008
1009
        This is equivalent to the geometric mean of informedness and
1010
        markedness, defined above.
1011
1012
        Cf. https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
1013
1014
        :returns: The Matthews correlation coefficient of the confusion table
1015
        :rtype: float
1016
1017
        >>> ct = ConfusionTable(120, 60, 20, 30)
1018
        >>> ct.mcc()
1019
        0.5367450401216932
1020
        """
1021 1
        if (
1022
            (
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
1023
                (self._tp + self._fp)
1024
                * (self._tp + self._fn)
1025
                * (self._tn + self._fp)
1026
                * (self._tn + self._fn)
1027
            )
1028
        ) == 0:
1029 1
            return float('NaN')
1030 1
        return ((self._tp * self._tn) - (self._fp * self._fn)) / math.sqrt(
1031
            (self._tp + self._fp)
1032
            * (self._tp + self._fn)
1033
            * (self._tn + self._fp)
1034
            * (self._tn + self._fn)
1035
        )
1036
1037 1
    def significance(self):
1038
        r"""Return the significance, :math:`\chi^{2}`.
1039
1040
        Significance is defined as:
1041
        :math:`\chi^{2} =
1042
        \frac{(tp \cdot tn - fp \cdot fn)^{2} (tp + tn + fp + fn)}
1043
        {((tp + fp)(tp + fn)(tn + fp)(tn + fn)}`
1044
1045
        Also: :math:`\chi^{2} = MCC^{2} \cdot n`
1046
1047
        Cf. https://en.wikipedia.org/wiki/Pearson%27s_chi-square_test
1048
1049
        :returns: The significance of the confusion table
1050
        :rtype: float
1051
1052
        >>> ct = ConfusionTable(120, 60, 20, 30)
1053
        >>> ct.significance()
1054
        66.26190476190476
1055
        """
1056 1
        if (
1057
            (
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
1058
                (self._tp + self._fp)
1059
                * (self._tp + self._fn)
1060
                * (self._tn + self._fp)
1061
                * (self._tn + self._fn)
1062
            )
1063
        ) == 0:
1064 1
            return float('NaN')
1065 1
        return (
1066
            (self._tp * self._tn - self._fp * self._fn) ** 2
1067
            * (self._tp + self._tn + self._fp + self._fn)
1068
        ) / (
1069
            (self._tp + self._fp)
1070
            * (self._tp + self._fn)
1071
            * (self._tn + self._fp)
1072
            * (self._tn + self._fn)
1073
        )
1074
1075 1
    def kappa_statistic(self):
        r"""Return κ statistic.

        The κ statistic is defined as:
        :math:`\kappa = \frac{accuracy - random~ accuracy}
        {1 - random~ accuracy}`

        The κ statistic compares the performance of the classifier relative
        to the performance of a random classifier. κ = 0 indicates
        performance identical to random. κ = 1 indicates perfect predictive
        success. κ = -1 indicates perfect predictive failure.

        :returns: The κ statistic of the confusion table; NaN for an empty
            table
        :rtype: float

        >>> ct = ConfusionTable(120, 60, 20, 30)
        >>> ct.kappa_statistic()
        0.5344129554655871
        """
        # Hoist the population so the method is called once, not twice.
        population = self.population()
        if population == 0:
            # κ is undefined for an empty table; report NaN rather than
            # raise ZeroDivisionError.
            return float('NaN')
        # Expected accuracy of a random classifier with the same marginals.
        random_accuracy = (
            (self._tn + self._fp) * (self._tn + self._fn)
            + (self._fn + self._tp) * (self._fp + self._tp)
        ) / population ** 2
        return (self.accuracy() - random_accuracy) / (1 - random_accuracy)
if __name__ == '__main__':
    # Run this module's doctest examples when executed directly.
    import doctest

    doctest.testmod()