Completed
Push — master ( 10124b...a076b6 )
by Chris
14:59
created

abydos.stats.std()   A

Complexity

Conditions 1

Size

Total Lines 9
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nop 3
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19
r"""abydos.stats.
20
21
The stats module defines functions for calculating various statistical data
22
about linguistic objects.
23
24
This includes the ConfusionTable object, which includes members cable of
25
calculating the following data based on a confusion table:
26
27
    - population counts
28
    - precision, recall, specificity, negative predictive value, fall-out,
29
      false discovery rate, accuracy, balanced accuracy, informedness,
30
      and markedness
31
    - various means of the precision & recall, including: arithmetic,
32
      geometric, harmonic, quadratic, logarithmic, contraharmonic,
33
      identric (exponential), & Hölder (power/generalized) means
34
    - :math:`F_{\\beta}`-scores, :math:`E`-scores, :math:`G`-measures, along
35
      with special functions for :math:`F_{1}`, :math:`F_{0.5}`, &
36
      :math:`F_{2}` scores
37
    - significance & Matthews correlation coefficient calculation
38
39
Functions are provided for calculating the following means:
40
41
    - arithmetic
42
    - geometric
43
    - harmonic
44
    - quadratic
45
    - contraharmonic
46
    - logarithmic,
47
    - identric (exponential)
48
    - Seiffert's
49
    - Lehmer
50
    - Heronian
51
    - Hölder (power/generalized)
52
    - arithmetic-geometric
53
    - geometric-harmonic
54
    - arithmetic-geometric-harmonic
55
56
And for calculating:
57
58
    - midrange
59
    - median
60
    - mode
61
    - variance
62
    - standard deviation
63
"""
64
65
from __future__ import division, unicode_literals
66
67
import math
68
from collections import Counter
69
70
from six.moves import range
71
72
from .util import prod
73
74
75
class ConfusionTable(object):
76
    """ConfusionTable object.
77
78
    This object is initialized by passing either four integers (or a tuple of
79
    four integers) representing the squares of a confusion table:
80
    true positives, true negatives, false positives, and false negatives
81
82
    The object possesses methods for the calculation of various statistics
83
    based on the confusion table.
84
    """
85
86
    _tp, _tn, _fp, _fn = 0, 0, 0, 0
87
88
    def __init__(self, tp=0, tn=0, fp=0, fn=0):
89
        """Initialize ConfusionTable.
90
91
        :param int tp: true positives (or a tuple, list, or dict); If a tuple
92
            or list is supplied, it must include 4 values in the order [tp, tn,
93
            fp, fn]. If a dict is supplied, it must have 4 keys, namely 'tp',
94
            'tn', 'fp', & 'fn'.
95
        :param int tn: true negatives
96
        :param int fp: false positives
97
        :param int fn: false negatives
98
99
        >>> ct = ConfusionTable(120, 60, 20, 30)
100
        >>> ct == ConfusionTable((120, 60, 20, 30))
101
        True
102
        >>> ct == ConfusionTable([120, 60, 20, 30])
103
        True
104
        >>> ct == ConfusionTable({'tp': 120, 'tn': 60, 'fp': 20, 'fn': 30})
105
        True
106
        """
107
        if isinstance(tp, (tuple, list)):
108
            if len(tp) == 4:
109
                self._tp = tp[0]
110
                self._tn = tp[1]
111
                self._fp = tp[2]
112
                self._fn = tp[3]
113
            else:
114
                raise AttributeError('ConfusionTable requires a 4-tuple ' +
115
                                     'when being created from a tuple.')
116
        elif isinstance(tp, dict):
117
            if 'tp' in tp:
118
                self._tp = tp['tp']
119
            if 'tn' in tp:
120
                self._tn = tp['tn']
121
            if 'fp' in tp:
122
                self._fp = tp['fp']
123
            if 'fn' in tp:
124
                self._fn = tp['fn']
125
        else:
126
            self._tp = tp
127
            self._tn = tn
128
            self._fp = fp
129
            self._fn = fn
130
131
    def __eq__(self, other):
132
        """Perform eqality (==) comparison.
133
134
        Compares a ConfusionTable to another ConfusionTable or its equivalent
135
        in the form of a tuple, list, or dict.
136
137
        :returns: True if two ConfusionTables are the same object or all four
138
        of their attributes are equal
139
        :rtype: bool
140
141
        >>> ct1 = ConfusionTable(120, 60, 20, 30)
142
        >>> ct2 = ConfusionTable(120, 60, 20, 30)
143
        >>> ct3 = ConfusionTable(60, 30, 10, 15)
144
145
        >>> ct1 == ct2
146
        True
147
        >>> ct1 == ct3
148
        False
149
150
        >>> ct1 != ct2
151
        False
152
        >>> ct1 != ct3
153
        True
154
        """
155
        if isinstance(other, ConfusionTable):
156
            if id(self) == id(other):
157
                return True
158
            if ((self._tp == other.true_pos() and
159
                 self._tn == other.true_neg() and
160
                 self._fp == other.false_pos() and
161
                 self._fn == other.false_neg())):
162
                return True
163
        elif isinstance(other, (tuple, list)):
164
            if ((self._tp == other[0] and self._tn == other[1] and
165
                 self._fp == other[2] and self._fn == other[3])):
166
                return True
167
        elif isinstance(other, dict):
168
            if ((self._tp == other['tp'] and self._tn == other['tn'] and
169
                 self._fp == other['fp'] and self._fn == other['fn'])):
170
                return True
171
        return False
172
173
    def __str__(self):
174
        """Cast to str.
175
176
        :returns: a human-readable version of the confusion table
177
        :rtype: str
178
179
        >>> ct = ConfusionTable(120, 60, 20, 30)
180
        >>> str(ct)
181
        'tp:120, tn:60, fp:20, fn:30'
182
        """
183
        return ('tp:' + str(self._tp) + ', tn:' + str(self._tn) + ', fp:' +
184
                str(self._fp) + ', fn:' + str(self._fn))
185
186
    def to_tuple(self):
187
        """Cast to tuple.
188
189
        :returns: the confusion table as a 4-tuple (tp, tn, fp, fn)
190
        :rtype: tuple
191
192
        >>> ct = ConfusionTable(120, 60, 20, 30)
193
        >>> ct.tuple()
194
        (120, 60, 20, 30)
195
        """
196
        return (self._tp, self._tn, self._fp, self._fn)
197
198
    def to_dict(self):
199
        """Cast to dict.
200
201
        :returns: the confusion table as a dict
202
        :rtype: dict
203
204
        >>> ct = ConfusionTable(120, 60, 20, 30)
205
        >>> ct.dict()
206
        {'fp': 20, 'fn': 30, 'tn': 60, 'tp': 120}
207
        """
208
        return {'tp': self._tp, 'tn': self._tn,
209
                'fp': self._fp, 'fn': self._fn}
210
211
    def true_pos(self):
212
        """Return true positives.
213
214
        :returns: the true positives of the confusion table
215
        :rtype: int
216
217
        >>> ct = ConfusionTable(120, 60, 20, 30)
218
        >>> ct.true_pos()
219
        120
220
        """
221
        return self._tp
222
223
    def true_neg(self):
224
        """Return true negatives.
225
226
        :returns: the true negatives of the confusion table
227
        :rtype: int
228
229
        >>> ct = ConfusionTable(120, 60, 20, 30)
230
        >>> ct.true_neg()
231
        60
232
        """
233
        return self._tn
234
235
    def false_pos(self):
236
        """Return false positives.
237
238
        :returns: the false positives of the confusion table
239
        :rtype: int
240
241
        >>> ct = ConfusionTable(120, 60, 20, 30)
242
        >>> ct.false_pos()
243
        20
244
        """
245
        return self._fp
246
247
    def false_neg(self):
248
        """Return false negatives.
249
250
        :returns: the false negatives of the confusion table
251
        :rtype: int
252
253
        >>> ct = ConfusionTable(120, 60, 20, 30)
254
        >>> ct.false_neg()
255
        30
256
        """
257
        return self._fn
258
259
    def correct_pop(self):
260
        """Return correct population.
261
262
        :returns: the correct population of the confusion table
263
        :rtype: int
264
265
        >>> ct = ConfusionTable(120, 60, 20, 30)
266
        >>> ct.correct_pop()
267
        180
268
        """
269
        return self._tp + self._tn
270
271
    def error_pop(self):
272
        """Return error population.
273
274
        :returns: The error population of the confusion table
275
        :rtype: int
276
277
        >>> ct = ConfusionTable(120, 60, 20, 30)
278
        >>> ct.error_pop()
279
        50
280
        """
281
        return self._fp + self._fn
282
283
    def test_pos_pop(self):
284
        """Return test positive population.
285
286
        :returns: The test positive population of the confusion table
287
        :rtype: int
288
289
        >>> ct = ConfusionTable(120, 60, 20, 30)
290
        >>> ct.test_pos_pop()
291
        140
292
        """
293
        return self._tp + self._fp
294
295
    def test_neg_pop(self):
296
        """Return test negative population.
297
298
        :returns: The test negative population of the confusion table
299
        :rtype: int
300
301
        >>> ct = ConfusionTable(120, 60, 20, 30)
302
        >>> ct.test_neg_pop()
303
        90
304
        """
305
        return self._tn + self._fn
306
307
    def cond_pos_pop(self):
308
        """Return condition positive population.
309
310
        :returns: The condition positive population of the confusion table
311
        :rtype: int
312
313
        >>> ct = ConfusionTable(120, 60, 20, 30)
314
        >>> ct.cond_pos_pop()
315
        150
316
        """
317
        return self._tp + self._fn
318
319
    def cond_neg_pop(self):
320
        """Return condition negative population.
321
322
        :returns: The condition negative population of the confusion table
323
        :rtype: int
324
325
        >>> ct = ConfusionTable(120, 60, 20, 30)
326
        >>> ct.cond_neg_pop()
327
        80
328
        """
329
        return self._fp + self._tn
330
331
    def population(self):
332
        """Return population, N.
333
334
        :returns: The population (N) of the confusion table
335
        :rtype: int
336
337
        >>> ct = ConfusionTable(120, 60, 20, 30)
338
        >>> ct.population()
339
        230
340
        """
341
        return self._tp + self._tn + self._fp + self._fn
342
343
    def precision(self):
344
        r"""Return precision.
345
346
        Precision is defined as :math:`\\frac{tp}{tp + fp}`
347
348
        AKA positive predictive value (PPV)
349
350
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
351
352
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Precision
353
354
        :returns: The precision of the confusion table
355
        :rtype: float
356
357
        >>> ct = ConfusionTable(120, 60, 20, 30)
358
        >>> ct.precision()
359
        0.8571428571428571
360
        """
361
        if self._tp + self._fp == 0:
362
            return float('NaN')
363
        return self._tp / (self._tp + self._fp)
364
365
    def precision_gain(self):
366
        r"""Return gain in precision.
367
368
        The gain in precision is defined as:
369
        :math:`G(precision) = \\frac{precision}{random~ precision}`
370
371
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
372
373
        :returns: The gain in precision of the confusion table
374
        :rtype: float
375
376
        >>> ct = ConfusionTable(120, 60, 20, 30)
377
        >>> ct.precision_gain()
378
        1.3142857142857143
379
        """
380
        if self.population() == 0:
381
            return float('NaN')
382
        random_precision = self.cond_pos_pop()/self.population()
383
        return self.precision()/random_precision
384
385
    def recall(self):
386
        r"""Return recall.
387
388
        Recall is defined as :math:`\\frac{tp}{tp + fn}`
389
390
        AKA sensitivity
391
392
        AKA true positive rate (TPR)
393
394
        Cf. https://en.wikipedia.org/wiki/Precision_and_recall
395
        Cf. https://en.wikipedia.org/wiki/Sensitivity_(test)
396
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Recall
397
398
        :returns: The recall of the confusion table
399
        :rtype: float
400
401
        >>> ct = ConfusionTable(120, 60, 20, 30)
402
        >>> ct.recall()
403
        0.8
404
        """
405
        if self._tp + self._fn == 0:
406
            return float('NaN')
407
        return self._tp / (self._tp + self._fn)
408
409
    def specificity(self):
410
        r"""Return specificity.
411
412
        Specificity is defined as :math:`\\frac{tn}{tn + fp}`
413
414
        AKA true negative rate (TNR)
415
416
        Cf. https://en.wikipedia.org/wiki/Specificity_(tests)
417
418
        :returns: The specificity of the confusion table
419
        :rtype: float
420
421
        >>> ct = ConfusionTable(120, 60, 20, 30)
422
        >>> ct.specificity()
423
        0.75
424
        """
425
        if self._tn + self._fp == 0:
426
            return float('NaN')
427
        return self._tn / (self._tn + self._fp)
428
429
    def npv(self):
430
        r"""Return negative predictive value (NPV).
431
432
        NPV is defined as :math:`\\frac{tn}{tn + fn}`
433
434
        Cf. https://en.wikipedia.org/wiki/Negative_predictive_value
435
436
        :returns: The negative predictive value of the confusion table
437
        :rtype: float
438
439
        >>> ct = ConfusionTable(120, 60, 20, 30)
440
        >>> ct.npv()
441
        0.6666666666666666
442
        """
443
        if self._tn + self._fn == 0:
444
            return float('NaN')
445
        return self._tn / (self._tn + self._fn)
446
447
    def fallout(self):
448
        r"""Return fall-out.
449
450
        Fall-out is defined as :math:`\\frac{fp}{fp + tn}`
451
452
        AKA false positive rate (FPR)
453
454
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#Fall-out
455
456
        :returns: The fall-out of the confusion table
457
        :rtype: float
458
459
        >>> ct = ConfusionTable(120, 60, 20, 30)
460
        >>> ct.fallout()
461
        0.25
462
        """
463
        if self._fp + self._tn == 0:
464
            return float('NaN')
465
        return self._fp / (self._fp + self._tn)
466
467
    def fdr(self):
468
        r"""Return false discovery rate (FDR).
469
470
        False discovery rate is defined as :math:`\\frac{fp}{fp + tp}`
471
472
        Cf. https://en.wikipedia.org/wiki/False_discovery_rate
473
474
        :returns: The false discovery rate of the confusion table
475
        :rtype: float
476
477
        >>> ct = ConfusionTable(120, 60, 20, 30)
478
        >>> ct.fdr()
479
        0.14285714285714285
480
        """
481
        if self._fp + self._tp == 0:
482
            return float('NaN')
483
        return self._fp / (self._fp + self._tp)
484
485
    def accuracy(self):
486
        r"""Return accuracy.
487
488
        Accuracy is defined as :math:`\\frac{tp + tn}{population}`
489
490
        Cf. https://en.wikipedia.org/wiki/Accuracy
491
492
        :returns: The accuracy of the confusion table
493
        :rtype: float
494
495
        >>> ct = ConfusionTable(120, 60, 20, 30)
496
        >>> ct.accuracy()
497
        0.782608695652174
498
        """
499
        if self.population() == 0:
500
            return float('NaN')
501
        return (self._tp + self._tn) / self.population()
502
503
    def accuracy_gain(self):
504
        r"""Return gain in accuracy.
505
506
        The gain in accuracy is defined as:
507
        :math:`G(accuracy) = \\frac{accuracy}{random~ accuracy}`
508
509
        Cf. https://en.wikipedia.org/wiki/Gain_(information_retrieval)
510
511
        :returns: The gain in accuracy of the confusion table
512
        :rtype: float
513
514
        >>> ct = ConfusionTable(120, 60, 20, 30)
515
        1.4325259515570934
516
        """
517
        if self.population() == 0:
518
            return float('NaN')
519
        random_accuracy = ((self.cond_pos_pop()/self.population())**2 +
520
                           (self.cond_neg_pop()/self.population())**2)
521
        return self.accuracy()/random_accuracy
522
523
    def balanced_accuracy(self):
524
        r"""Return balanced accuracy.
525
526
        Balanced accuracy is defined as
527
        :math:`\\frac{sensitivity + specificity}{2}`
528
529
        Cf. https://en.wikipedia.org/wiki/Accuracy
530
531
        :returns: The balanced accuracy of the confusion table
532
        :rtype: float
533
534
        >>> ct = ConfusionTable(120, 60, 20, 30)
535
        >>> ct.balanced_accuracy()
536
        0.775
537
        """
538
        return 0.5 * (self.recall() + self.specificity())
539
540
    def informedness(self):
541
        """Return informedness.
542
543
        Informedness is defined as :math:`sensitivity + specificity - 1`.
544
545
        AKA Youden's J statistic
546
547
        AKA DeltaP'
548
549
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
550
551
        Cf.
552
        http://dspace.flinders.edu.au/xmlui/bitstream/handle/2328/27165/Powers%20Evaluation.pdf
553
554
        :returns: The informedness of the confusion table
555
        :rtype: float
556
557
        >>> ct = ConfusionTable(120, 60, 20, 30)
558
        >>> ct.informedness()
559
        0.55
560
        """
561
        return self.recall() + self.specificity() - 1
562
563
    def markedness(self):
564
        """Return markedness.
565
566
        Markedness is defined as :math:`precision + npv - 1`
567
568
        AKA DeltaP
569
570
        Cf. https://en.wikipedia.org/wiki/Youden%27s_J_statistic
571
572
        Cf.
573
        http://dspace.flinders.edu.au/xmlui/bitstream/handle/2328/27165/Powers%20Evaluation.pdf
574
575
        :returns: The markedness of the confusion table
576
        :rtype: float
577
578
        >>> ct = ConfusionTable(120, 60, 20, 30)
579
        >>> ct.markedness()
580
        0.5238095238095237
581
        """
582
        return self.precision() + self.npv() - 1
583
584
    def pr_amean(self):
585
        r"""Return arithmetic mean of precision & recall.
586
587
        The arithmetic mean of precision and recall is defined as:
588
        :math:`\\frac{precision \\cdot recall}{2}`
589
590
        Cf. https://en.wikipedia.org/wiki/Arithmetic_mean
591
592
        :returns: The arithmetic mean of the confusion table's precision &
593
            recall
594
        :rtype: float
595
596
        >>> ct = ConfusionTable(120, 60, 20, 30)
597
        >>> ct.pr_amean()
598
        0.8285714285714285
599
        """
600
        return amean((self.precision(), self.recall()))
601
602
    def pr_gmean(self):
603
        r"""Return geometric mean of precision & recall.
604
605
        The geometric mean of precision and recall is defined as:
606
        :math:`\\sqrt{precision \\cdot recall}`
607
608
        Cf. https://en.wikipedia.org/wiki/Geometric_mean
609
610
        :returns: The geometric mean of the confusion table's precision &
611
            recall
612
        :rtype: float
613
614
        >>> ct = ConfusionTable(120, 60, 20, 30)
615
        >>> ct.pr_gmean()
616
        0.828078671210825
617
        """
618
        return gmean((self.precision(), self.recall()))
619
620
    def pr_hmean(self):
621
        r"""Return harmonic mean of precision & recall.
622
623
        The harmonic mean of precision and recall is defined as:
624
        :math:`\\frac{2 \\cdot precision \\cdot recall}{precision + recall}`
625
626
        Cf. https://en.wikipedia.org/wiki/Harmonic_mean
627
628
        :returns: The harmonic mean of the confusion table's precision & recall
629
        :rtype: float
630
631
        >>> ct = ConfusionTable(120, 60, 20, 30)
632
        >>> ct.pr_hmean()
633
        0.8275862068965516
634
        """
635
        return hmean((self.precision(), self.recall()))
636
637
    def pr_qmean(self):
638
        r"""Return quadratic mean of precision & recall.
639
640
        The quadratic mean of precision and recall is defined as:
641
        :math:`\\sqrt{\\frac{precision^{2} + recall^{2}}{2}}`
642
643
        Cf. https://en.wikipedia.org/wiki/Quadratic_mean
644
645
        :returns: The quadratic mean of the confusion table's precision &
646
            recall
647
        :rtype: float
648
649
        >>> ct = ConfusionTable(120, 60, 20, 30)
650
        >>> ct.pr_qmean()
651
        0.8290638930598233
652
        """
653
        return qmean((self.precision(), self.recall()))
654
655
    def pr_cmean(self):
656
        r"""Return contraharmonic mean of precision & recall.
657
658
        The contraharmonic mean is:
659
        :math:`\\frac{precision^{2} + recall^{2}}{precision + recall}`
660
661
        Cf. https://en.wikipedia.org/wiki/Contraharmonic_mean
662
663
        :returns: The contraharmonic mean of the confusion table's precision &
664
            recall
665
        :rtype: float
666
667
        >>> ct = ConfusionTable(120, 60, 20, 30)
668
        >>> ct.pr_cmean()
669
        0.8295566502463055
670
        """
671
        return cmean((self.precision(), self.recall()))
672
673
    def pr_lmean(self):
674
        r"""Return logarithmic mean of precision & recall.
675
676
        The logarithmic mean is:
677
        0 if either precision or recall is 0,
678
        the precision if they are equal,
679
        otherwise :math:`\\frac{precision - recall}
680
        {ln(precision) - ln(recall)}`
681
682
        Cf. https://en.wikipedia.org/wiki/Logarithmic_mean
683
684
        :returns: The logarithmic mean of the confusion table's precision &
685
            recall
686
        :rtype: float
687
688
        >>> ct = ConfusionTable(120, 60, 20, 30)
689
        >>> ct.pr_lmean()
690
        0.8282429171492667
691
        """
692
        precision = self.precision()
693
        recall = self.recall()
694
        if not precision or not recall:
695
            return 0.0
696
        elif precision == recall:
697
            return precision
698
        return ((precision - recall) /
699
                (math.log(precision) - math.log(recall)))
700
701
    def pr_imean(self):
702
        r"""Return identric (exponential) mean of precision & recall.
703
704
        The identric mean is:
705
        precision if precision = recall,
706
        otherwise :math:`\\frac{1}{e} \\cdot
707
        \\sqrt[precision - recall]{\\frac{precision^{precision}}
708
        {recall^{recall}}}`
709
710
        Cf. https://en.wikipedia.org/wiki/Identric_mean
711
712
        :returns: The identric mean of the confusion table's precision & recall
713
        :rtype: float
714
715
        >>> ct = ConfusionTable(120, 60, 20, 30)
716
        >>> ct.pr_imean()
717
        0.8284071826325543
718
        """
719
        return imean((self.precision(), self.recall()))
720
721
    def pr_seiffert_mean(self):
722
        r"""Return Seiffert's mean of precision & recall.
723
724
        Seiffert's mean of precision and recall is:
725
        :math:`\\frac{precision - recall}{4 \\cdot arctan
726
        \\sqrt{\\frac{precision}{recall}} - \\pi}`
727
728
        Cf. http://www.helsinki.fi/~hasto/pp/miaPreprint.pdf
729
730
        :returns: Seiffer's mean of the confusion table's precision & recall
731
        :rtype: float
732
733
        >>> ct = ConfusionTable(120, 60, 20, 30)
734
        >>> ct.pr_seiffert_mean()
735
        0.8284071696048312
736
        """
737
        return seiffert_mean((self.precision(), self.recall()))
738
739
    def pr_lehmer_mean(self, exp=2):
740
        r"""Return Lehmer mean of precision & recall.
741
742
        The Lehmer mean is:
743
        :math:`\\frac{precision^{exp} + recall^{exp}}
744
        {precision^{exp-1} + recall^{exp-1}}`
745
746
        Cf. https://en.wikipedia.org/wiki/Lehmer_mean
747
748
        :param numeric exp: The exponent of the Lehmer mean
749
        :returns: The Lehmer mean for the given exponent of the confusion
750
            table's precision & recall
751
        :rtype: float
752
753
        >>> ct = ConfusionTable(120, 60, 20, 30)
754
        >>> ct.pr_lehmer_mean()
755
        0.8295566502463055
756
        """
757
        return lehmer_mean((self.precision(), self.recall()), exp)
758
759
    def pr_heronian_mean(self):
760
        r"""Return Heronian mean of precision & recall.
761
762
        The Heronian mean of precision and recall is defined as:
763
        :math:`\\frac{precision + \\sqrt{precision \\cdot recall} + recall}{3}`
764
765
        Cf. https://en.wikipedia.org/wiki/Heronian_mean
766
767
        :returns: The Heronian mean of the confusion table's precision & recall
768
        :rtype: float
769
770
        >>> ct = ConfusionTable(120, 60, 20, 30)
771
        >>> ct.pr_heronian_mean()
772
        0.8284071761178939
773
        """
774
        return heronian_mean((self.precision(), self.recall()))
775
776
    def pr_hoelder_mean(self, exp=2):
777
        r"""Return Hölder (power/generalized) mean of precision & recall.
778
779
        The power mean of precision and recall is defined as:
780
        :math:`\\frac{1}{2} \\cdot
781
        \\sqrt[exp]{precision^{exp} + recall^{exp}}`
782
        for :math:`exp \\ne 0`, and the geometric mean for :math:`exp = 0`
783
784
        Cf. https://en.wikipedia.org/wiki/Generalized_mean
785
786
        :param numeric exp: The exponent of the Hölder mean
787
        :returns: The Hölder mean for the given exponent of the confusion
788
            table's precision & recall
789
        :rtype: float
790
791
        >>> ct = ConfusionTable(120, 60, 20, 30)
792
        >>> ct.pr_hoelder_mean()
793
        0.8290638930598233
794
        """
795
        return hoelder_mean((self.precision(), self.recall()), exp)
796
797
    def pr_agmean(self):
798
        """Return arithmetic-geometric mean of precision & recall.
799
800
        Iterates between arithmetic & geometric means until they converge to
801
        a single value (rounded to 12 digits)
802
803
        Cf. https://en.wikipedia.org/wiki/Arithmetic–geometric_mean
804
805
        :returns: The arithmetic-geometric mean of the confusion table's
806
            precision & recall
807
        :rtype: float
808
809
        >>> ct = ConfusionTable(120, 60, 20, 30)
810
        >>> ct.pr_agmean()
811
        0.8283250315702829
812
        """
813
        return agmean((self.precision(), self.recall()))
814
815
    def pr_ghmean(self):
816
        """Return geometric-harmonic mean of precision & recall.
817
818
        Iterates between geometric & harmonic means until they converge to
819
        a single value (rounded to 12 digits)
820
821
        Cf. https://en.wikipedia.org/wiki/Geometric–harmonic_mean
822
823
        :returns: The geometric-harmonic mean of the confusion table's
824
            precision & recall
825
        :rtype: float
826
827
        >>> ct = ConfusionTable(120, 60, 20, 30)
828
        >>> ct.pr_ghmean()
829
        0.8278323841238441
830
        """
831
        return ghmean((self.precision(), self.recall()))
832
833
    def pr_aghmean(self):
834
        """Return arithmetic-geometric-harmonic mean of precision & recall.
835
836
        Iterates over arithmetic, geometric, & harmonic means until they
837
        converge to a single value (rounded to 12 digits), following the
838
        method described by Raïssouli, Leazizi, & Chergui:
839
        http://www.emis.de/journals/JIPAM/images/014_08_JIPAM/014_08.pdf
840
841
        :returns: The arithmetic-geometric-harmonic mean of the confusion
842
            table's precision & recall
843
        :rtype: float
844
845
        >>> ct = ConfusionTable(120, 60, 20, 30)
846
        >>> ct.pr_aghmean()
847
        0.8280786712108288
848
        """
849
        return aghmean((self.precision(), self.recall()))
850
851
    def fbeta_score(self, beta=1):
852
        r"""Return :math:`F_{\\beta}` score.
853
854
        :math:`F_{\\beta}` for a positive real value :math:`\\beta` "measures
855
        the effectiveness of retrieval with respect to a user who
856
        attaches :math:`\\beta` times as much importance to recall as
857
        precision" (van Rijsbergen 1979)
858
859
        :math:`F_{\\beta}` score is defined as:
860
        :math:`(1 + \\beta^2) \\cdot \\frac{precision \\cdot recall}
861
        {((\\beta^2 \\cdot precision) + recall)}`
862
863
        Cf. https://en.wikipedia.org/wiki/F1_score
864
865
        :params numeric beta: The :math:`\\beta` parameter in the above formula
866
        :returns: The :math:`F_{\\beta}` of the confusion table
867
        :rtype: float
868
869
        >>> ct = ConfusionTable(120, 60, 20, 30)
870
        >>> ct.fbeta_score()
871
        0.8275862068965518
872
        >>> ct.fbeta_score(beta=0.1)
873
        0.8565371024734982
874
        """
875
        if beta <= 0:
876
            raise AttributeError('Beta must be a positive real value.')
877
        precision = self.precision()
878
        recall = self.recall()
879
        return ((1 + beta**2) *
880
                precision * recall / ((beta**2 * precision) + recall))
881
882
    def f2_score(self):
883
        """Return :math:`F_{2}`.
884
885
        The :math:`F_{2}` score emphasizes recall over precision in comparison
886
        to the :math:`F_{1}` score
887
888
        Cf. https://en.wikipedia.org/wiki/F1_score
889
890
        :returns: The :math:`F_{2}` of the confusion table
891
        :rtype: float
892
893
        >>> ct = ConfusionTable(120, 60, 20, 30)
894
        >>> ct.f2_score()
895
        0.8108108108108109
896
        """
897
        return self.fbeta_score(2)
898
899
    def fhalf_score(self):
900
        """Return :math:`F_{0.5}` score.
901
902
        The :math:`F_{0.5}` score emphasizes precision over recall in
903
        comparison to the :math:`F_{1}` score
904
905
        Cf. https://en.wikipedia.org/wiki/F1_score
906
907
        :returns: The :math:`F_{0.5}` score of the confusion table
908
        :rtype: float
909
910
        >>> ct = ConfusionTable(120, 60, 20, 30)
911
        >>> ct.fhalf_score()
912
        0.8450704225352114
913
        """
914
        return self.fbeta_score(0.5)
915
916
    def e_score(self, beta=1):
917
        """Return :math:`E`-score.
918
919
        This is Van Rijsbergen's effectiveness measure
920
921
        Cf. https://en.wikipedia.org/wiki/Information_retrieval#F-measure
922
923
        :returns: The :math:`E`-score of the confusion table
924
        :rtype: float
925
926
        >>> ct = ConfusionTable(120, 60, 20, 30)
927
        >>> ct.e_score()
928
        0.17241379310344818
929
        """
930
        return 1-self.fbeta_score(beta)
931
932
    def f1_score(self):
933
        r"""Return :math:`F_{1}` score.
934
935
        :math:`F_{1}` score is the harmonic mean of precision and recall:
936
        :math:`2 \\cdot \\frac{precision \\cdot recall}{precision + recall}`
937
938
        Cf. https://en.wikipedia.org/wiki/F1_score
939
940
        :returns: The :math:`F_{1}` of the confusion table
941
        :rtype: float
942
943
        >>> ct = ConfusionTable(120, 60, 20, 30)
944
        >>> ct.f1_score()
945
        0.8275862068965516
946
        """
947
        return self.pr_hmean()
948
949
    def f_measure(self):
950
        r"""Return :math:`F`-measure.
951
952
        :math:`F`-measure is the harmonic mean of precision and recall:
953
        :math:`2 \\cdot \\frac{precision \\cdot recall}{precision + recall}`
954
955
        Cf. https://en.wikipedia.org/wiki/F1_score
956
957
        :returns: The math:`F`-measure of the confusion table
958
        :rtype: float
959
960
        >>> ct = ConfusionTable(120, 60, 20, 30)
961
        >>> ct.f_measure()
962
        0.8275862068965516
963
        """
964
        return self.pr_hmean()
965
966
    def g_measure(self):
967
        r"""Return G-measure.
968
969
        :math:`G`-measure is the geometric mean of precision and recall:
970
        :math:`\\sqrt{precision \\cdot recall}`
971
972
        This is identical to the Fowlkes–Mallows (FM) index for two
973
        clusters.
974
975
        Cf. https://en.wikipedia.org/wiki/F1_score#G-measure
976
977
        Cf. https://en.wikipedia.org/wiki/Fowlkes%E2%80%93Mallows_index
978
979
        :returns: The :math:`G`-measure of the confusion table
980
        :rtype: float
981
982
        >>> ct = ConfusionTable(120, 60, 20, 30)
983
        >>> ct.g_measure()
984
        0.828078671210825
985
        """
986
        return self.pr_gmean()
987
988
    def mcc(self):
989
        r"""Return Matthews correlation coefficient (MCC).
990
991
        The Matthews correlation coefficient is defined as:
992
        :math:`\\frac{(tp \\cdot tn) - (fp \\cdot fn)}
993
        {\\sqrt{(tp + fp)(tp + fn)(tn + fp)(tn + fn)}}`
994
995
        This is equivalent to the geometric mean of informedness and
996
        markedness, defined above.
997
998
        Cf. https://en.wikipedia.org/wiki/Matthews_correlation_coefficient
999
1000
        :returns: The Matthews correlation coefficient of the confusion table
1001
        :rtype: float
1002
1003
        >>> ct = ConfusionTable(120, 60, 20, 30)
1004
        >>> ct.mcc()
1005
        0.5367450401216932
1006
        """
1007
        if (((self._tp + self._fp) * (self._tp + self._fn) *
1008
             (self._tn + self._fp) * (self._tn + self._fn))) == 0:
1009
            return float('NaN')
1010
        return (((self._tp * self._tn) - (self._fp * self._fn)) /
1011
                math.sqrt((self._tp + self._fp) * (self._tp + self._fn) *
1012
                          (self._tn + self._fp) * (self._tn + self._fn)))
1013
1014
    def significance(self):
1015
        r"""Return the significance, :math:`\\chi^{2}`.
1016
1017
        Significance is defined as:
1018
        :math:`\\chi^{2} =
1019
        \\frac{(tp \\cdot tn - fp \\cdot fn)^{2} (tp + tn + fp + fn)}
1020
        {((tp + fp)(tp + fn)(tn + fp)(tn + fn)}`
1021
1022
        Also: :math:`\\chi^{2} = MCC^{2} \\cdot n`
1023
1024
        Cf. https://en.wikipedia.org/wiki/Pearson%27s_chi-square_test
1025
1026
        :returns: The significance of the confusion table
1027
        :rtype: float
1028
1029
        >>> ct = ConfusionTable(120, 60, 20, 30)
1030
        >>> ct.significance()
1031
        66.26190476190476
1032
        """
1033
        if (((self._tp + self._fp) * (self._tp + self._fn) *
1034
             (self._tn + self._fp) * (self._tn + self._fn))) == 0:
1035
            return float('NaN')
1036
        return (((self._tp * self._tn - self._fp * self._fn)**2 *
1037
                 (self._tp + self._tn + self._fp + self._fn)) /
1038
                ((self._tp + self._fp) * (self._tp + self._fn) *
1039
                 (self._tn + self._fp) * (self._tn + self._fn)))
1040
1041
    def kappa_statistic(self):
1042
        r"""Return κ statistic.
1043
1044
        The κ statistic is defined as:
1045
        :math:`\\kappa = \\frac{accuracy - random~ accuracy}
1046
        {1 - random~ accuracy}`
1047
1048
        The κ statistic compares the performance of the classifier relative to
1049
        the performance of a random classifier. κ = 0 indicates performance
1050
        identical to random. κ = 1 indicates perfect predictive success.
1051
        κ = -1 indicates perfect predictive failure.
1052
1053
        :returns: The κ statistic of the confusion table
1054
        :rtype: float
1055
1056
        >>> ct = ConfusionTable(120, 60, 20, 30)
1057
        >>> ct.kappa_statistic()
1058
        0.5344129554655871
1059
        """
1060
        if self.population() == 0:
1061
            return float('NaN')
1062
        random_accuracy = (((self._tn + self._fp) *
1063
                            (self._tn + self._fn) +
1064
                            (self._fn + self._tp) *
1065
                            (self._fp + self._tp)) /
1066
                           self.population()**2)
1067
        return (self.accuracy()-random_accuracy) / (1-random_accuracy)
1068
1069
1070
def amean(nums):
1071
    r"""Return arithmetic mean.
1072
1073
    The arithmetic mean is defined as:
1074
    :math:`\\frac{\\sum{nums}}{|nums|}`
1075
1076
    Cf. https://en.wikipedia.org/wiki/Arithmetic_mean
1077
1078
    :param list nums: A series of numbers
1079
    :returns: The arithmetric mean of nums
1080
    :rtype: float
1081
1082
    >>> amean([1, 2, 3, 4])
1083
    2.5
1084
    >>> amean([1, 2])
1085
    1.5
1086
    >>> amean([0, 5, 1000])
1087
    335.0
1088
    """
1089
    return sum(nums)/len(nums)
1090
1091
1092
def gmean(nums):
1093
    r"""Return geometric mean.
1094
1095
    The geometric mean is defined as:
1096
    :math:`\\sqrt[|nums|]{\\prod\\limits_{i} nums_{i}}`
1097
1098
    Cf. https://en.wikipedia.org/wiki/Geometric_mean
1099
1100
    :param list nums: A series of numbers
1101
    :returns: The geometric mean of nums
1102
    :rtype: float
1103
1104
    >>> gmean([1, 2, 3, 4])
1105
    2.213363839400643
1106
    >>> gmean([1, 2])
1107
    1.4142135623730951
1108
    >>> gmean([0, 5, 1000])
1109
    0.0
1110
    """
1111
    return prod(nums)**(1/len(nums))
1112
1113
1114
def hmean(nums):
1115
    r"""Return harmonic mean.
1116
1117
    The harmonic mean is defined as:
1118
    :math:`\\frac{|nums|}{\\sum\\limits_{i}\\frac{1}{nums_i}}`
1119
1120
    Following the behavior of Wolfram|Alpha:
1121
    If one of the values in nums is 0, return 0.
1122
    If more than one value in nums is 0, return NaN.
1123
1124
    Cf. https://en.wikipedia.org/wiki/Harmonic_mean
1125
1126
    :param list nums: A series of numbers
1127
    :returns: The harmonic mean of nums
1128
    :rtype: float
1129
1130
    >>> hmean([1, 2, 3, 4])
1131
    1.9200000000000004
1132
    >>> hmean([1, 2])
1133
    1.3333333333333333
1134
    >>> hmean([0, 5, 1000])
1135
    0
1136
    """
1137
    if len(nums) < 1:
1138
        raise AttributeError('hmean requires at least one value')
1139
    elif len(nums) == 1:
1140
        return nums[0]
1141
    else:
1142
        for i in range(1, len(nums)):
1143
            if nums[0] != nums[i]:
1144
                break
1145
        else:
1146
            return nums[0]
1147
1148
    if 0 in nums:
1149
        if nums.count(0) > 1:
1150
            return float('nan')
1151
        return 0
1152
    return len(nums)/sum(1/i for i in nums)
1153
1154
1155
def qmean(nums):
1156
    r"""Return quadratic mean.
1157
1158
    The quadratic mean of precision and recall is defined as:
1159
    :math:`\\sqrt{\\sum\\limits_{i} \\frac{num_i^2}{|nums|}}`
1160
1161
    Cf. https://en.wikipedia.org/wiki/Quadratic_mean
1162
1163
    :param list nums: A series of numbers
1164
    :returns: The quadratic mean of nums
1165
    :rtype: float
1166
1167
    >>> qmean([1, 2, 3, 4])
1168
    2.7386127875258306
1169
    >>> qmean([1, 2])
1170
    1.5811388300841898
1171
    >>> qmean([0, 5, 1000])
1172
    577.3574860228857
1173
    """
1174
    return (sum(i**2 for i in nums)/len(nums))**(0.5)
1175
1176
1177
def cmean(nums):
1178
    r"""Return contraharmonic mean.
1179
1180
    The contraharmonic mean is:
1181
    :math:`\\frac{\\sum\\limits_i x_i^2}{\\sum\\limits_i x_i}`
1182
1183
    Cf. https://en.wikipedia.org/wiki/Contraharmonic_mean
1184
1185
    :param list nums: A series of numbers
1186
    :returns: The contraharmonic mean of nums
1187
    :rtype: float
1188
1189
    >>> cmean([1, 2, 3, 4])
1190
    3.0
1191
    >>> cmean([1, 2])
1192
    1.6666666666666667
1193
    >>> cmean([0, 5, 1000])
1194
    995.0497512437811
1195
    """
1196
    return sum(x**2 for x in nums)/sum(nums)
1197
1198
1199
def lmean(nums):
1200
    r"""Return logarithmic mean.
1201
1202
    The logarithmic mean of an arbitrarily long series is defined by
1203
    http://www.survo.fi/papers/logmean.pdf
1204
    as:
1205
    :math:`L(x_1, x_2, ..., x_n) =
1206
    (n-1)! \\sum\\limits_{i=1}^n \\frac{x_i}
1207
    {\\prod\\limits_{\\substack{j = 1\\\\j \\ne i}}^n
1208
    ln \\frac{x_i}{x_j}}`
1209
1210
    Cf. https://en.wikipedia.org/wiki/Logarithmic_mean
1211
1212
    :param list nums: A series of numbers
1213
    :returns: The logarithmic mean of nums
1214
    :rtype: float
1215
1216
    >>> lmean([1, 2, 3, 4])
1217
    2.2724242417489258
1218
    >>> lmean([1, 2])
1219
    1.4426950408889634
1220
    """
1221
    if len(nums) != len(set(nums)):
1222
        raise AttributeError('No two values in the nums list may be equal.')
1223
    rolling_sum = 0
1224
    for i in range(len(nums)):
1225
        rolling_prod = 1
1226
        for j in range(len(nums)):
1227
            if i != j:
1228
                rolling_prod *= (math.log(nums[i]/nums[j]))
1229
        rolling_sum += nums[i]/rolling_prod
1230
    return math.factorial(len(nums)-1) * rolling_sum
1231
1232
1233
def imean(nums):
1234
    r"""Return identric (exponential) mean.
1235
1236
    The identric mean of two numbers x and y is:
1237
    x if x = y
1238
    otherwise :math:`\\frac{1}{e} \\sqrt[x-y]{\\frac{x^x}{y^y}}`
1239
1240
    Cf. https://en.wikipedia.org/wiki/Identric_mean
1241
1242
    :param list nums: A series of numbers
1243
    :returns: The identric mean of nums
1244
    :rtype: float
1245
1246
    >>> imean([1, 2])
1247
    1.4715177646857693
1248
    >>> imean([1, 0])
1249
    nan
1250
    >>> imean([2, 4])
1251
    2.9430355293715387
1252
    """
1253
    if len(nums) == 1:
1254
        return nums[0]
1255
    if len(nums) > 2:
1256
        raise AttributeError('imean supports no more than two values')
1257
    if nums[0] <= 0 or nums[1] <= 0:
1258
        return float('NaN')
1259
    elif nums[0] == nums[1]:
1260
        return nums[0]
1261
    return ((1/math.e) *
1262
            (nums[0]**nums[0]/nums[1]**nums[1])**(1/(nums[0]-nums[1])))
1263
1264
1265
def seiffert_mean(nums):
1266
    r"""Return Seiffert's mean.
1267
1268
    Seiffert's mean of two numbers x and y is:
1269
    :math:`\\frac{x - y}{4 \\cdot arctan \\sqrt{\\frac{x}{y}} - \\pi}`
1270
1271
    Cf. http://www.helsinki.fi/~hasto/pp/miaPreprint.pdf
1272
1273
    :param list nums: A series of numbers
1274
    :returns: Sieffert's mean of nums
1275
    :rtype: float
1276
1277
    >>> seiffert_mean([1, 2])
1278
    1.4712939827611637
1279
    >>> seiffert_mean([1, 0])
1280
    0.3183098861837907
1281
    >>> seiffert_mean([2, 4])
1282
    2.9425879655223275
1283
    >>> seiffert_mean([2, 1000])
1284
    336.84053300118825
1285
    """
1286
    if len(nums) == 1:
1287
        return nums[0]
1288
    if len(nums) > 2:
1289
        raise AttributeError('seiffert_mean supports no more than two values')
1290
    if nums[0]+nums[1] == 0 or nums[0]-nums[1] == 0:
1291
        return float('NaN')
1292
    return (nums[0]-nums[1])/(2*math.asin((nums[0]-nums[1])/(nums[0]+nums[1])))
1293
1294
1295
def lehmer_mean(nums, exp=2):
1296
    r"""Return Lehmer mean.
1297
1298
    The Lehmer mean is:
1299
    :math:`\\frac{\\sum\\limits_i{x_i^p}}{\\sum\\limits_i{x_i^(p-1)}}`
1300
1301
    Cf. https://en.wikipedia.org/wiki/Lehmer_mean
1302
1303
    :param list nums: A series of numbers
1304
    :param numeric exp: The exponent of the Lehmer mean
1305
    :returns: The Lehmer mean of nums for the given exponent
1306
    :rtype: float
1307
1308
    >>> lehmer_mean([1, 2, 3, 4])
1309
    3.0
1310
    >>> lehmer_mean([1, 2])
1311
    1.6666666666666667
1312
    >>> lehmer_mean([0, 5, 1000])
1313
    995.0497512437811
1314
    """
1315
    return sum(x**exp for x in nums)/sum(x**(exp-1) for x in nums)
1316
1317
1318
def heronian_mean(nums):
1319
    r"""Return Heronian mean.
1320
1321
    The Heronian mean is:
1322
    :math:`\\frac{\\sum\\limits_{i, j}\\sqrt{{x_i \\cdot x_j}}}
1323
    {|nums| \\cdot \\frac{|nums| + 1}{2}}`
1324
    for :math:`j \\ge i`
1325
1326
    Cf. https://en.wikipedia.org/wiki/Heronian_mean
1327
1328
    :param list nums: A series of numbers
1329
    :returns: The Heronian mean of nums
1330
    :rtype: float
1331
1332
    >>> heronian_mean([1, 2, 3, 4])
1333
    2.3888282852609093
1334
    >>> heronian_mean([1, 2])
1335
    1.4714045207910316
1336
    >>> heronian_mean([0, 5, 1000])
1337
    179.28511301977582
1338
    """
1339
    mag = len(nums)
1340
    rolling_sum = 0
1341
    for i in range(mag):
1342
        for j in range(i, mag):
1343
            if nums[i] == nums[j]:
1344
                rolling_sum += nums[i]
1345
            else:
1346
                rolling_sum += (nums[i]*nums[j])**(0.5)
1347
    return rolling_sum * 2 / (mag*(mag+1))
1348
1349
1350
def hoelder_mean(nums, exp=2):
1351
    r"""Return Hölder (power/generalized) mean.
1352
1353
    The Hölder mean is defined as:
1354
    :math:`\\sqrt[p]{\\frac{1}{|nums|} \\cdot \\sum\\limits_i{x_i^p}}`
1355
    for :math:`p \\ne 0`, and the geometric mean for :math:`p = 0`
1356
1357
    Cf. https://en.wikipedia.org/wiki/Generalized_mean
1358
1359
    :param list nums: A series of numbers
1360
    :param numeric exp: The exponent of the Hölder mean
1361
    :returns: The Hölder mean of nums for the given exponent
1362
    :rtype: float
1363
1364
    >>> hoelder_mean([1, 2, 3, 4])
1365
    2.7386127875258306
1366
    >>> hoelder_mean([1, 2])
1367
    1.5811388300841898
1368
    >>> hoelder_mean([0, 5, 1000])
1369
    577.3574860228857
1370
    """
1371
    if exp == 0:
1372
        return gmean(nums)
1373
    return ((1/len(nums)) * sum(i**exp for i in nums))**(1/exp)
1374
1375
1376
def agmean(nums):
1377
    """Return arithmetic-geometric mean.
1378
1379
    Iterates between arithmetic & geometric means until they converge to
1380
    a single value (rounded to 12 digits)
1381
    Cf. https://en.wikipedia.org/wiki/Arithmetic–geometric_mean
1382
1383
    :param list nums: A series of numbers
1384
    :returns: The arithmetic-geometric mean of nums
1385
    :rtype: float
1386
1387
    >>> agmean([1, 2, 3, 4])
1388
    2.3545004777751077
1389
    >>> agmean([1, 2])
1390
    1.4567910310469068
1391
    >>> agmean([0, 5, 1000])
1392
    2.9753977059954195e-13
1393
    """
1394
    m_a = amean(nums)
1395
    m_g = gmean(nums)
1396
    if math.isnan(m_a) or math.isnan(m_g):
1397
        return float('nan')
1398
    while round(m_a, 12) != round(m_g, 12):
1399
        m_a, m_g = (m_a+m_g)/2, (m_a*m_g)**(1/2)
1400
    return m_a
1401
1402
1403
def ghmean(nums):
1404
    """Return geometric-harmonic mean.
1405
1406
    Iterates between geometric & harmonic means until they converge to
1407
    a single value (rounded to 12 digits)
1408
    Cf. https://en.wikipedia.org/wiki/Geometric–harmonic_mean
1409
1410
    :param list nums: A series of numbers
1411
    :returns: The geometric-harmonic mean of nums
1412
    :rtype: float
1413
1414
    >>> ghmean([1, 2, 3, 4])
1415
    2.058868154613003
1416
    >>> ghmean([1, 2])
1417
    1.3728805006183502
1418
    >>> ghmean([0, 5, 1000])
1419
    0.0
1420
1421
    >>> ghmean([0, 0])
1422
    0.0
1423
    >>> ghmean([0, 0, 5])
1424
    nan
1425
    """
1426
    m_g = gmean(nums)
1427
    m_h = hmean(nums)
1428
    if math.isnan(m_g) or math.isnan(m_h):
1429
        return float('nan')
1430
    while round(m_h, 12) != round(m_g, 12):
1431
        m_g, m_h = (m_g*m_h)**(1/2), (2*m_g*m_h)/(m_g+m_h)
1432
    return m_g
1433
1434
1435
def aghmean(nums):
1436
    """Return arithmetic-geometric-harmonic mean.
1437
1438
    Iterates over arithmetic, geometric, & harmonic means until they
1439
    converge to a single value (rounded to 12 digits), following the
1440
    method described by Raïssouli, Leazizi, & Chergui:
1441
    http://www.emis.de/journals/JIPAM/images/014_08_JIPAM/014_08.pdf
1442
1443
    :param list nums: A series of numbers
1444
    :returns: The arithmetic-geometric-harmonic mean of nums
1445
    :rtype: float
1446
1447
    >>> aghmean([1, 2, 3, 4])
1448
    2.198327159900212
1449
    >>> aghmean([1, 2])
1450
    1.4142135623731884
1451
    >>> aghmean([0, 5, 1000])
1452
    335.0
1453
    """
1454
    m_a = amean(nums)
1455
    m_g = gmean(nums)
1456
    m_h = hmean(nums)
1457
    if math.isnan(m_a) or math.isnan(m_g) or math.isnan(m_h):
1458
        return float('nan')
1459
    while (round(m_a, 12) != round(m_g, 12) and
1460
           round(m_g, 12) != round(m_h, 12)):
1461
        m_a, m_g, m_h = ((m_a+m_g+m_h)/3,
1462
                         (m_a*m_g*m_h)**(1/3),
1463
                         3/(1/m_a+1/m_g+1/m_h))
1464
    return m_a
1465
1466
1467
def midrange(nums):
1468
    """Return midrange.
1469
1470
    The midrange is the arithmetic mean of the maximum & minimum of a series.
1471
1472
    Cf. https://en.wikipedia.org/wiki/Midrange
1473
1474
    :param list nums: A series of numbers
1475
    :returns: The midrange of nums
1476
    :rtype: float
1477
1478
    >>> midrange([1, 2, 3])
1479
    2.0
1480
    >>> midrange([1, 2, 2, 3])
1481
    2.0
1482
    >>> midrange([1, 2, 1000, 3])
1483
    500.5
1484
    """
1485
    return 0.5*(max(nums)+min(nums))
1486
1487
1488
def median(nums):
1489
    """Return median.
1490
1491
    With numbers sorted by value, the median is the middle value (if there is
1492
    an odd number of values) or the arithmetic mean of the two middle values
1493
    (if there is an even number of values).
1494
1495
    Cf. https://en.wikipedia.org/wiki/Median
1496
1497
    :param list nums: A series of numbers
1498
    :returns: The median of nums
1499
    :rtype: int or float
1500
1501
    >>> median([1, 2, 3])
1502
    2
1503
    >>> median([1, 2, 3, 4])
1504
    2.5
1505
    >>> median([1, 2, 2, 4])
1506
    2
1507
    """
1508
    nums = sorted(nums)
1509
    mag = len(nums)
1510
    if mag % 2:
1511
        mag = int((mag-1)/2)
1512
        return nums[mag]
1513
    mag = int(mag/2)
1514
    med = (nums[mag-1]+nums[mag])/2
1515
    return med if not med.is_integer() else int(med)
1516
1517
1518
def mode(nums):
1519
    """Return mode.
1520
1521
    The mode of a series is the most common element of that series
1522
1523
    Cf. https://en.wikipedia.org/wiki/Mode_(statistics)
1524
1525
    :param list nums: A series of numbers
1526
    :returns: The mode of nums
1527
    :rtype: float
1528
1529
    >>> mode([1, 2, 2, 3])
1530
    2
1531
    """
1532
    return Counter(nums).most_common(1)[0][0]
1533
1534
1535
def var(nums, mean_func=amean, ddof=0):
1536
    """Calculate the variance.
1537
1538
    :param nums:
1539
    :param mean_func:
1540
    :param ddof:
1541
    :return:
1542
    """
1543
    x_bar = mean_func(nums)
1544
    return sum((x - x_bar) ** 2 for x in nums) / (len(nums) - ddof)
1545
1546
1547
def std(nums, mean_func=amean, ddof=0):
1548
    """Return standard deviation
1549
1550
    :param nums:
1551
    :param mean_func:
1552
    :param ddof:
1553
    :return:
1554
    """
1555
    return var(nums, mean_func, ddof)**0.5
1556
1557
1558
if __name__ == '__main__':
1559
    import doctest
1560
    doctest.testmod()
1561