Completed
Push — master ( 3ac297...afe14d )
by Chris
16:40 queued 07:25
created

abydos.stats._mean.var()   A

Complexity

Conditions 1

Size

Total Lines 36
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 3
dl 0
loc 36
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
3
# Copyright 2014-2018 by Christopher C. Little.
4
# This file is part of Abydos.
5
#
6
# Abydos is free software: you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation, either version 3 of the License, or
9
# (at your option) any later version.
10
#
11
# Abydos is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with Abydos. If not, see <http://www.gnu.org/licenses/>.
18
19 1
r"""abydos.stats._mean.
20
21
The stats._mean module defines functions for calculating means and other
22
measures of central tendencies.
23
"""
24
25 1
from __future__ import (
26
    absolute_import,
27
    division,
28
    print_function,
29
    unicode_literals,
30
)
31
32 1
import math
33 1
from collections import Counter
34
35 1
from six.moves import range
36
37 1
from ..util._prod import _prod
38
39 1
__all__ = [
40
    'amean',
41
    'gmean',
42
    'hmean',
43
    'agmean',
44
    'ghmean',
45
    'aghmean',
46
    'cmean',
47
    'imean',
48
    'lmean',
49
    'qmean',
50
    'heronian_mean',
51
    'hoelder_mean',
52
    'lehmer_mean',
53
    'seiffert_mean',
54
    'median',
55
    'midrange',
56
    'mode',
57
    'std',
58
    'var',
59
]
60
61
62 1
def amean(nums):
    r"""Return arithmetic mean.

    The arithmetic mean is defined as:
    :math:`\frac{\sum{nums}}{|nums|}`

    Cf. https://en.wikipedia.org/wiki/Arithmetic_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The arithmetic mean of nums

    Examples
    --------
    >>> amean([1, 2, 3, 4])
    2.5
    >>> amean([1, 2])
    1.5
    >>> amean([0, 5, 1000])
    335.0

    """
    total = sum(nums)
    return total / len(nums)
91
92
93 1
def gmean(nums):
    r"""Return geometric mean.

    The geometric mean is defined as:
    :math:`\sqrt[|nums|]{\prod\limits_{i} nums_{i}}`

    Cf. https://en.wikipedia.org/wiki/Geometric_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The geometric mean of nums

    Examples
    --------
    >>> gmean([1, 2, 3, 4])
    2.213363839400643
    >>> gmean([1, 2])
    1.4142135623730951
    >>> gmean([0, 5, 1000])
    0.0

    """
    # The |nums|-th root is taken by raising the product to 1/|nums|.
    product = _prod(nums)
    return product ** (1 / len(nums))
122
123
124 1
def hmean(nums):
    r"""Return harmonic mean.

    The harmonic mean is defined as:
    :math:`\frac{|nums|}{\sum\limits_{i}\frac{1}{nums_i}}`

    Following the behavior of Wolfram|Alpha:
    - If one of the values in nums is 0, return 0.
    - If more than one value in nums is 0, return NaN.

    If every value in nums is equal (including the single-value case), that
    value is returned unchanged, before the zero checks above are applied.

    Cf. https://en.wikipedia.org/wiki/Harmonic_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The harmonic mean of nums (the integer 0 when exactly one value
        is 0)

    Raises
    ------
    AttributeError
        hmean requires at least one value

    Examples
    --------
    >>> hmean([1, 2, 3, 4])
    1.9200000000000004
    >>> hmean([1, 2])
    1.3333333333333333
    >>> hmean([0, 5, 1000])
    0

    """
    if len(nums) < 1:
        raise AttributeError('hmean requires at least one value')

    # A constant series (or a single value) is its own harmonic mean; this
    # also short-circuits an all-zero series before the zero handling below.
    first = nums[0]
    if all(value == first for value in nums[1:]):
        return first

    if 0 in nums:
        # Reciprocals of zero are undefined, so zeros are special-cased.
        if nums.count(0) > 1:
            return float('nan')
        return 0

    return len(nums) / sum(1 / value for value in nums)
177
178
179 1
def qmean(nums):
    r"""Return quadratic mean.

    The quadratic mean (root mean square) is defined as:
    :math:`\sqrt{\sum\limits_{i} \frac{nums_i^2}{|nums|}}`

    Cf. https://en.wikipedia.org/wiki/Quadratic_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The quadratic mean of nums

    Examples
    --------
    >>> qmean([1, 2, 3, 4])
    2.7386127875258306
    >>> qmean([1, 2])
    1.5811388300841898
    >>> qmean([0, 5, 1000])
    577.3574860228857

    """
    mean_of_squares = sum(value ** 2 for value in nums) / len(nums)
    return mean_of_squares ** 0.5
208
209
210 1
def cmean(nums):
    r"""Return contraharmonic mean.

    The contraharmonic mean is:
    :math:`\frac{\sum\limits_i x_i^2}{\sum\limits_i x_i}`

    Cf. https://en.wikipedia.org/wiki/Contraharmonic_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The contraharmonic mean of nums

    Examples
    --------
    >>> cmean([1, 2, 3, 4])
    3.0
    >>> cmean([1, 2])
    1.6666666666666667
    >>> cmean([0, 5, 1000])
    995.0497512437811

    """
    sum_of_squares = sum(value ** 2 for value in nums)
    return sum_of_squares / sum(nums)
239
240
241 1
def lmean(nums):
    r"""Return logarithmic mean.

    The logarithmic mean of an arbitrarily long series is defined by
    http://www.survo.fi/papers/logmean.pdf
    as:
    :math:`L(x_1, x_2, ..., x_n) =
    (n-1)! \sum\limits_{i=1}^n \frac{x_i}
    {\prod\limits_{\substack{j = 1\\j \ne i}}^n
    ln \frac{x_i}{x_j}}`

    Cf. https://en.wikipedia.org/wiki/Logarithmic_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The logarithmic mean of nums

    Raises
    ------
    AttributeError
        No two values in the nums list may be equal

    Examples
    --------
    >>> lmean([1, 2, 3, 4])
    2.2724242417489258
    >>> lmean([1, 2])
    1.4426950408889634

    """
    # Equal values would put ln(1) = 0 into a denominator below.
    if len(nums) != len(set(nums)):
        raise AttributeError('No two values in the nums list may be equal')

    total = 0
    for i, x_i in enumerate(nums):
        # Product of ln(x_i / x_j) over every other element x_j.
        log_product = 1
        for j, x_j in enumerate(nums):
            if i != j:
                log_product *= math.log(x_i / x_j)
        total += x_i / log_product
    return math.factorial(len(nums) - 1) * total
287
288
289 1
def imean(nums):
    r"""Return identric (exponential) mean.

    The identric mean of two numbers x and y is:
    x if x = y
    otherwise :math:`\frac{1}{e} \sqrt[x-y]{\frac{x^x}{y^y}}`

    Cf. https://en.wikipedia.org/wiki/Identric_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The identric mean of nums (NaN if either value is not positive)

    Raises
    ------
    AttributeError
        imean supports no more than two values

    Examples
    --------
    >>> imean([1, 2])
    1.4715177646857693
    >>> imean([1, 0])
    nan
    >>> imean([2, 4])
    2.9430355293715387

    """
    if len(nums) == 1:
        return nums[0]
    if len(nums) > 2:
        raise AttributeError('imean supports no more than two values')

    x, y = nums[0], nums[1]
    if x <= 0 or y <= 0:
        return float('NaN')
    if x == y:
        return x

    try:
        return (1 / math.e) * (x ** x / y ** y) ** (1 / (x - y))
    except (OverflowError, ZeroDivisionError):
        # x**x / y**y leaves the float range for moderately large inputs
        # (overflowing, or underflowing to 0.0 which then fails under a
        # negative exponent). Evaluate the same expression in log space:
        # ln I = (x ln x - y ln y) / (x - y) - 1
        return math.exp((x * math.log(x) - y * math.log(y)) / (x - y) - 1)
334
335
336 1
def seiffert_mean(nums):
    r"""Return Seiffert's mean.

    Seiffert's mean of two numbers x and y is:
    :math:`\frac{x - y}{4 \cdot arctan \sqrt{\frac{x}{y}} - \pi}`

    It is defined in :cite:`Seiffert:1993`.

    This implementation evaluates the arcsine form
    :math:`\frac{x - y}{2 \cdot arcsin \frac{x - y}{x + y}}`.

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        Seiffert's mean of nums (NaN if the two values are equal or sum
        to 0)

    Raises
    ------
    AttributeError
        seiffert_mean supports no more than two values

    Examples
    --------
    >>> seiffert_mean([1, 2])
    1.4712939827611637
    >>> seiffert_mean([1, 0])
    0.3183098861837907
    >>> seiffert_mean([2, 4])
    2.9425879655223275
    >>> seiffert_mean([2, 1000])
    336.84053300118825

    """
    if len(nums) == 1:
        return nums[0]
    if len(nums) > 2:
        raise AttributeError('seiffert_mean supports no more than two values')

    total = nums[0] + nums[1]
    difference = nums[0] - nums[1]
    # A zero sum would divide by zero inside asin(); a zero difference
    # would give 0 / 0 in the final quotient.
    if total == 0 or difference == 0:
        return float('NaN')
    return difference / (2 * math.asin(difference / total))
380
381
382 1
def lehmer_mean(nums, exp=2):
    r"""Return Lehmer mean.

    The Lehmer mean is:
    :math:`\frac{\sum\limits_i{x_i^p}}{\sum\limits_i{x_i^{p-1}}}`

    Cf. https://en.wikipedia.org/wiki/Lehmer_mean

    Parameters
    ----------
    nums : list
        A series of numbers
    exp : numeric
        The exponent of the Lehmer mean

    Returns
    -------
    float
        The Lehmer mean of nums for the given exponent

    Examples
    --------
    >>> lehmer_mean([1, 2, 3, 4])
    3.0
    >>> lehmer_mean([1, 2])
    1.6666666666666667
    >>> lehmer_mean([0, 5, 1000])
    995.0497512437811

    """
    numerator = sum(value ** exp for value in nums)
    denominator = sum(value ** (exp - 1) for value in nums)
    return numerator / denominator
413
414
415 1
def heronian_mean(nums):
    r"""Return Heronian mean.

    The Heronian mean is:
    :math:`\frac{\sum\limits_{i, j}\sqrt{{x_i \cdot x_j}}}
    {|nums| \cdot \frac{|nums| + 1}{2}}`
    for :math:`j \ge i`

    Cf. https://en.wikipedia.org/wiki/Heronian_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The Heronian mean of nums

    Examples
    --------
    >>> heronian_mean([1, 2, 3, 4])
    2.3888282852609093
    >>> heronian_mean([1, 2])
    1.4714045207910316
    >>> heronian_mean([0, 5, 1000])
    179.28511301977582

    """
    count = len(nums)
    total = 0
    for i, x_i in enumerate(nums):
        # Pair x_i with itself and with every later element (j >= i).
        for x_j in nums[i:]:
            if x_i == x_j:
                # sqrt(x * x) is x; adding it directly skips the root.
                total += x_i
            else:
                total += (x_i * x_j) ** 0.5
    return total * 2 / (count * (count + 1))
454
455
456 1
def hoelder_mean(nums, exp=2):
    r"""Return Hölder (power/generalized) mean.

    The Hölder mean is defined as:
    :math:`\sqrt[p]{\frac{1}{|nums|} \cdot \sum\limits_i{x_i^p}}`
    for :math:`p \ne 0`, and the geometric mean for :math:`p = 0`

    Cf. https://en.wikipedia.org/wiki/Generalized_mean

    Parameters
    ----------
    nums : list
        A series of numbers
    exp : numeric
        The exponent of the Hölder mean

    Returns
    -------
    float
        The Hölder mean of nums for the given exponent

    Examples
    --------
    >>> hoelder_mean([1, 2, 3, 4])
    2.7386127875258306
    >>> hoelder_mean([1, 2])
    1.5811388300841898
    >>> hoelder_mean([0, 5, 1000])
    577.3574860228857

    """
    # The power formula would divide by zero at exp == 0, so that case is
    # delegated to the geometric mean.
    if exp == 0:
        return gmean(nums)
    mean_power = (1 / len(nums)) * sum(value ** exp for value in nums)
    return mean_power ** (1 / exp)
490
491
492 1
def agmean(nums):
    """Return arithmetic-geometric mean.

    Iterates between arithmetic & geometric means until they converge to
    a single value (rounded to 12 digits).

    Cf. https://en.wikipedia.org/wiki/Arithmetic-geometric_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The arithmetic-geometric mean of nums (NaN if either starting
        mean is NaN)

    Examples
    --------
    >>> agmean([1, 2, 3, 4])
    2.3545004777751077
    >>> agmean([1, 2])
    1.4567910310469068
    >>> agmean([0, 5, 1000])
    2.9753977059954195e-13

    """
    m_a = amean(nums)
    m_g = gmean(nums)
    # NaN never compares equal, so the loop below would not terminate.
    if math.isnan(m_a) or math.isnan(m_g):
        return float('nan')
    while round(m_a, 12) != round(m_g, 12):
        # Both new values are computed from the previous pair.
        m_a, m_g = (m_a + m_g) / 2, (m_a * m_g) ** (1 / 2)
    return m_a
527
528
529 1
def ghmean(nums):
    """Return geometric-harmonic mean.

    Iterates between geometric & harmonic means until they converge to
    a single value (rounded to 12 digits).

    Cf. https://en.wikipedia.org/wiki/Geometric-harmonic_mean

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The geometric-harmonic mean of nums (NaN if either starting mean
        is NaN)

    Examples
    --------
    >>> ghmean([1, 2, 3, 4])
    2.058868154613003
    >>> ghmean([1, 2])
    1.3728805006183502
    >>> ghmean([0, 5, 1000])
    0.0

    >>> ghmean([0, 0])
    0.0
    >>> ghmean([0, 0, 5])
    nan

    """
    m_g = gmean(nums)
    m_h = hmean(nums)
    # NaN never compares equal, so the loop below would not terminate.
    if math.isnan(m_g) or math.isnan(m_h):
        return float('nan')
    while round(m_h, 12) != round(m_g, 12):
        # Both new values are computed from the previous pair; the second
        # expression is the harmonic mean of the two values.
        m_g, m_h = (m_g * m_h) ** (1 / 2), (2 * m_g * m_h) / (m_g + m_h)
    return m_g
569
570
571 1
def aghmean(nums):
    """Return arithmetic-geometric-harmonic mean.

    Iterates over arithmetic, geometric, & harmonic means until they
    converge to a single value (rounded to 12 digits), following the
    method described in :cite:`Raissouli:2009`.

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The arithmetic-geometric-harmonic mean of nums (NaN if any
        starting mean is NaN)

    Examples
    --------
    >>> aghmean([1, 2, 3, 4])
    2.198327159900212
    >>> aghmean([1, 2])
    1.4142135623731884
    >>> aghmean([0, 5, 1000])
    335.0

    """
    m_a = amean(nums)
    m_g = gmean(nums)
    m_h = hmean(nums)
    if math.isnan(m_a) or math.isnan(m_g) or math.isnan(m_h):
        return float('nan')

    # Iteration stops as soon as EITHER adjacent pair (arithmetic/geometric
    # or geometric/harmonic) agrees to 12 decimal places. This is why a
    # series containing a zero (geometric == harmonic == 0) returns its
    # arithmetic mean without iterating.
    while True:
        g_rounded = round(m_g, 12)
        if round(m_a, 12) == g_rounded or g_rounded == round(m_h, 12):
            break
        # All three new values are computed from the previous triple.
        m_a, m_g, m_h = (
            (m_a + m_g + m_h) / 3,
            (m_a * m_g * m_h) ** (1 / 3),
            3 / (1 / m_a + 1 / m_g + 1 / m_h),
        )
    return m_a
612
613
614 1
def midrange(nums):
    """Return midrange.

    The midrange is the arithmetic mean of the maximum & minimum of a series.

    Cf. https://en.wikipedia.org/wiki/Midrange

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    float
        The midrange of nums

    Examples
    --------
    >>> midrange([1, 2, 3])
    2.0
    >>> midrange([1, 2, 2, 3])
    2.0
    >>> midrange([1, 2, 1000, 3])
    500.5

    """
    highest = max(nums)
    lowest = min(nums)
    return 0.5 * (highest + lowest)
642
643
644 1
def median(nums):
    """Return median.

    With numbers sorted by value, the median is the middle value (if there is
    an odd number of values) or the arithmetic mean of the two middle values
    (if there is an even number of values).

    Cf. https://en.wikipedia.org/wiki/Median

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    int or float
        The median of nums

    Examples
    --------
    >>> median([1, 2, 3])
    2
    >>> median([1, 2, 3, 4])
    2.5
    >>> median([1, 2, 2, 4])
    2

    """
    ordered = sorted(nums)
    count = len(ordered)
    # Floor division yields the middle index for odd counts and the upper
    # of the two middle indices for even counts.
    middle = count // 2
    if count % 2:
        return ordered[middle]
    med = (ordered[middle - 1] + ordered[middle]) / 2
    # Collapse whole-valued averages to int.
    return int(med) if med.is_integer() else med
681
682
683 1
def mode(nums):
    """Return the mode.

    The mode of a series is the most common element of that series

    Cf. https://en.wikipedia.org/wiki/Mode_(statistics)

    Parameters
    ----------
    nums : list
        A series of numbers

    Returns
    -------
    int or float
        The mode of nums

    Example
    -------
    >>> mode([1, 2, 2, 3])
    2

    """
    # most_common(1) yields a one-item list of (element, count) pairs.
    (element, _), = Counter(nums).most_common(1)[:1] or [(None, 0)][:0] or \
        Counter(nums).most_common(1)
    return element
707
708
709 1
def var(nums, mean_func=amean, ddof=0):
    r"""Calculate the variance.

    The variance (:math:`\sigma^2`) of a series of numbers (:math:`x_i`) with
    mean :math:`\mu` and population :math:`N` is:

    :math:`\sigma^2 = \frac{1}{N}\sum_{i=1}^{N}(x_i-\mu)^2`.

    Cf. https://en.wikipedia.org/wiki/Variance

    Parameters
    ----------
    nums : list
        A series of numbers
    mean_func : function
        A mean function (amean by default)
    ddof : int
        The degrees of freedom (0 by default); the sum of squared
        deviations is divided by ``len(nums) - ddof``

    Returns
    -------
    float
        The variance of the values in the series

    Examples
    --------
    >>> var([1, 1, 1, 1])
    0.0
    >>> var([1, 2, 3, 4])
    1.25
    >>> round(var([1, 2, 3, 4], ddof=1), 12)
    1.666666666667

    """
    center = mean_func(nums)
    squared_deviations = sum((value - center) ** 2 for value in nums)
    return squared_deviations / (len(nums) - ddof)
745
746
747 1
def std(nums, mean_func=amean, ddof=0):
    """Return the standard deviation.

    The standard deviation of a series of values is the square root of the
    variance.

    Cf. https://en.wikipedia.org/wiki/Standard_deviation

    Parameters
    ----------
    nums : list
        A series of numbers
    mean_func : function
        A mean function (amean by default)
    ddof : int
        The degrees of freedom (0 by default), passed through to var

    Returns
    -------
    float
        The standard deviation of the values in the series

    Examples
    --------
    >>> std([1, 1, 1, 1])
    0.0
    >>> round(std([1, 2, 3, 4]), 12)
    1.11803398875
    >>> round(std([1, 2, 3, 4], ddof=1), 12)
    1.290994448736

    """
    variance = var(nums, mean_func, ddof)
    return variance ** 0.5
780
781
782
if __name__ == '__main__':
    # Run the docstring examples in this module as tests when the module
    # is executed directly.
    import doctest

    doctest.testmod()
786