GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Pull Request — master (#11)
by
unknown
01:12
created

DistributionTestBase.setUp()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
c 2
b 0
f 0
dl 0
loc 3
rs 10
# Copyright (c) 2014, Salesforce.com, Inc.  All rights reserved.
# Copyright (c) 2015, Gamelan Labs, Inc.
# Copyright (c) 2016, Google, Inc.
# Copyright (c) 2016, Gamelan Labs, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# - Neither the name of Salesforce.com nor the names of its contributors
#   may be used to endorse or promote products derived from this
#   software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import division
try:
    from itertools import izip as zip
except ImportError:
    pass
import random
from unittest import skip
from unittest import TestCase

import numpy
import scipy.stats
from numpy import pi
try:
    from numpy.testing import rand
except ImportError:
    # ``numpy.testing.rand`` is not available in recent NumPy releases;
    # ``numpy.random.rand`` accepts the same shape arguments.
    from numpy.random import rand

from nose.tools import assert_almost_equal
from nose.tools import assert_equal
from nose.tools import assert_greater
from nose.tools import assert_less

from goftests import get_dim
from goftests import multinomial_goodness_of_fit
from goftests import discrete_goodness_of_fit
from goftests import auto_density_goodness_of_fit
from goftests import mixed_density_goodness_of_fit
from goftests import split_discrete_continuous
from goftests import volume_of_sphere
56
57
# Sample count used by the distribution tests is
# ``NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim``.
NUM_BASE_SAMPLES = 250

NUM_SAMPLES_SCALE = 1000

# Threshold that goodness-of-fit results are compared against: a good
# fit must score above it, a deliberately bad fit below it.
TEST_FAILURE_RATE = 5e-4
62
63
64
def test_multinomial_goodness_of_fit():
    """Check multinomial_goodness_of_fit on matching and mismatched counts.

    For every dimension from 2 to 19, counts drawn from ``probs`` must score
    above ``TEST_FAILURE_RATE`` while counts drawn from a uniform
    multinomial must score below it.
    """
    random.seed(0)
    numpy.random.seed(0)
    sample_count = int(1e5)
    for dim in range(2, 20):
        probs = numpy.random.dirichlet([1] * dim)

        # Counts sampled from the same probabilities: expected to fit.
        counts = numpy.random.multinomial(sample_count, probs)
        p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
        assert_greater(p_good, TEST_FAILURE_RATE)

        # Counts sampled uniformly: expected not to fit ``probs``.
        unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
        p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
        assert_less(p_bad, TEST_FAILURE_RATE)
78
79
80
def test_volume_of_sphere():
    """Compare volume_of_sphere with the closed forms for 1, 2 and 3 dims."""
    for r in [0.1, 1.0, 10.0]:
        assert_almost_equal(volume_of_sphere(1, r), 2 * r)
        assert_almost_equal(volume_of_sphere(2, r), pi * r ** 2)
        # Float literals keep this true division on every Python version.
        assert_almost_equal(volume_of_sphere(3, r), 4.0 / 3.0 * pi * r ** 3)
85
86
87
# Each entry is ``(mixed, discrete, continuous)``: the input handed to
# ``split_discrete_continuous`` and the two parts it is expected to return.
SPLIT_EXAMPLES = [
    (False, False, []),
    (0, 0, []),
    ('abc', 'abc', []),
    (0.0, None, [0.0]),
    ((), (), []),
    ([], (), []),
    ((0, ), (0, ), []),
    ([0], (0, ), []),
    ((0.0, ), (None, ), [0.0]),
    ([0.0], (None, ), [0.0]),
    ([True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
     (True, 1, 'xyz', None, (None, (), ((None,),))),
     [3.14, 2.71]),
    (numpy.zeros(3), (None, None, None), [0.0, 0.0, 0.0]),
]
103
104
105
def test_split_continuous_discrete():
    """Check split_discrete_continuous against every SPLIT_EXAMPLES entry."""
    for mixed, discrete, continuous in SPLIT_EXAMPLES:
        d, c = split_discrete_continuous(mixed)
        assert_equal(d, discrete)
        assert_almost_equal(c, continuous)
110
111
112
class DistributionTestBase(object):
    """Abstract base class for probability distribution unit tests.

    This class supplies two test methods, :meth:`.test_good_fit` and
    :meth:`.test_mixed_density_goodness_of_fit`, for testing the
    goodness of fit functions.

    Subclasses must override and implement one class attribute and two
    instance methods. The :attr:`.dist` class attribute must be set to
    one of SciPy probability distribution constructors in
    :mod:`scipy.stats`. The :meth:`.goodness_of_fit` method must return
    the result of calling one of the goodness of fit functions being
    tested. The :meth:`.pdf` method must return the probability of a
    single sample; it is called as ``pdf(sample, *param)``.

    Subclasses may also set the :attr:`.params` attribute, which is a
    list of tuples that will be provided as arguments to the underlying
    SciPy distribution constructor as specified in :attr:`.dist`. If not
    specified, random arguments will be provided.

    """

    #: The SciPy distribution constructor to test.
    dist = None

    #: An optional list of arguments to the distribution constructor.
    #:
    #: Each tuple in this list will be provided as the positional
    #: arguments to the distribution constructor specified in
    #: :attr:`.dist`. If not specified, random arguments will be
    #: provided.
    params = None

    def setUp(self):
        """Seed the stdlib and NumPy random generators before each test."""
        random.seed(0)
        numpy.random.seed(0)

    def dist_params(self):
        """Return the list of argument tuples to test :attr:`.dist` with.

        When :attr:`.params` is unset, a single tuple of
        ``self.dist.numargs`` values in ``[1, 2)`` is generated from the
        stdlib ``random`` generator, which :meth:`.setUp` seeds.
        """
        if self.params is not None:
            return self.params
        # If there are no parameters, then we provide a random one.
        return [tuple(1 + random.random() for _ in range(self.dist.numargs))]

    def _samples_and_probabilities(self):
        """Yield ``(samples, probabilities)`` for each parameter tuple."""
        for param in self.dist_params():
            # Two throwaway draws are only used to measure the sample dim.
            dim = get_dim(self.dist.rvs(*param, size=2)[0])
            sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim
            samples = list(self.dist.rvs(*param, size=sample_count))
            probabilities = [self.pdf(sample, *param) for sample in samples]
            yield samples, probabilities

    def test_mixed_density_goodness_of_fit(self):
        """mixed_density_goodness_of_fit must exceed TEST_FAILURE_RATE."""
        for samples, probabilities in self._samples_and_probabilities():
            gof = mixed_density_goodness_of_fit(samples, probabilities)
            assert_greater(gof, TEST_FAILURE_RATE)

    def test_good_fit(self):
        """:meth:`.goodness_of_fit` must exceed TEST_FAILURE_RATE."""
        for samples, probabilities in self._samples_and_probabilities():
            gof = self.goodness_of_fit(samples, probabilities)
            assert_greater(gof, TEST_FAILURE_RATE)

    def goodness_of_fit(self, samples, probabilities):
        """Return the goodness of fit score; subclasses must override."""
        raise NotImplementedError

    def pdf(self, *args, **kw):
        """Return the probability of a sample; subclasses must override."""
        raise NotImplementedError
178
179
180
class ContinuousTestBase(DistributionTestBase):
    """Abstract base class for testing continuous probability distributions.

    Concrete subclasses must set the :attr:`.dist` attribute to be the
    constructor for a continuous probability distribution.

    """

    def goodness_of_fit(self, samples, probabilities):
        """Return ``auto_density_goodness_of_fit(samples, probabilities)``."""
        return auto_density_goodness_of_fit(samples, probabilities)

    def pdf(self, *args, **kw):
        """Forward all arguments to ``self.dist.pdf``."""
        return self.dist.pdf(*args, **kw)
194
195
196
class DiscreteTestBase(DistributionTestBase):
    """Abstract base class for testing discrete probability distributions.

    Concrete subclasses must set the :attr:`.dist` attribute to be the
    constructor for a discrete probability distribution.

    """

    def goodness_of_fit(self, samples, probabilities):
        """Return ``discrete_goodness_of_fit`` for the given samples.

        The per-sample probabilities are first collapsed into a dict that
        maps each distinct sample value to its probability.
        """
        probs_dict = dict(zip(samples, probabilities))
        return discrete_goodness_of_fit(samples, probs_dict)

    def pdf(self, *args, **kw):
        """Forward all arguments to ``self.dist.pmf``."""
        return self.dist.pmf(*args, **kw)
211
212
213
#
# Discrete probability distributions.
#
216
217
class TestBernoulli(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.bernoulli``."""

    dist = scipy.stats.bernoulli

    params = [(0.2, )]
222
223
224
class TestBinomial(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.binom``."""

    dist = scipy.stats.binom

    params = [(40, 0.4)]
229
230
231
@skip('')
class TestBoltzmann(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.boltzmann`` (skipped)."""

    dist = scipy.stats.boltzmann
235
236
237
class TestDiscreteLaplacian(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.dlaplace``."""

    dist = scipy.stats.dlaplace

    params = [(0.8, )]
242
243
244
class TestGeometric(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.geom``."""

    dist = scipy.stats.geom

    params = [(0.1, )]
249
250
251
class TestHypergeometric(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.hypergeom``."""

    dist = scipy.stats.hypergeom

    params = [(40, 14, 24)]
256
257
258
class TestLogSeries(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.logser``."""

    dist = scipy.stats.logser

    params = [(0.9, )]
263
264
265
class TestNegativeBinomial(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.nbinom``."""

    dist = scipy.stats.nbinom

    params = [(40, 0.4)]
270
271
272
class TestPlanck(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.planck``."""

    dist = scipy.stats.planck

    params = [(0.51, )]
277
278
279
class TestPoisson(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.poisson``."""

    dist = scipy.stats.poisson

    params = [(20, )]
284
285
286
@skip('too sparse')
class TestRandInt(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.randint`` (skipped)."""

    dist = scipy.stats.randint
290
291
292
class TestSkellam(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.skellam``."""

    dist = scipy.stats.skellam
295
296
297
@skip('bug?')
class TestZipf(DiscreteTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.zipf`` (skipped)."""

    dist = scipy.stats.zipf

    params = [(1.2, )]
303
304
#
# Continuous probability distributions.
#
307
308
309
@skip('')
class TestAlpha(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.alpha`` (skipped)."""

    dist = scipy.stats.alpha
313
314
315
class TestAnglit(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.anglit``."""

    dist = scipy.stats.anglit
318
319
320
class TestArcsine(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.arcsine``."""

    dist = scipy.stats.arcsine
323
324
325
class TestBeta(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.beta``."""

    dist = scipy.stats.beta

    params = [
        (0.5, 0.5),
        (0.5, 1.5),
        (0.5, 2.5),
    ]
334
335
336
class TestBetaPrime(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.betaprime``."""

    dist = scipy.stats.betaprime
339
340
341
class TestBradford(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.bradford``."""

    dist = scipy.stats.bradford
344
345
346
class TestBurr(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.burr``."""

    dist = scipy.stats.burr
349
350
351
class TestCauchy(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.cauchy``."""

    dist = scipy.stats.cauchy
354
355
356
class TestChi(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.chi``."""

    dist = scipy.stats.chi
359
360
361
class TestChiSquared(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.chi2``."""

    dist = scipy.stats.chi2
364
365
366
class TestCosine(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.cosine``."""

    dist = scipy.stats.cosine
369
370
371
class TestDoubleGamma(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.dgamma``."""

    dist = scipy.stats.dgamma
374
375
376
class TestDoubleWeibull(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.dweibull``."""

    dist = scipy.stats.dweibull
379
380
381
class TestErlang(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.erlang``."""

    dist = scipy.stats.erlang

    params = [(7, )]
386
387
388
class TestExponential(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.expon``."""

    dist = scipy.stats.expon

    params = [(7, )]
393
394
395
class TestExponentiallyModifiedNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.exponnorm``."""

    dist = scipy.stats.exponnorm
398
399
400
class TestExponentiatedWeibull(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.exponweib``."""

    dist = scipy.stats.exponweib
403
404
405
class TestExponentialPower(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.exponpow``."""

    dist = scipy.stats.exponpow
408
409
410
class TestF(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.f``."""

    dist = scipy.stats.f
413
414
415
class TestFatigueLife(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.fatiguelife``."""

    dist = scipy.stats.fatiguelife
418
419
420
class TestFisk(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.fisk``."""

    dist = scipy.stats.fisk
423
424
425
class TestFoldedCauchy(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.foldcauchy``."""

    dist = scipy.stats.foldcauchy
428
429
430
class TestFoldedNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.foldnorm``."""

    dist = scipy.stats.foldnorm
433
434
435
class TestFrechetRight(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.frechet_r``."""

    # ``frechet_r`` is missing from recent SciPy releases; fall back to
    # ``weibull_min``, its documented replacement, so the module still
    # imports there.
    try:
        dist = scipy.stats.frechet_r
    except AttributeError:
        dist = scipy.stats.weibull_min
438
439
440
class TestFrechetLeft(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.frechet_l``."""

    # ``frechet_l`` is missing from recent SciPy releases; fall back to
    # ``weibull_max``, its documented replacement, so the module still
    # imports there.
    try:
        dist = scipy.stats.frechet_l
    except AttributeError:
        dist = scipy.stats.weibull_max
443
444
445
class TestGeneralizedLogistic(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.genlogistic``."""

    dist = scipy.stats.genlogistic
448
449
450
class TestGeneralizedNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gennorm``."""

    dist = scipy.stats.gennorm
453
454
455
class TestGeneralizedPareto(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.genpareto``."""

    dist = scipy.stats.genpareto
458
459
460
class TestGeneralizedExponential(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.genexpon``."""

    dist = scipy.stats.genexpon
463
464
465
class TestGeneralizedExtreme(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.genextreme``."""

    dist = scipy.stats.genextreme
468
469
470
@skip('very slow')
class TestGaussHypergeometric(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gausshyper`` (skipped)."""

    dist = scipy.stats.gausshyper
474
475
476
class TestGamma(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gamma``."""

    dist = scipy.stats.gamma
479
480
481
class TestGeneralizedGamma(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gengamma``."""

    dist = scipy.stats.gengamma
484
485
486
class TestGeneralizedHalfLogistic(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.genhalflogistic``."""

    dist = scipy.stats.genhalflogistic
489
490
491
class TestGilbrat(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gilbrat``."""

    # ``gilbrat`` is a misspelling that recent SciPy releases no longer
    # provide; fall back to the correctly spelled ``gibrat`` there.
    try:
        dist = scipy.stats.gilbrat
    except AttributeError:
        dist = scipy.stats.gibrat
494
495
496
class TestGompertz(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gompertz``."""

    dist = scipy.stats.gompertz
499
500
501
class TestGumbelRight(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gumbel_r``."""

    dist = scipy.stats.gumbel_r
504
505
506
class TestGumbelLeft(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.gumbel_l``."""

    dist = scipy.stats.gumbel_l
509
510
511
class TestHalfCauchy(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.halfcauchy``."""

    dist = scipy.stats.halfcauchy
514
515
516
class TestHalfLogistic(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.halflogistic``."""

    dist = scipy.stats.halflogistic
519
520
521
class TestHalfNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.halfnorm``."""

    dist = scipy.stats.halfnorm
524
525
526
class TestHalfGeneralizedNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.halfgennorm``."""

    dist = scipy.stats.halfgennorm
529
530
531
class TestHyperbolicSecant(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.hypsecant``."""

    dist = scipy.stats.hypsecant
534
535
536
class TestInverseGamma(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.invgamma``."""

    dist = scipy.stats.invgamma
539
540
541
class TestInverseGauss(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.invgauss``."""

    dist = scipy.stats.invgauss
544
545
546
class TestInverseWeibull(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.invweibull``."""

    dist = scipy.stats.invweibull
549
550
551
class TestJohnsonSB(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.johnsonsb``."""

    dist = scipy.stats.johnsonsb
554
555
556
class TestJohnsonSU(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.johnsonsu``."""

    dist = scipy.stats.johnsonsu
559
560
561
@skip('???')
class TestKolmogorovSmirnovOneSided(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.ksone`` (skipped)."""

    dist = scipy.stats.ksone
565
566
567
class TestKolmogorovSmirnovTwoSided(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.kstwobign``."""

    dist = scipy.stats.kstwobign
570
571
572
class TestLaplace(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.laplace``."""

    dist = scipy.stats.laplace
575
576
577
class TestLevy(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.levy``."""

    dist = scipy.stats.levy
580
581
582
class TestLeftSkewedLevy(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.levy_l``."""

    dist = scipy.stats.levy_l
585
586
587
@skip('???')
class TestLevyStable(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.levy_stable`` (skipped)."""

    dist = scipy.stats.levy_stable
591
592
593
class TestLogistic(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.logistic``."""

    dist = scipy.stats.logistic
596
597
598
class TestLogGamma(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.loggamma``."""

    dist = scipy.stats.loggamma
601
602
603
class TestLogLaplace(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.loglaplace``."""

    dist = scipy.stats.loglaplace
606
607
608
class TestLogNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.lognorm``."""

    dist = scipy.stats.lognorm
611
612
613
class TestLomax(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.lomax``."""

    dist = scipy.stats.lomax
616
617
618
class TestMaxwell(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.maxwell``."""

    dist = scipy.stats.maxwell
621
622
623
class TestMielke(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.mielke``."""

    dist = scipy.stats.mielke
626
627
628
class TestNakagami(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.nakagami``."""

    dist = scipy.stats.nakagami
631
632
633
class TestNonCentralChiSquared(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.ncx2``."""

    dist = scipy.stats.ncx2
636
637
638
class TestNonCentralF(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.ncf``."""

    dist = scipy.stats.ncf

    params = [(27, 27, 0.415784417992)]
643
644
645
class TestNonCentralT(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.nct``."""

    dist = scipy.stats.nct
648
649
650
class TestNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.norm``."""

    dist = scipy.stats.norm
653
654
655
class TestPareto(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.pareto``."""

    dist = scipy.stats.pareto
658
659
660
class TestPearson3(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.pearson3``."""

    dist = scipy.stats.pearson3
663
664
665
class TestPowerLaw(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.powerlaw``."""

    dist = scipy.stats.powerlaw
668
669
670
class TestPowerNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.powernorm``."""

    dist = scipy.stats.powernorm
673
674
675
class TestRDistributed(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.rdist``."""

    dist = scipy.stats.rdist
678
679
680
class TestReciprocal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.reciprocal``."""

    dist = scipy.stats.reciprocal

    # NOTE(review): this random offset is drawn once, when the class body
    # runs at import time, i.e. before ``setUp`` seeds the generators, so
    # the parameters differ from run to run.
    params = [tuple(numpy.array([0, 1]) + rand(1)[0])]
685
686
687
class TestRayleigh(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.rayleigh``."""

    dist = scipy.stats.rayleigh
690
691
692
class TestRice(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.rice``."""

    dist = scipy.stats.rice
695
696
697
class TestReciprocalInverseGaussian(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.recipinvgauss``."""

    dist = scipy.stats.recipinvgauss
700
701
702
class TestSemicircular(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.semicircular``."""

    dist = scipy.stats.semicircular
705
706
707
class TestT(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.t``."""

    dist = scipy.stats.t
710
711
712
class TestTrapz(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.trapz``."""

    # NOTE(review): newer SciPy names this distribution ``trapezoid``;
    # fall back to it when the ``trapz`` alias is not available.
    try:
        dist = scipy.stats.trapz
    except AttributeError:
        dist = scipy.stats.trapezoid

    params = [(1 / 3, 2 / 3)]
717
718
719
class TestTriangular(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.triang``."""

    dist = scipy.stats.triang

    # NOTE(review): this random value is drawn once, when the class body
    # runs at import time, i.e. before ``setUp`` seeds the generators, so
    # the parameter differs from run to run.
    params = [tuple(rand(1))]
724
725
726
class TestTruncatedExponential(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.truncexpon``."""

    dist = scipy.stats.truncexpon
729
730
731
class TestTruncatedNormal(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.truncnorm``."""

    dist = scipy.stats.truncnorm

    params = [(0.1, 2.0)]
736
737
738
class TestTukeyLambda(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.tukeylambda``."""

    dist = scipy.stats.tukeylambda
741
742
743
class TestUniform(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.uniform``."""

    dist = scipy.stats.uniform
746
747
748
class TestVonMises(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.vonmises``."""

    dist = scipy.stats.vonmises

    # NOTE(review): this random value is drawn once, when the class body
    # runs at import time, i.e. before ``setUp`` seeds the generators, so
    # the parameter differs from run to run.
    params = [tuple(1.0 + rand(1))]
753
754
755
class TestVonMisesLine(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.vonmises_line``."""

    dist = scipy.stats.vonmises_line
758
759
760
class TestWald(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.wald``."""

    dist = scipy.stats.wald
763
764
765
class TestWeibullMin(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.weibull_min``."""

    dist = scipy.stats.weibull_min
768
769
770
class TestWeibullMax(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.weibull_max``."""

    dist = scipy.stats.weibull_max
773
774
775
class TestWrappedCauchy(ContinuousTestBase, TestCase):
    """Goodness-of-fit tests for ``scipy.stats.wrapcauchy``."""

    dist = scipy.stats.wrapcauchy

    params = [(0.5,)]
780