test_chi2cdf() - Code Metrics - Inspection of "Removed scipy dependency" - posterior/goftests - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#10)

unknown

created 2016-08-31 18:02 UTC

test_chi2cdf() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	7
rs	9.4285
c	0
b	0
f	0

# Copyright (c) 2014, Salesforce.com, Inc.  All rights reserved.
# Copyright (c) 2015, Gamelan Labs, Inc.
# Copyright (c) 2016, Google, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# - Neither the name of Salesforce.com nor the names of its contributors
#   may be used to endorse or promote products derived from this
#   software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import division
try:
    from itertools import izip as zip
except ImportError:
    pass
import numpy
import scipy.stats
from numpy import pi
from numpy.testing import rand
from nose import SkipTest
from nose.tools import assert_almost_equal
from nose.tools import assert_equal
from nose.tools import assert_greater
from nose.tools import assert_less
from goftests import seed_all
from goftests import get_dim
from goftests import multinomial_goodness_of_fit
from goftests import discrete_goodness_of_fit
from goftests import auto_density_goodness_of_fit
from goftests import mixed_density_goodness_of_fit
from goftests import split_discrete_continuous
from goftests import volume_of_sphere
from goftests import chi2sf

NUM_BASE_SAMPLES = 250

NUM_SAMPLES_SCALE = 1000

TEST_FAILURE_RATE = 5e-4


def test_chi2cdf(xmin=0.0, xmax=100.0, nx=500, smin=1, smax=41, sstep=1.5):
    # Cross-check goftests' chi2sf against scipy.stats.chi2.sf.  Despite the
    # test's name, it is the survival function that is compared: on nx evenly
    # spaced points in [xmin, xmax], for every second argument taken from
    # arange(smin, smax, sstep) (non-integer values included, since the
    # default step is 1.5).
    points = numpy.linspace(xmin, xmax, nx)
    for dof in numpy.arange(smin, smax, sstep):
        for point in points:
            reference = scipy.stats.chi2.sf(point, dof)
            candidate = chi2sf(point, dof)
            # The difference must vanish to assert_almost_equal's default
            # tolerance.
            assert_almost_equal(reference - candidate, 0.0)


def test_multinomial_goodness_of_fit():
    # Test generator: emits one (check function, dim) case for each
    # dimension from 2 up to and including 19.
    dims = range(2, 20)
    for dim in dims:
        yield _test_multinomial_goodness_of_fit, dim


def _test_multinomial_goodness_of_fit(dim):
    # Draw a random probability vector of length `dim`, then require that
    # counts sampled from that vector score above TEST_FAILURE_RATE while
    # counts sampled from the uniform distribution score below it.
    seed_all(0)
    total = int(1e5)
    true_probs = numpy.random.dirichlet([1] * dim)

    # Counts drawn from the true distribution should be accepted.
    matching_counts = numpy.random.multinomial(total, true_probs)
    assert_greater(
        multinomial_goodness_of_fit(true_probs, matching_counts, total),
        TEST_FAILURE_RATE)

    # Counts drawn from a uniform distribution should be rejected when
    # scored against the (non-uniform) true probabilities.
    uniform_probs = [1. / dim] * dim
    mismatched_counts = numpy.random.multinomial(total, uniform_probs)
    assert_less(
        multinomial_goodness_of_fit(true_probs, mismatched_counts, total),
        TEST_FAILURE_RATE)


def test_volume_of_sphere():
    # Compare volume_of_sphere(dim, r) with the closed forms for dimensions
    # 1 (segment length), 2 (disc area) and 3 (ball volume) at several radii.
    for radius in (0.1, 1.0, 10.0):
        closed_forms = (
            (1, lambda: 2.0 * radius),
            (2, lambda: pi * radius ** 2),
            (3, lambda: 4 / 3.0 * pi * radius ** 3),
        )
        for dim, expected in closed_forms:
            assert_almost_equal(volume_of_sphere(dim, radius), expected())


# Fixtures for split_discrete_continuous(): each entry pairs a 'mixed' input
# with the 'discrete' and 'continuous' parts the split is expected to return.
# split_example(i) checks the entry at index i.
split_examples = [
    # Scalars that are not floats: expected unchanged in 'discrete', with
    # nothing in 'continuous'.
    {'mixed': False, 'discrete': False, 'continuous': []},
    {'mixed': 0, 'discrete': 0, 'continuous': []},
    {'mixed': 'abc', 'discrete': 'abc', 'continuous': []},
    # A float scalar: expected as None in 'discrete' and listed in
    # 'continuous'.
    {'mixed': 0.0, 'discrete': None, 'continuous': [0.0]},
    # Empty and single-element sequences; a list input is expected back as a
    # tuple in 'discrete'.
    {'mixed': (), 'discrete': (), 'continuous': []},
    {'mixed': [], 'discrete': (), 'continuous': []},
    {'mixed': (0,), 'discrete': (0, ), 'continuous': []},
    {'mixed': [0, ], 'discrete': (0, ), 'continuous': []},
    {'mixed': (0.0, ), 'discrete': (None, ), 'continuous': [0.0]},
    {'mixed': [0.0, ], 'discrete': (None, ), 'continuous': [0.0]},
    # Nested structure: floats at any depth are expected to be replaced by
    # None and collected, in order, into 'continuous'.
    {
        'mixed': [True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
        'discrete': (True, 1, 'xyz', None, (None, (), ((None,),))),
        'continuous': [3.14, 2.71],
    },
    # A numpy array of zeros: every element is expected to be treated as
    # continuous.
    {
        'mixed': numpy.zeros(3),
        'discrete': (None, None, None),
        'continuous': [0.0, 0.0, 0.0],
    },
]


def split_example(i):
    # Run split_discrete_continuous on fixture number `i` of split_examples
    # and compare both halves of the result with the recorded expectation:
    # exact equality for the discrete part, approximate equality for the
    # continuous part.
    case = split_examples[i]
    actual_discrete, actual_continuous = split_discrete_continuous(
        case['mixed'])
    assert_equal(actual_discrete, case['discrete'])
    assert_almost_equal(actual_continuous, case['continuous'])


def test_split_continuous_discrete():
    # Test generator: one split_example case per fixture.  The index, not
    # the fixture itself, is yielded as the case argument.
    for index, _ in enumerate(split_examples):
        yield split_example, index


# Called before the table is built because several entries below draw their
# parameters with rand() at import time.
# NOTE(review): presumably seed_all seeds the RNG(s) behind rand() so these
# draws are reproducible -- confirm against goftests.seed_all.
seed_all(0)
# Parameter sets for scipy.stats distributions, keyed by distribution name.
# Each value is a list of argument tuples; _test_scipy_stats unpacks every
# tuple as dist.rvs(*param, ...) and runs the goodness-of-fit checks once per
# tuple.  Names missing from this table get randomly drawn arguments there.
# The order of the rand() calls below determines which values are drawn, so
# reordering entries changes the parameters.
default_params = {
    'bernoulli': [(0.2,)],
    'beta': [
        (0.5, 0.5),
        (0.5, 1.5),
        (0.5, 2.5),
    ],
    'binom': [(40, 0.4)],
    # Multivariate: one argument (a concentration list) per tuple, for
    # dimensions 2 through 4.
    'dirichlet': [
        ([2.0, 2.5],),
        ([2.0, 2.5, 3.0],),
        ([2.0, 2.5, 3.0, 3.5],),
    ],
    'erlang': [(7,)],
    'dlaplace': [(0.8,)],
    'frechet': [tuple(2 * rand(1)) + (0,) + tuple(2 * rand(2))],
    'geom': [(0.1,)],
    'hypergeom': [(40, 14, 24)],
    'logser': [(0.9,)],
    # Multivariate: (mean, covariance) pairs for dimensions 1 through 3.
    'multivariate_normal': [
        (numpy.ones(1), numpy.eye(1)),
        (numpy.ones(2), numpy.eye(2)),
        (numpy.ones(3), numpy.eye(3)),
    ],
    'nbinom': [(40, 0.4)],
    'ncf': [(27, 27, 0.415784417992)],
    'planck': [(0.51,)],
    'poisson': [(20,)],
    # A single rand() draw offsets both 0 and 1, giving a pair (u, 1 + u).
    'reciprocal': [tuple(numpy.array([0, 1]) + rand(1)[0])],
    'trapz': [(0.333, 0.666)],
    'triang': [tuple(rand(1))],
    'truncnorm': [(0.1, 2.0)],
    'vonmises': [tuple(1.0 + rand(1))],
    'wrapcauchy': [(0.5,)],
    # 'zipf' is also listed in known_failures, so _test_scipy_stats skips it
    # before this entry is ever read.
    'zipf': [(1.2,)],
}

# Names of scipy.stats attributes that _test_scipy_stats skips (via SkipTest)
# instead of testing, grouped by the reason recorded for each.
known_failures = {
    # Matrix-valued distributions.
    'invwishart',  # matrix
    'matrix_normal',  # matrix
    'ortho_group',  # matrix
    'random_correlation',  # matrix
    'special_ortho_group',  # matrix
    'wishart',  # matrix
    # Abstract base classes.
    'rv_continuous',  # abstract
    'rv_discrete',  # abstract
    # Impractical to test here.
    'gausshyper',  # very slow
    'randint',  # too sparse
    # Unexplained or suspected failures.
    'alpha',
    'boltzmann',
    'ksone',  # ???
    'levy_stable',  # ???
    'zipf',  # bug?
}


def transform_dirichlet(ps):
    """Return ``ps`` with its final coordinate removed.

    ``ps`` must have at least two entries; shorter input trips the
    assertion.  The result is whatever ``ps[:-1]`` yields for the input's
    type.
    """
    assert len(ps) > 1
    # Alternative projection, kept for reference but not in use:
    # return ps[:-1] - ps[-1] * (dim ** 0.5 - 1.0) / (dim - 1.0)
    # (where dim = len(ps))
    head = ps[:-1]
    return head


# Per-distribution sample transforms, keyed by scipy.stats distribution name.
# _test_scipy_stats applies the matching transform to every drawn sample
# before the goodness-of-fit checks; distributions not listed here are
# tested on their raw samples.
transforms = {
    'dirichlet': transform_dirichlet,
}


def _test_scipy_stats(name):
    # Goodness-of-fit check for the scipy.stats distribution called `name`.
    # For each parameter set: draw samples, evaluate their pmf (if the
    # distribution has one) or pdf, and require both the type-specific test
    # and the mixed-density test to score above TEST_FAILURE_RATE.
    # Raises SkipTest for names listed in known_failures.
    if name in known_failures:
        raise SkipTest('known failure')
    dist = getattr(scipy.stats, name)
    if name in default_params:
        param_sets = default_params[name]
    else:
        # No curated parameters: draw one value per shape argument.
        param_sets = [tuple(1.0 + rand(dist.numargs))]
    has_pmf = hasattr(dist, 'pmf')
    for param in param_sets:
        print('param = {}'.format(param))
        # A small preliminary draw supplies one sample for get_dim; the
        # result scales the number of samples used for the actual test.
        probe = dist.rvs(*param, size=2)[0]
        sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * get_dim(probe)
        samples = list(dist.rvs(*param, size=sample_count))
        if name in transforms:
            transform = transforms[name]
            transformed = [transform(sample) for sample in samples]
        else:
            transformed = samples

        # Probabilities are always evaluated on the untransformed samples.
        if has_pmf:
            probs = [dist.pmf(sample, *param) for sample in samples]
            sample_to_prob = dict(zip(samples, probs))
            gof = discrete_goodness_of_fit(
                transformed, sample_to_prob, plot=True)
        else:
            probs = [dist.pdf(sample, *param) for sample in samples]
            gof = auto_density_goodness_of_fit(transformed, probs, plot=True)
        assert_greater(gof, TEST_FAILURE_RATE)

        mixed_gof = mixed_density_goodness_of_fit(
            transformed, probs, plot=True)
        assert_greater(mixed_gof, TEST_FAILURE_RATE)


def test_scipy_stats():
    # Test generator: after reseeding, emit one _test_scipy_stats case for
    # every attribute of scipy.stats that exposes an `rvs` attribute.
    seed_all(0)
    for name in dir(scipy.stats):
        candidate = getattr(scipy.stats, name)
        if not hasattr(candidate, 'rvs'):
            continue
        yield _test_scipy_stats, name


1			# Copyright (c) 2014, Salesforce.com, Inc. All rights reserved.
2			# Copyright (c) 2015, Gamelan Labs, Inc.
3			# Copyright (c) 2016, Google, Inc.
4			#
5			# Redistribution and use in source and binary forms, with or without
6			# modification, are permitted provided that the following conditions
7			# are met:
8			#
9			# - Redistributions of source code must retain the above copyright
10			# notice, this list of conditions and the following disclaimer.
11			# - Redistributions in binary form must reproduce the above copyright
12			# notice, this list of conditions and the following disclaimer in the
13			# documentation and/or other materials provided with the distribution.
14			# - Neither the name of Salesforce.com nor the names of its contributors
15			# may be used to endorse or promote products derived from this
16			# software without specific prior written permission.
17			#
18			# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19			# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20			# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21			# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22			# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23			# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24			# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25			# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26			# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27			# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
28			# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30			from __future__ import division
31			try:
32			from itertools import izip as zip
33			except ImportError:
34			pass
35			import numpy
36			import scipy.stats
37			from numpy import pi
38			from numpy.testing import rand
39			from nose import SkipTest
40			from nose.tools import assert_almost_equal
41			from nose.tools import assert_equal
42			from nose.tools import assert_greater
43			from nose.tools import assert_less
44			from goftests import seed_all
45			from goftests import get_dim
46			from goftests import multinomial_goodness_of_fit
47			from goftests import discrete_goodness_of_fit
48			from goftests import auto_density_goodness_of_fit
49			from goftests import mixed_density_goodness_of_fit
50			from goftests import split_discrete_continuous
51			from goftests import volume_of_sphere
52			from goftests import chi2sf
53
54			NUM_BASE_SAMPLES = 250
55
56			NUM_SAMPLES_SCALE = 1000
57
58			TEST_FAILURE_RATE = 5e-4
59
60
61			def test_chi2cdf(xmin=0.0, xmax=100.0, nx=500, smin=1, smax=41, sstep=1.5):
62			xlist = numpy.linspace(xmin, xmax, nx)
63			slist = numpy.arange(smin, smax, sstep)
64			for s in slist:
65			for x in xlist:
66			delta = scipy.stats.chi2.sf(x, s) - chi2sf(x, s)
67			assert_almost_equal(delta, 0.0)
68
69
70			def test_multinomial_goodness_of_fit():
71			for dim in range(2, 20):
72			yield _test_multinomial_goodness_of_fit, dim
73
74
75			def _test_multinomial_goodness_of_fit(dim):
76			seed_all(0)
77			sample_count = int(1e5)
78			probs = numpy.random.dirichlet([1] * dim)
79
80			counts = numpy.random.multinomial(sample_count, probs)
81			p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
82			assert_greater(p_good, TEST_FAILURE_RATE)
83
84			unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
85			p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
86			assert_less(p_bad, TEST_FAILURE_RATE)
87
88
89			def test_volume_of_sphere():
90			for r in [0.1, 1.0, 10.0]:
91			assert_almost_equal(volume_of_sphere(1, r), 2.0 * r)
92			assert_almost_equal(volume_of_sphere(2, r), pi * r ** 2)
93			assert_almost_equal(volume_of_sphere(3, r), 4 / 3.0 * pi * r ** 3)
94
95
96			split_examples = [
97			{'mixed': False, 'discrete': False, 'continuous': []},
98			{'mixed': 0, 'discrete': 0, 'continuous': []},
99			{'mixed': 'abc', 'discrete': 'abc', 'continuous': []},
100			{'mixed': 0.0, 'discrete': None, 'continuous': [0.0]},
101			{'mixed': (), 'discrete': (), 'continuous': []},
102			{'mixed': [], 'discrete': (), 'continuous': []},
103			{'mixed': (0,), 'discrete': (0, ), 'continuous': []},
104			{'mixed': [0, ], 'discrete': (0, ), 'continuous': []},
105			{'mixed': (0.0, ), 'discrete': (None, ), 'continuous': [0.0]},
106			{'mixed': [0.0, ], 'discrete': (None, ), 'continuous': [0.0]},
107			{
108			'mixed': [True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
109			'discrete': (True, 1, 'xyz', None, (None, (), ((None,),))),
110			'continuous': [3.14, 2.71],
111			},
112			{
113			'mixed': numpy.zeros(3),
114			'discrete': (None, None, None),
115			'continuous': [0.0, 0.0, 0.0],
116			},
117			]
118
119
120			def split_example(i):
121			example = split_examples[i]
122			discrete, continuous = split_discrete_continuous(example['mixed'])
123			assert_equal(discrete, example['discrete'])
124			assert_almost_equal(continuous, example['continuous'])
125
126
127			def test_split_continuous_discrete():
128			for i in range(len(split_examples)):
129			yield split_example, i
130
131
132			seed_all(0)
133			default_params = {
134			'bernoulli': [(0.2,)],
135			'beta': [
136			(0.5, 0.5),
137			(0.5, 1.5),
138			(0.5, 2.5),
139			],
140			'binom': [(40, 0.4)],
141			'dirichlet': [
142			([2.0, 2.5],),
143			([2.0, 2.5, 3.0],),
144			([2.0, 2.5, 3.0, 3.5],),
145			],
146			'erlang': [(7,)],
147			'dlaplace': [(0.8,)],
148			'frechet': [tuple(2 * rand(1)) + (0,) + tuple(2 * rand(2))],
149			'geom': [(0.1,)],
150			'hypergeom': [(40, 14, 24)],
151			'logser': [(0.9,)],
152			'multivariate_normal': [
153			(numpy.ones(1), numpy.eye(1)),
154			(numpy.ones(2), numpy.eye(2)),
155			(numpy.ones(3), numpy.eye(3)),
156			],
157			'nbinom': [(40, 0.4)],
158			'ncf': [(27, 27, 0.415784417992)],
159			'planck': [(0.51,)],
160			'poisson': [(20,)],
161			'reciprocal': [tuple(numpy.array([0, 1]) + rand(1)[0])],
162			'trapz': [(0.333, 0.666)],
163			'triang': [tuple(rand(1))],
164			'truncnorm': [(0.1, 2.0)],
165			'vonmises': [tuple(1.0 + rand(1))],
166			'wrapcauchy': [(0.5,)],
167			'zipf': [(1.2,)],
168			}
169
170			known_failures = set([
171			'alpha',
172			'boltzmann',
173			'gausshyper', # very slow
174			'ksone', # ???
175			'levy_stable', # ???
176			'ortho_group', # matrix
177			'randint', # too sparse
178			'random_correlation', # matrix
179			'rv_continuous', # abstract
180			'rv_discrete', # abstract
181			'special_ortho_group', # matrix
182			'zipf', # bug?
183			'invwishart', # matrix
184			'wishart', # matrix
185			'matrix_normal', # matrix
186			])
187
188
189			def transform_dirichlet(ps):
190			dim = len(ps)
191			assert dim > 1
192			# return ps[:-1] - ps[-1] * (dim ** 0.5 - 1.0) / (dim - 1.0)
193			return ps[:-1]
194
195
196			transforms = {
197			'dirichlet': transform_dirichlet,
198			}
199
200
201			def _test_scipy_stats(name):
202			if name in known_failures:
203			raise SkipTest('known failure')
204			dist = getattr(scipy.stats, name)
205			try:
206			params = default_params[name]
207			except KeyError:
208			params = [tuple(1.0 + rand(dist.numargs))]
209			for param in params:
210			print('param = {}'.format(param))
211			dim = get_dim(dist.rvs(*param, size=2)[0])
212			sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim
213			samples = list(dist.rvs(*param, size=sample_count))
214			if name in transforms:
215			transformed = list(map(transforms[name], samples))
216			else:
217			transformed = samples
218
219			if hasattr(dist, 'pmf'):
220			probs = [dist.pmf(sample, *param) for sample in samples]
221			probs_dict = dict(zip(samples, probs))
222			gof = discrete_goodness_of_fit(transformed, probs_dict, plot=True)
223			else:
224			probs = [dist.pdf(sample, *param) for sample in samples]
225			gof = auto_density_goodness_of_fit(transformed, probs, plot=True)
226			assert_greater(gof, TEST_FAILURE_RATE)
227
228			gof = mixed_density_goodness_of_fit(transformed, probs, plot=True)
229			assert_greater(gof, TEST_FAILURE_RATE)
230
231
232			def test_scipy_stats():
233			seed_all(0)
234			for name in dir(scipy.stats):
235			if hasattr(getattr(scipy.stats, name), 'rvs'):
236			yield _test_scipy_stats, name
237

posterior / goftests

GitHub Access Token became invalid

Pull Request — master (#10)

test_chi2cdf() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like