test_chi2cdf() - Code Metrics - Inspection of "Removed scipy dependency" - posterior/goftests - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#10)

unknown

created 2016-08-31 15:49 UTC

test_chi2cdf() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	4
c	1
b	0
f	0
dl	0
loc	9
rs	9.2

# Copyright (c) 2014, Salesforce.com, Inc.  All rights reserved.
# Copyright (c) 2015, Gamelan Labs, Inc.
# Copyright (c) 2016, Google, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# - Neither the name of Salesforce.com nor the names of its contributors
#   may be used to endorse or promote products derived from this
#   software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from __future__ import division
try:
    from itertools import izip as zip
except ImportError:
    pass
import numpy
import scipy.stats
from numpy import pi
from numpy.testing import rand
from nose import SkipTest
from nose.tools import assert_almost_equal
from nose.tools import assert_equal
from nose.tools import assert_greater
from nose.tools import assert_less
from goftests import seed_all
from goftests import get_dim
from goftests import multinomial_goodness_of_fit
from goftests import discrete_goodness_of_fit
from goftests import auto_density_goodness_of_fit
from goftests import mixed_density_goodness_of_fit
from goftests import split_discrete_continuous
from goftests import volume_of_sphere
from goftests import chi2sf

NUM_BASE_SAMPLES = 250

NUM_SAMPLES_SCALE = 1000

TEST_FAILURE_RATE = 5e-4


def test_chi2cdf(xmin=0.0, xmax=100.0, nx=500, smin=1, smax=41, sstep=1.5):
    """Compare ``goftests.chi2sf`` with ``scipy.stats.chi2.sf`` on a grid.

    Despite the function name, the quantity under test is the survival
    function (``sf``), not the CDF.

    Args:
        xmin: First x value of the grid (``numpy.linspace`` start).
        xmax: Last x value of the grid (``numpy.linspace`` stop).
        nx: Number of x grid points.
        smin: First value of the second argument (``numpy.arange`` start).
        smax: Exclusive upper bound of the second argument.
        sstep: Step between consecutive second-argument values.

    Raises:
        AssertionError: if the two implementations disagree at any grid
            point beyond the default tolerance of ``assert_almost_equal``.
    """
    xlist = numpy.linspace(xmin, xmax, nx)
    slist = numpy.arange(smin, smax, sstep)
    for s in slist:
        for x in xlist:
            # Evaluate each implementation once and reuse the values in the
            # diagnostic output below.
            expected = scipy.stats.chi2.sf(x, s)
            actual = chi2sf(x, s)
            delta = expected - actual
            # Report discrepancies of either sign; the assertion itself is
            # much looser than this 1e-12 reporting threshold.
            if abs(delta) > 1e-12:
                # A single formatted string prints identically whether
                # ``print`` is a statement (Python 2) or a function.
                print('s={} x={} delta={} scipy={} goftests={}'.format(
                    s, x, delta, expected, actual))
            assert_almost_equal(delta, 0.0)


def test_multinomial_goodness_of_fit():
    """Yield one multinomial goodness-of-fit check per dimension in 2..19."""
    for size in range(2, 20):
        # Each yielded (callable, argument) pair is one generated test case.
        case = (_test_multinomial_goodness_of_fit, size)
        yield case


def _test_multinomial_goodness_of_fit(dim):
    """Check ``multinomial_goodness_of_fit`` on matching and mismatched counts.

    Category probabilities of length ``dim`` are drawn from a flat Dirichlet.
    Counts sampled from those probabilities must score above
    ``TEST_FAILURE_RATE``; counts sampled from uniform probabilities must
    score below it.
    """
    seed_all(0)
    total = int(1e5)
    true_probs = numpy.random.dirichlet([1] * dim)
    uniform_probs = [1. / dim] * dim

    # Counts drawn from the reference probabilities should look like a fit.
    matching_counts = numpy.random.multinomial(total, true_probs)
    assert_greater(
        multinomial_goodness_of_fit(true_probs, matching_counts, total),
        TEST_FAILURE_RATE)

    # Counts drawn from uniform probabilities should be rejected.
    mismatched_counts = numpy.random.multinomial(total, uniform_probs)
    assert_less(
        multinomial_goodness_of_fit(true_probs, mismatched_counts, total),
        TEST_FAILURE_RATE)


def test_volume_of_sphere():
    """Check ``volume_of_sphere`` against closed forms in 1, 2 and 3 dims."""
    for radius in (0.1, 1.0, 10.0):
        # (dimension, expected volume) pairs for this radius.
        expected_by_dim = (
            (1, 2.0 * radius),
            (2, pi * radius ** 2),
            (3, 4 / 3.0 * pi * radius ** 3),
        )
        for dim, expected in expected_by_dim:
            assert_almost_equal(volume_of_sphere(dim, radius), expected)


# Cases for split_discrete_continuous.  'mixed' is the input; 'discrete' and
# 'continuous' are the two expected outputs (compared in split_example).
split_examples = [
    # Non-float scalars: expected back unchanged, with nothing continuous.
    {'mixed': False, 'discrete': False, 'continuous': []},
    {'mixed': 0, 'discrete': 0, 'continuous': []},
    {'mixed': 'abc', 'discrete': 'abc', 'continuous': []},
    # Float scalar: expected as None in 'discrete' and listed in 'continuous'.
    {'mixed': 0.0, 'discrete': None, 'continuous': [0.0]},
    # Empty sequences: both tuple and list inputs expect a tuple back.
    {'mixed': (), 'discrete': (), 'continuous': []},
    {'mixed': [], 'discrete': (), 'continuous': []},
    # One-element sequences holding an int, then a float.
    {'mixed': (0,), 'discrete': (0, ), 'continuous': []},
    {'mixed': [0, ], 'discrete': (0, ), 'continuous': []},
    {'mixed': (0.0, ), 'discrete': (None, ), 'continuous': [0.0]},
    {'mixed': [0.0, ], 'discrete': (None, ), 'continuous': [0.0]},
    # Nested structure: floats at any depth are expected to be replaced by
    # None, and collected in 'continuous' in order of appearance.
    {
        'mixed': [True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
        'discrete': (True, 1, 'xyz', None, (None, (), ((None,),))),
        'continuous': [3.14, 2.71],
    },
    # numpy array input: expected to be handled like a sequence of floats.
    {
        'mixed': numpy.zeros(3),
        'discrete': (None, None, None),
        'continuous': [0.0, 0.0, 0.0],
    },
]


def split_example(i):
    """Check ``split_discrete_continuous`` on entry ``i`` of ``split_examples``.

    The discrete part must equal the expected value exactly; the continuous
    part is compared with ``assert_almost_equal``.
    """
    case = split_examples[i]
    got_discrete, got_continuous = split_discrete_continuous(case['mixed'])
    assert_equal(got_discrete, case['discrete'])
    assert_almost_equal(got_continuous, case['continuous'])


def test_split_continuous_discrete():
    """Yield one ``split_example`` check per entry of ``split_examples``."""
    # Only the index is yielded; split_example looks the entry up itself.
    for index, _ in enumerate(split_examples):
        yield split_example, index


# NOTE(review): seed_all presumably fixes the random state so that the rand()
# draws in the table below are repeatable across runs -- confirm against
# goftests.seed_all.  The draws happen once, at import time, in the order the
# entries are written.
seed_all(0)
# Explicit parameter sets, keyed by scipy.stats attribute name.  Each value is
# a list of argument tuples; _test_scipy_stats unpacks every tuple into
# dist.rvs and dist.pmf / dist.pdf.  Names missing from this table get a
# single random tuple, 1.0 + rand(dist.numargs), instead.
default_params = {
    'bernoulli': [(0.2,)],
    'beta': [
        (0.5, 0.5),
        (0.5, 1.5),
        (0.5, 2.5),
    ],
    'binom': [(40, 0.4)],
    # Each tuple holds a single argument: the concentration vector.
    'dirichlet': [
        ([2.0, 2.5],),
        ([2.0, 2.5, 3.0],),
        ([2.0, 2.5, 3.0, 3.5],),
    ],
    'erlang': [(7,)],
    'dlaplace': [(0.8,)],
    # One random value, a fixed 0, then two more random values.
    'frechet': [tuple(2 * rand(1)) + (0,) + tuple(2 * rand(2))],
    'geom': [(0.1,)],
    'hypergeom': [(40, 14, 24)],
    'logser': [(0.9,)],
    # (mean, covariance) pairs in one, two and three dimensions.
    'multivariate_normal': [
        (numpy.ones(1), numpy.eye(1)),
        (numpy.ones(2), numpy.eye(2)),
        (numpy.ones(3), numpy.eye(3)),
    ],
    'nbinom': [(40, 0.4)],
    'ncf': [(27, 27, 0.415784417992)],
    'planck': [(0.51,)],
    'poisson': [(20,)],
    # A pair (u, 1 + u) sharing the same random draw u.
    'reciprocal': [tuple(numpy.array([0, 1]) + rand(1)[0])],
    'trapz': [(0.333, 0.666)],
    'triang': [tuple(rand(1))],
    'truncnorm': [(0.1, 2.0)],
    'vonmises': [tuple(1.0 + rand(1))],
    'wrapcauchy': [(0.5,)],
    # Not exercised while 'zipf' is also listed in known_failures.
    'zipf': [(1.2,)],
}

# scipy.stats names that _test_scipy_stats skips, grouped by the reason
# recorded for each one.
known_failures = {
    # No reason recorded.
    'alpha',
    'boltzmann',
    # Reason unclear ("???").
    'ksone',
    'levy_stable',
    # Very slow.
    'gausshyper',
    # Too sparse.
    'randint',
    # Possibly a bug ("bug?").
    'zipf',
    # Abstract.
    'rv_continuous',
    'rv_discrete',
    # Matrix.
    'invwishart',
    'matrix_normal',
    'ortho_group',
    'random_correlation',
    'special_ortho_group',
    'wishart',
}


def transform_dirichlet(ps):
    """Return ``ps`` with its final component dropped.

    ``ps`` must have more than one entry; this is enforced with ``assert``.
    """
    assert len(ps) > 1
    # NOTE: a variant that also shifts the kept components was left disabled:
    #   ps[:-1] - ps[-1] * (dim ** 0.5 - 1.0) / (dim - 1.0), dim = len(ps)
    return ps[:-1]


# Per-distribution sample transforms, keyed by scipy.stats attribute name.
transforms = {'dirichlet': transform_dirichlet}


def _test_scipy_stats(name):
    """Run goodness-of-fit checks on samples from ``scipy.stats.<name>``.

    Raises ``SkipTest`` when ``name`` is listed in ``known_failures``.
    Parameter tuples come from ``default_params`` when present; otherwise a
    single tuple ``1.0 + rand(dist.numargs)`` is used.  For every parameter
    tuple the samples are scored twice: first with
    ``discrete_goodness_of_fit`` (when the distribution has a ``pmf``) or
    ``auto_density_goodness_of_fit`` (otherwise), then with
    ``mixed_density_goodness_of_fit``.  Each score must exceed
    ``TEST_FAILURE_RATE``.
    """
    if name in known_failures:
        raise SkipTest('known failure')
    dist = getattr(scipy.stats, name)
    if name in default_params:
        param_sets = default_params[name]
    else:
        param_sets = [tuple(1.0 + rand(dist.numargs))]

    for param in param_sets:
        print('param = {}'.format(param))
        # A two-sample draw is used only to learn the sample dimension,
        # which in turn sets how many samples are drawn for the real check.
        probe = dist.rvs(*param, size=2)
        dim = get_dim(probe[0])
        sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim
        samples = list(dist.rvs(*param, size=sample_count))

        # Probabilities are always evaluated on the raw samples; only the
        # points handed to the goodness-of-fit routines are transformed.
        if name in transforms:
            transform = transforms[name]
            transformed = [transform(sample) for sample in samples]
        else:
            transformed = samples

        if hasattr(dist, 'pmf'):
            probs = [dist.pmf(sample, *param) for sample in samples]
            probs_by_sample = dict(zip(samples, probs))
            gof = discrete_goodness_of_fit(
                transformed, probs_by_sample, plot=True)
        else:
            probs = [dist.pdf(sample, *param) for sample in samples]
            gof = auto_density_goodness_of_fit(
                transformed, probs, plot=True)
        assert_greater(gof, TEST_FAILURE_RATE)

        mixed_gof = mixed_density_goodness_of_fit(
            transformed, probs, plot=True)
        assert_greater(mixed_gof, TEST_FAILURE_RATE)


def test_scipy_stats():
    """Yield a ``_test_scipy_stats`` case for each sampler in ``scipy.stats``.

    Any attribute of ``scipy.stats`` that has an ``rvs`` attribute is
    included.
    """
    seed_all(0)
    for attr_name in dir(scipy.stats):
        candidate = getattr(scipy.stats, attr_name)
        if not hasattr(candidate, 'rvs'):
            continue
        yield _test_scipy_stats, attr_name


1			# Copyright (c) 2014, Salesforce.com, Inc. All rights reserved.
2			# Copyright (c) 2015, Gamelan Labs, Inc.
3			# Copyright (c) 2016, Google, Inc.
4			#
5			# Redistribution and use in source and binary forms, with or without
6			# modification, are permitted provided that the following conditions
7			# are met:
8			#
9			# - Redistributions of source code must retain the above copyright
10			# notice, this list of conditions and the following disclaimer.
11			# - Redistributions in binary form must reproduce the above copyright
12			# notice, this list of conditions and the following disclaimer in the
13			# documentation and/or other materials provided with the distribution.
14			# - Neither the name of Salesforce.com nor the names of its contributors
15			# may be used to endorse or promote products derived from this
16			# software without specific prior written permission.
17			#
18			# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19			# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20			# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21			# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22			# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23			# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24			# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25			# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26			# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27			# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
28			# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30			from __future__ import division
31			try:
32			from itertools import izip as zip
33			except ImportError:
34			pass
35			import numpy
36			import scipy.stats
37			from numpy import pi
38			from numpy.testing import rand
39			from nose import SkipTest
40			from nose.tools import assert_almost_equal
41			from nose.tools import assert_equal
42			from nose.tools import assert_greater
43			from nose.tools import assert_less
44			from goftests import seed_all
45			from goftests import get_dim
46			from goftests import multinomial_goodness_of_fit
47			from goftests import discrete_goodness_of_fit
48			from goftests import auto_density_goodness_of_fit
49			from goftests import mixed_density_goodness_of_fit
50			from goftests import split_discrete_continuous
51			from goftests import volume_of_sphere
52			from goftests import chi2sf
53
54			NUM_BASE_SAMPLES = 250
55
56			NUM_SAMPLES_SCALE = 1000
57
58			TEST_FAILURE_RATE = 5e-4
59
60
61			def test_chi2cdf(xmin=0.0, xmax=100.0, nx=500, smin=1, smax=41, sstep=1.5):
62			xlist = numpy.linspace(xmin, xmax, nx)
63			slist = numpy.arange(smin, smax, sstep)
64			for s in slist:
65			for x in xlist:
66			delta = scipy.stats.chi2.sf(x, s) - chi2sf(x, s)
67			if delta > 1e-12:
68			print s, x, delta, scipy.stats.chi2.sf(x, s), chi2sf(x, s)
69			assert_almost_equal(delta, 0.0)
70
71
72			def test_multinomial_goodness_of_fit():
73			for dim in range(2, 20):
74			yield _test_multinomial_goodness_of_fit, dim
75
76
77			def _test_multinomial_goodness_of_fit(dim):
78			seed_all(0)
79			sample_count = int(1e5)
80			probs = numpy.random.dirichlet([1] * dim)
81
82			counts = numpy.random.multinomial(sample_count, probs)
83			p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
84			assert_greater(p_good, TEST_FAILURE_RATE)
85
86			unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
87			p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
88			assert_less(p_bad, TEST_FAILURE_RATE)
89
90
91			def test_volume_of_sphere():
92			for r in [0.1, 1.0, 10.0]:
93			assert_almost_equal(volume_of_sphere(1, r), 2.0 * r)
94			assert_almost_equal(volume_of_sphere(2, r), pi * r ** 2)
95			assert_almost_equal(volume_of_sphere(3, r), 4 / 3.0 * pi * r ** 3)
96
97
98			split_examples = [
99			{'mixed': False, 'discrete': False, 'continuous': []},
100			{'mixed': 0, 'discrete': 0, 'continuous': []},
101			{'mixed': 'abc', 'discrete': 'abc', 'continuous': []},
102			{'mixed': 0.0, 'discrete': None, 'continuous': [0.0]},
103			{'mixed': (), 'discrete': (), 'continuous': []},
104			{'mixed': [], 'discrete': (), 'continuous': []},
105			{'mixed': (0,), 'discrete': (0, ), 'continuous': []},
106			{'mixed': [0, ], 'discrete': (0, ), 'continuous': []},
107			{'mixed': (0.0, ), 'discrete': (None, ), 'continuous': [0.0]},
108			{'mixed': [0.0, ], 'discrete': (None, ), 'continuous': [0.0]},
109			{
110			'mixed': [True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
111			'discrete': (True, 1, 'xyz', None, (None, (), ((None,),))),
112			'continuous': [3.14, 2.71],
113			},
114			{
115			'mixed': numpy.zeros(3),
116			'discrete': (None, None, None),
117			'continuous': [0.0, 0.0, 0.0],
118			},
119			]
120
121
122			def split_example(i):
123			example = split_examples[i]
124			discrete, continuous = split_discrete_continuous(example['mixed'])
125			assert_equal(discrete, example['discrete'])
126			assert_almost_equal(continuous, example['continuous'])
127
128
129			def test_split_continuous_discrete():
130			for i in range(len(split_examples)):
131			yield split_example, i
132
133
134			seed_all(0)
135			default_params = {
136			'bernoulli': [(0.2,)],
137			'beta': [
138			(0.5, 0.5),
139			(0.5, 1.5),
140			(0.5, 2.5),
141			],
142			'binom': [(40, 0.4)],
143			'dirichlet': [
144			([2.0, 2.5],),
145			([2.0, 2.5, 3.0],),
146			([2.0, 2.5, 3.0, 3.5],),
147			],
148			'erlang': [(7,)],
149			'dlaplace': [(0.8,)],
150			'frechet': [tuple(2 * rand(1)) + (0,) + tuple(2 * rand(2))],
151			'geom': [(0.1,)],
152			'hypergeom': [(40, 14, 24)],
153			'logser': [(0.9,)],
154			'multivariate_normal': [
155			(numpy.ones(1), numpy.eye(1)),
156			(numpy.ones(2), numpy.eye(2)),
157			(numpy.ones(3), numpy.eye(3)),
158			],
159			'nbinom': [(40, 0.4)],
160			'ncf': [(27, 27, 0.415784417992)],
161			'planck': [(0.51,)],
162			'poisson': [(20,)],
163			'reciprocal': [tuple(numpy.array([0, 1]) + rand(1)[0])],
164			'trapz': [(0.333, 0.666)],
165			'triang': [tuple(rand(1))],
166			'truncnorm': [(0.1, 2.0)],
167			'vonmises': [tuple(1.0 + rand(1))],
168			'wrapcauchy': [(0.5,)],
169			'zipf': [(1.2,)],
170			}
171
172			known_failures = set([
173			'alpha',
174			'boltzmann',
175			'gausshyper', # very slow
176			'ksone', # ???
177			'levy_stable', # ???
178			'ortho_group', # matrix
179			'randint', # too sparse
180			'random_correlation', # matrix
181			'rv_continuous', # abstract
182			'rv_discrete', # abstract
183			'special_ortho_group', # matrix
184			'zipf', # bug?
185			'invwishart', # matrix
186			'wishart', # matrix
187			'matrix_normal', # matrix
188			])
189
190
191			def transform_dirichlet(ps):
192			dim = len(ps)
193			assert dim > 1
194			# return ps[:-1] - ps[-1] * (dim ** 0.5 - 1.0) / (dim - 1.0)
195			return ps[:-1]
196
197
198			transforms = {
199			'dirichlet': transform_dirichlet,
200			}
201
202
203			def _test_scipy_stats(name):
204			if name in known_failures:
205			raise SkipTest('known failure')
206			dist = getattr(scipy.stats, name)
207			try:
208			params = default_params[name]
209			except KeyError:
210			params = [tuple(1.0 + rand(dist.numargs))]
211			for param in params:
212			print('param = {}'.format(param))
213			dim = get_dim(dist.rvs(*param, size=2)[0])
214			sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim
215			samples = list(dist.rvs(*param, size=sample_count))
216			if name in transforms:
217			transformed = list(map(transforms[name], samples))
218			else:
219			transformed = samples
220
221			if hasattr(dist, 'pmf'):
222			probs = [dist.pmf(sample, *param) for sample in samples]
223			probs_dict = dict(zip(samples, probs))
224			gof = discrete_goodness_of_fit(transformed, probs_dict, plot=True)
225			else:
226			probs = [dist.pdf(sample, *param) for sample in samples]
227			gof = auto_density_goodness_of_fit(transformed, probs, plot=True)
228			assert_greater(gof, TEST_FAILURE_RATE)
229
230			gof = mixed_density_goodness_of_fit(transformed, probs, plot=True)
231			assert_greater(gof, TEST_FAILURE_RATE)
232
233
234			def test_scipy_stats():
235			seed_all(0)
236			for name in dir(scipy.stats):
237			if hasattr(getattr(scipy.stats, name), 'rvs'):
238			yield _test_scipy_stats, name
239

posterior / goftests

GitHub Access Token became invalid

Pull Request — master (#10)

test_chi2cdf() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like