_test_scipy_stats() - Code Metrics - Inspection of "Merge pull request #3 from jfinkels/python3" - posterior/goftests - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( a41705...c23734 )

by Fritz

created 2016-06-26 00:22 UTC

_test_scipy_stats() D

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	3
Bugs	0	Features	0

Metric	Value
cc	8
c	3
b	0
f	0
dl	0
loc	29
rs	4

# Copyright (c) 2014, Salesforce.com, Inc.  All rights reserved.
# Copyright (c) 2015, Gamelan Labs, Inc.
# Copyright (c) 2016, Google, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# - Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the distribution.
# - Neither the name of Salesforce.com nor the names of its contributors
#   may be used to endorse or promote products derived from this
#   software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import division
try:
    from itertools import izip as zip
except ImportError:
    pass
import numpy
import scipy.stats
from numpy import pi
from numpy.testing import rand
from nose import SkipTest
from nose.tools import assert_almost_equal
from nose.tools import assert_equal
from nose.tools import assert_greater
from nose.tools import assert_less
from goftests import seed_all
from goftests import get_dim
from goftests import multinomial_goodness_of_fit
from goftests import discrete_goodness_of_fit
from goftests import auto_density_goodness_of_fit
from goftests import mixed_density_goodness_of_fit
from goftests import split_discrete_continuous
from goftests import volume_of_sphere

# Sample-size policy used by _test_scipy_stats: the number of samples drawn
# for a distribution is NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim.
NUM_BASE_SAMPLES = 200

# Extra samples drawn per unit of dimension reported by get_dim().
NUM_SAMPLES_SCALE = 1000

# Threshold that goodness-of-fit values are compared against: values from a
# matching model must be greater than it, values from a mismatched model
# must be less than it.
TEST_FAILURE_RATE = 5e-4


def test_multinomial_goodness_of_fit():
    """Generate one multinomial goodness-of-fit check per dimension.

    Yields ``(check, dim)`` pairs for every ``dim`` from 2 through 19.
    """
    smallest_dim, dim_limit = 2, 20
    for dimension in range(smallest_dim, dim_limit):
        yield (_test_multinomial_goodness_of_fit, dimension)


def _test_multinomial_goodness_of_fit(dim):
    """Check multinomial_goodness_of_fit on matching and mismatched counts.

    A probability vector of length ``dim`` is drawn from a flat Dirichlet.
    Counts sampled from that vector must score above TEST_FAILURE_RATE,
    while counts sampled from the uniform distribution must score below it
    when evaluated against the same vector.
    """
    seed_all(0)
    num_draws = int(1e5)
    true_probs = numpy.random.dirichlet([1] * dim)

    # Counts drawn from the true probabilities should be accepted.
    matching_counts = numpy.random.multinomial(num_draws, true_probs)
    assert_greater(
        multinomial_goodness_of_fit(true_probs, matching_counts, num_draws),
        TEST_FAILURE_RATE)

    # Counts drawn from a uniform distribution should be rejected.
    uniform_probs = [1. / dim] * dim
    mismatched_counts = numpy.random.multinomial(num_draws, uniform_probs)
    assert_less(
        multinomial_goodness_of_fit(true_probs, mismatched_counts, num_draws),
        TEST_FAILURE_RATE)


def test_volume_of_sphere():
    """Compare volume_of_sphere with closed forms in 1, 2 and 3 dimensions."""
    for radius in (0.1, 1.0, 10.0):
        # (dimension, expected volume): segment length, disc area, ball volume.
        expected_by_dim = [
            (1, 2.0 * radius),
            (2, pi * radius ** 2),
            (3, 4 / 3.0 * pi * radius ** 3),
        ]
        for dim, expected in expected_by_dim:
            assert_almost_equal(volume_of_sphere(dim, radius), expected)


# Test cases for split_discrete_continuous, consumed by split_example().
# Each entry holds:
#   'mixed':      the input passed to split_discrete_continuous,
#   'discrete':   the expected first element of its result,
#   'continuous': the expected second element of its result.
# In the expected values below, floats in the input appear as None in
# 'discrete' and are listed in 'continuous'; lists and arrays in the input
# appear as tuples in 'discrete'.
split_examples = [
    {'mixed': False, 'discrete': False, 'continuous': []},
    {'mixed': 0, 'discrete': 0, 'continuous': []},
    {'mixed': 'abc', 'discrete': 'abc', 'continuous': []},
    {'mixed': 0.0, 'discrete': None, 'continuous': [0.0]},
    {'mixed': (), 'discrete': (), 'continuous': []},
    {'mixed': [], 'discrete': (), 'continuous': []},
    {'mixed': (0,), 'discrete': (0, ), 'continuous': []},
    {'mixed': [0, ], 'discrete': (0, ), 'continuous': []},
    {'mixed': (0.0, ), 'discrete': (None, ), 'continuous': [0.0]},
    {'mixed': [0.0, ], 'discrete': (None, ), 'continuous': [0.0]},
    {
        # Nested structure mixing bools, ints, strings, floats and None.
        'mixed': [True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
        'discrete': (True, 1, 'xyz', None, (None, (), ((None,),))),
        'continuous': [3.14, 2.71],
    },
    {
        # numpy array input.
        'mixed': numpy.zeros(3),
        'discrete': (None, None, None),
        'continuous': [0.0, 0.0, 0.0],
    },
]


def split_example(i):
    """Run split_discrete_continuous on the ``i``-th entry of split_examples.

    The discrete part must equal the expected value exactly; the continuous
    part is compared with assert_almost_equal.
    """
    case = split_examples[i]
    result = split_discrete_continuous(case['mixed'])
    actual_discrete, actual_continuous = result
    assert_equal(actual_discrete, case['discrete'])
    assert_almost_equal(actual_continuous, case['continuous'])


def test_split_continuous_discrete():
    """Generate one split_example check per entry of split_examples.

    Yields ``(split_example, index)`` pairs; the index (rather than the
    example itself) is passed to the check.
    """
    for index, _ in enumerate(split_examples):
        yield (split_example, index)


# seed_all(0) is called before building default_params because several
# entries below call rand() while this module is being imported
# (presumably seed_all makes those draws reproducible -- confirm in goftests).
seed_all(0)
# Parameter sets used by _test_scipy_stats, keyed by scipy.stats attribute
# name.  Each value is a list of argument tuples; every tuple is unpacked
# into dist.rvs / dist.pmf / dist.pdf.  Distributions not listed here fall
# back to random parameters, 1.0 + rand(dist.numargs).
default_params = {
    'bernoulli': [(0.2,)],
    'beta': [
        (0.5, 0.5),
        (0.5, 1.5),
        (0.5, 2.5),
    ],
    'binom': [(40, 0.4)],
    'dirichlet': [
        ([2.0, 2.5],),
        ([2.0, 2.5, 3.0],),
        ([2.0, 2.5, 3.0, 3.5],),
    ],
    'erlang': [(7,)],
    'dlaplace': [(0.8,)],
    'frechet': [tuple(2 * rand(1)) + (0,) + tuple(2 * rand(2))],
    'geom': [(0.1,)],
    'hypergeom': [(40, 14, 24)],
    'logser': [(0.9,)],
    'multivariate_normal': [
        (numpy.ones(1), numpy.eye(1)),
        (numpy.ones(2), numpy.eye(2)),
        (numpy.ones(3), numpy.eye(3)),
    ],
    'nbinom': [(40, 0.4)],
    'ncf': [(27, 27, 0.415784417992)],
    'planck': [(0.51,)],
    'poisson': [(20,)],
    'reciprocal': [tuple(numpy.array([0, 1]) + rand(1)[0])],
    'triang': [tuple(rand(1))],
    'truncnorm': [(0.1, 2.0)],
    'vonmises': [tuple(1.0 + rand(1))],
    'wrapcauchy': [(0.5,)],
    # NOTE: 'zipf' is also listed in known_failures, so _test_scipy_stats
    # skips it before this entry is ever read.
    'zipf': [(1.2,)],
}

# Names in scipy.stats that _test_scipy_stats skips (raising SkipTest)
# instead of testing.  The trailing comments give the recorded reason,
# where one was recorded.
known_failures = {
    'alpha',
    'boltzmann',
    'gausshyper',  # very slow
    'ksone',  # ???
    'levy_stable',  # ???
    'randint',  # too sparse
    'zipf',  # bug?
    # abstract base classes
    'rv_continuous',
    'rv_discrete',
    # matrix-valued distributions
    'invwishart',
    'wishart',
    'matrix_normal',
}


def transform_dirichlet(ps):
    """Return ``ps`` without its last element.

    ``ps`` must contain at least two elements; this is enforced with an
    assertion.
    """
    num_coords = len(ps)
    assert num_coords > 1
    # An alternative projection was previously considered here:
    #   ps[:-1] - ps[-1] * (dim ** 0.5 - 1.0) / (dim - 1.0)
    return ps[:num_coords - 1]


# Per-distribution sample transforms, keyed by scipy.stats attribute name.
# _test_scipy_stats applies the transform to each sample before running the
# goodness-of-fit checks.
transforms = {'dirichlet': transform_dirichlet}


def _test_scipy_stats(name):
    """Run goodness-of-fit checks on samples from ``scipy.stats.<name>``.

    Raises SkipTest when ``name`` is listed in known_failures.  Parameters
    come from default_params when present, otherwise a single random tuple
    ``1.0 + rand(dist.numargs)`` is used.  For each parameter tuple, samples
    are drawn with ``dist.rvs``, optionally transformed via ``transforms``,
    and scored against ``dist.pmf`` (if the distribution has one) or
    ``dist.pdf``.  Every score must exceed TEST_FAILURE_RATE.
    """
    if name in known_failures:
        raise SkipTest('known failure')
    dist = getattr(scipy.stats, name)

    # Only build random parameters when no defaults are registered, so that
    # dist.numargs is not touched for distributions that have defaults.
    if name in default_params:
        params = default_params[name]
    else:
        params = [tuple(1.0 + rand(dist.numargs))]

    transform = transforms.get(name)

    for param in params:
        print('param = {}'.format(param))

        # Draw a small probe first to size the real sample by dimension.
        probe = dist.rvs(*param, size=2)
        dim = get_dim(probe[0])
        sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim
        samples = list(dist.rvs(*param, size=sample_count))

        if transform is None:
            transformed = samples
        else:
            transformed = [transform(sample) for sample in samples]

        # Probabilities are evaluated on the untransformed samples.
        if hasattr(dist, 'pmf'):
            probs = [dist.pmf(sample, *param) for sample in samples]
            gof = discrete_goodness_of_fit(
                transformed, dict(zip(samples, probs)), plot=True)
        else:
            probs = [dist.pdf(sample, *param) for sample in samples]
            gof = auto_density_goodness_of_fit(transformed, probs, plot=True)
        assert_greater(gof, TEST_FAILURE_RATE)

        mixed_gof = mixed_density_goodness_of_fit(
            transformed, probs, plot=True)
        assert_greater(mixed_gof, TEST_FAILURE_RATE)


def test_scipy_stats():
    """Generate one _test_scipy_stats check per samplable scipy.stats name.

    After calling ``seed_all(0)``, yields ``(_test_scipy_stats, name)`` for
    every attribute of ``scipy.stats`` that has an ``rvs`` attribute.
    """
    seed_all(0)
    for name in dir(scipy.stats):
        candidate = getattr(scipy.stats, name)
        if not hasattr(candidate, 'rvs'):
            continue
        yield (_test_scipy_stats, name)


1			# Copyright (c) 2014, Salesforce.com, Inc. All rights reserved.
2			# Copyright (c) 2015, Gamelan Labs, Inc.
3			# Copyright (c) 2016, Google, Inc.
4			#
5			# Redistribution and use in source and binary forms, with or without
6			# modification, are permitted provided that the following conditions
7			# are met:
8			#
9			# - Redistributions of source code must retain the above copyright
10			# notice, this list of conditions and the following disclaimer.
11			# - Redistributions in binary form must reproduce the above copyright
12			# notice, this list of conditions and the following disclaimer in the
13			# documentation and/or other materials provided with the distribution.
14			# - Neither the name of Salesforce.com nor the names of its contributors
15			# may be used to endorse or promote products derived from this
16			# software without specific prior written permission.
17			#
18			# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19			# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20			# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21			# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22			# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23			# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24			# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25			# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26			# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27			# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
28			# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29			from __future__ import division
30			try:
31			from itertools import izip as zip
32			except ImportError:
33			pass
34			import numpy
35			import scipy.stats
36			from numpy import pi
37			from numpy.testing import rand
38			from nose import SkipTest
39			from nose.tools import assert_almost_equal
40			from nose.tools import assert_equal
41			from nose.tools import assert_greater
42			from nose.tools import assert_less
43			from goftests import seed_all
44			from goftests import get_dim
45			from goftests import multinomial_goodness_of_fit
46			from goftests import discrete_goodness_of_fit
47			from goftests import auto_density_goodness_of_fit
48			from goftests import mixed_density_goodness_of_fit
49			from goftests import split_discrete_continuous
50			from goftests import volume_of_sphere
51
52			NUM_BASE_SAMPLES = 200
53
54			NUM_SAMPLES_SCALE = 1000
55
56			TEST_FAILURE_RATE = 5e-4
57
58
59			def test_multinomial_goodness_of_fit():
60			for dim in range(2, 20):
61			yield _test_multinomial_goodness_of_fit, dim
62
63
64			def _test_multinomial_goodness_of_fit(dim):
65			seed_all(0)
66			sample_count = int(1e5)
67			probs = numpy.random.dirichlet([1] * dim)
68
69			counts = numpy.random.multinomial(sample_count, probs)
70			p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
71			assert_greater(p_good, TEST_FAILURE_RATE)
72
73			unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
74			p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
75			assert_less(p_bad, TEST_FAILURE_RATE)
76
77
78			def test_volume_of_sphere():
79			for r in [0.1, 1.0, 10.0]:
80			assert_almost_equal(volume_of_sphere(1, r), 2.0 * r)
81			assert_almost_equal(volume_of_sphere(2, r), pi * r ** 2)
82			assert_almost_equal(volume_of_sphere(3, r), 4 / 3.0 * pi * r ** 3)
83
84
85			split_examples = [
86			{'mixed': False, 'discrete': False, 'continuous': []},
87			{'mixed': 0, 'discrete': 0, 'continuous': []},
88			{'mixed': 'abc', 'discrete': 'abc', 'continuous': []},
89			{'mixed': 0.0, 'discrete': None, 'continuous': [0.0]},
90			{'mixed': (), 'discrete': (), 'continuous': []},
91			{'mixed': [], 'discrete': (), 'continuous': []},
92			{'mixed': (0,), 'discrete': (0, ), 'continuous': []},
93			{'mixed': [0, ], 'discrete': (0, ), 'continuous': []},
94			{'mixed': (0.0, ), 'discrete': (None, ), 'continuous': [0.0]},
95			{'mixed': [0.0, ], 'discrete': (None, ), 'continuous': [0.0]},
96			{
97			'mixed': [True, 1, 'xyz', 3.14, [None, (), ([2.71],)]],
98			'discrete': (True, 1, 'xyz', None, (None, (), ((None,),))),
99			'continuous': [3.14, 2.71],
100			},
101			{
102			'mixed': numpy.zeros(3),
103			'discrete': (None, None, None),
104			'continuous': [0.0, 0.0, 0.0],
105			},
106			]
107
108
109			def split_example(i):
110			example = split_examples[i]
111			discrete, continuous = split_discrete_continuous(example['mixed'])
112			assert_equal(discrete, example['discrete'])
113			assert_almost_equal(continuous, example['continuous'])
114
115
116			def test_split_continuous_discrete():
117			for i in range(len(split_examples)):
118			yield split_example, i
119
120
121			seed_all(0)
122			default_params = {
123			'bernoulli': [(0.2,)],
124			'beta': [
125			(0.5, 0.5),
126			(0.5, 1.5),
127			(0.5, 2.5),
128			],
129			'binom': [(40, 0.4)],
130			'dirichlet': [
131			([2.0, 2.5],),
132			([2.0, 2.5, 3.0],),
133			([2.0, 2.5, 3.0, 3.5],),
134			],
135			'erlang': [(7,)],
136			'dlaplace': [(0.8,)],
137			'frechet': [tuple(2 * rand(1)) + (0,) + tuple(2 * rand(2))],
138			'geom': [(0.1,)],
139			'hypergeom': [(40, 14, 24)],
140			'logser': [(0.9,)],
141			'multivariate_normal': [
142			(numpy.ones(1), numpy.eye(1)),
143			(numpy.ones(2), numpy.eye(2)),
144			(numpy.ones(3), numpy.eye(3)),
145			],
146			'nbinom': [(40, 0.4)],
147			'ncf': [(27, 27, 0.415784417992)],
148			'planck': [(0.51,)],
149			'poisson': [(20,)],
150			'reciprocal': [tuple(numpy.array([0, 1]) + rand(1)[0])],
151			'triang': [tuple(rand(1))],
152			'truncnorm': [(0.1, 2.0)],
153			'vonmises': [tuple(1.0 + rand(1))],
154			'wrapcauchy': [(0.5,)],
155			'zipf': [(1.2,)],
156			}
157
158			known_failures = set([
159			'alpha',
160			'boltzmann',
161			'gausshyper', # very slow
162			'ksone', # ???
163			'levy_stable', # ???
164			'randint', # too sparse
165			'rv_continuous', # abstract
166			'rv_discrete', # abstract
167			'zipf', # bug?
168			'invwishart', # matrix
169			'wishart', # matrix
170			'matrix_normal', # matrix
171			])
172
173
174			def transform_dirichlet(ps):
175			dim = len(ps)
176			assert dim > 1
177			# return ps[:-1] - ps[-1] * (dim ** 0.5 - 1.0) / (dim - 1.0)
178			return ps[:-1]
179
180
181			transforms = {
182			'dirichlet': transform_dirichlet,
183			}
184
185
186			def _test_scipy_stats(name):
187			if name in known_failures:
188			raise SkipTest('known failure')
189			dist = getattr(scipy.stats, name)
190			try:
191			params = default_params[name]
192			except KeyError:
193			params = [tuple(1.0 + rand(dist.numargs))]
194			for param in params:
195			print('param = {}'.format(param))
196			dim = get_dim(dist.rvs(*param, size=2)[0])
197			sample_count = NUM_BASE_SAMPLES + NUM_SAMPLES_SCALE * dim
198			samples = list(dist.rvs(*param, size=sample_count))
199			if name in transforms:
200			transformed = list(map(transforms[name], samples))
201			else:
202			transformed = samples
203
204			if hasattr(dist, 'pmf'):
205			probs = [dist.pmf(sample, *param) for sample in samples]
206			probs_dict = dict(zip(samples, probs))
207			gof = discrete_goodness_of_fit(transformed, probs_dict, plot=True)
208			else:
209			probs = [dist.pdf(sample, *param) for sample in samples]
210			gof = auto_density_goodness_of_fit(transformed, probs, plot=True)
211			assert_greater(gof, TEST_FAILURE_RATE)
212
213			gof = mixed_density_goodness_of_fit(transformed, probs, plot=True)
214			assert_greater(gof, TEST_FAILURE_RATE)
215
216
217			def test_scipy_stats():
218			seed_all(0)
219			for name in dir(scipy.stats):
220			if hasattr(getattr(scipy.stats, name), 'rvs'):
221			yield _test_scipy_stats, name
222

posterior / goftests

GitHub Access Token became invalid

Push — master ( a41705...c23734 )

_test_scipy_stats() D

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like