GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 668ca3...a3efe1 )
by Fritz
01:05
created

plot_cdfs()   B

Complexity

Conditions 4

Size

Total Lines 46

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
c 1
b 0
f 0
dl 0
loc 46
rs 8.6315
1
# Copyright (c) 2014, Salesforce.com, Inc.  All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted provided that the following conditions
5
# are met:
6
#
7
# - Redistributions of source code must retain the above copyright
8
#   notice, this list of conditions and the following disclaimer.
9
# - Redistributions in binary form must reproduce the above copyright
10
#   notice, this list of conditions and the following disclaimer in the
11
#   documentation and/or other materials provided with the distribution.
12
# - Neither the name of Salesforce.com nor the names of its contributors
13
#   may be used to endorse or promote products derived from this
14
#   software without specific prior written permission.
15
#
16
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
20
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
23
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
25
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
26
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28
from itertools import izip
29
import numpy
30
from matplotlib import pyplot
31
from sklearn.neighbors import NearestNeighbors
32
from goftests import volume_of_sphere
33
import parsable
34
35
36
def get_dim(value):
    '''
    Return the dimension of a sample or parameter value.

    Scalars (Python ints and floats, and numpy numeric scalars) have
    dimension 1; any other value is treated as a sequence and its length
    is returned.
    '''
    # numpy.number covers numpy scalars that do not subclass float
    # (e.g. float32 or int64); those have no len() and used to raise.
    if isinstance(value, (int, float, numpy.number)):
        return 1
    return len(value)
41
42
43
def get_samples(model, EXAMPLE, sample_count):
    '''
    Draw values from a model example and score each of them.

    Arguments:
        model: object providing Shared.from_dict and Group.from_values.
        EXAMPLE: dict with 'shared' and 'values' entries.
        sample_count: number of values to draw.

    Returns:
        (samples, probs): two numpy arrays holding, respectively, the
        values returned by group.sample_value and the matching
        group.score_value results.
    '''
    shared = model.Shared.from_dict(EXAMPLE['shared'])
    group = model.Group.from_values(shared, EXAMPLE['values'])

    # NOTE: an earlier model.Sampler-based implementation seemed to be
    # broken, so values are drawn from the group directly.
    samples = []
    probs = []
    for _ in range(sample_count):
        value = group.sample_value(shared)
        samples.append(value)
        probs.append(group.score_value(shared, value))

    return numpy.array(samples), numpy.array(probs)
64
65
66
def get_edge_stats(samples, probs):
    '''
    Compute nearest-neighbor edge lengths for a set of samples.

    Arguments:
        samples: sequence of scalar or vector samples.
        probs: per-sample values, passed through unchanged.

    Returns:
        dict with 'lengths' (column 1 of the 2-nearest-neighbor distance
        matrix) and 'probs' (the probs argument).
    '''
    # Scalar samples are turned into a single-column matrix before fitting.
    if not hasattr(samples[0], '__iter__'):
        samples = numpy.array([samples]).T
    neighbors = NearestNeighbors(n_neighbors=2).fit(samples)
    # The query points are the fitted points themselves, so column 0 is
    # presumably each point's zero distance to itself and column 1 the
    # distance to its nearest other sample -- confirm against sklearn docs.
    distances, _ = neighbors.kneighbors(samples)
    return {'lengths': distances[:, 1], 'probs': probs}
72
73
74
@parsable.command
def plot_edges(sample_count=1000, seed=0):
    '''
    Plot edges of niw examples.

    For every example in niw.EXAMPLES one row of two scatter plots is
    drawn: log edge length against edge prob, and the derived edge
    statistic against edge prob.

    Arguments:
        sample_count: number of samples drawn per example.
        seed: seed passed to seed_all before sampling.
    '''
    # seed_all is not imported at file level; it is expected to live in
    # goftests next to volume_of_sphere -- TODO confirm.
    from goftests import seed_all

    # NOTE(review): niw is not imported anywhere in the visible file; it
    # must be made available at module level for this command to run.
    model = niw

    seed_all(seed)
    fig, axes = pyplot.subplots(
        len(model.EXAMPLES),
        2,
        sharey='row',
        squeeze=False,  # keep axes 2-d so a single example still unpacks
        figsize=(8, 12))

    for EXAMPLE, (ax1, ax2) in zip(model.EXAMPLES, axes):
        dim = get_dim(EXAMPLE['shared']['mu'])
        samples, probs = get_samples(model, EXAMPLE, sample_count)
        edges = get_edge_stats(samples, probs)

        edge_lengths = numpy.log(edges['lengths'])
        edge_probs = numpy.asarray(edges['probs'])
        edge_stats = numpy.exp((edge_probs - edge_lengths) / dim)

        ax1.set_title('NIW, dim = {}'.format(dim))
        ax1.scatter(edge_lengths, edge_probs, lw=0, alpha=0.5)
        ax1.set_ylabel('log(edge prob)')

        ax2.scatter(edge_stats, edge_probs, lw=0, alpha=0.5)
        ax2.yaxis.set_label_position('right')

    # ax1 and ax2 still refer to the last (bottom) row here.
    ax1.set_xlabel('log(edge length)')
    # NOTE(review): the statistic is plotted on ax2's x axis, yet the label
    # is set on its y axis -- confirm intent.
    ax2.set_ylabel('statistic')
    fig.tight_layout()
    fig.subplots_adjust(wspace=0)
    pyplot.show()
111
112
113
def cdf_to_pdf(Y, X, bandwidth=0.1):
    '''
    Differentiate sampled cdf values with a lagged finite difference.

    Arguments:
        Y: array-like of cdf values.
        X: array-like of matching abscissae; must have the same length as Y.
        bandwidth: fraction of len(Y) used as the differencing lag; the lag
            is never smaller than one sample.

    Returns:
        (Y, X): the lagged differences of Y divided by the lag, and the
        midpoints of the corresponding pairs of X. Both are shorter than
        the inputs by the lag.

    Note that the difference is divided by the lag measured in samples,
    not by the spacing of X.
    '''
    assert len(Y) == len(X)
    # Coerce so that plain lists are accepted as well as numpy arrays.
    Y = numpy.asarray(Y)
    X = numpy.asarray(X)
    lag = max(1, int(round(len(Y) * bandwidth)))
    slopes = (1.0 / lag) * (Y[lag:] - Y[:-lag])
    midpoints = 0.5 * (X[lag:] + X[:-lag])
    return slopes, midpoints
119
120
121
def plot_cdfs(examples, seed=0):
    '''
    Plot test statistic cdfs based on the Nearest Neighbor distribution.

    Arguments:
        examples: iterable of dicts, each with 'samples' and 'probs'
            entries of equal length. An optional 'name' entry is used in
            the legend (assumed key -- TODO confirm); when it is missing
            the example's index is shown instead.
        seed: seed passed to seed_all.

    An example is reported as PASS when the absolute error of its
    statistic is below 0.05, and as FAIL (dashed line) otherwise.
    '''
    # seed_all is not imported at file level; it is expected to live in
    # goftests next to volume_of_sphere -- TODO confirm.
    from goftests import seed_all

    seed_all(seed)

    fig, (ax1, ax2) = pyplot.subplots(2, 1, sharex=True, figsize=(8, 10))
    # Dashed reference lines for the cdf and pdf panels.
    ax1.plot([0, 1], [0, 1], 'k--')
    ax2.plot([0, 1], [1, 1], 'k--')

    for index, example in enumerate(examples):
        name = example.get('name', 'example {}'.format(index))
        samples = example['samples']
        sample_count = len(samples)
        dim = get_dim(samples[0])
        edges = get_edge_stats(samples, example['probs'])
        radii = edges['lengths']
        # NOTE(review): this treats 'probs' as densities rather than log
        # scores -- confirm against the caller.
        intensities = sample_count * numpy.array(edges['probs'])

        cdf = numpy.array([
            1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
            for intensity, radius in zip(intensities, radii)
        ])
        cdf.sort()
        # Bin midpoints; built from an integer range so the length always
        # equals sample_count (a float-step arange may be off by one).
        X = (numpy.arange(sample_count) + 0.5) / sample_count

        pdf, Xp = cdf_to_pdf(cdf, X)
        # cdf_to_pdf divides by the lag in samples; rescale to X units.
        pdf *= sample_count

        error = 2 * cdf.mean() - 1
        if abs(error) < 0.05:
            status = 'PASS'
            linestyle = '-'
        else:
            status = 'FAIL'
            linestyle = '--'
        label = '{} {}({}) error = {:.3g}'.format(status, name, dim, error)
        ax1.plot(X, cdf, linestyle=linestyle, label=label)
        ax2.plot(Xp, pdf, linestyle=linestyle, label=label)

    ax1.set_title('GOF of Nearest Neighbor Statistic')
    ax1.legend(loc='best', prop={'size': 10}, fancybox=True, framealpha=0.5)
    ax1.set_ylabel('CDF')
    ax2.set_ylabel('PDF')
    pyplot.tight_layout()
    fig.subplots_adjust(hspace=0)
    pyplot.show()
167
168
169
def neighbor_scatter(samples, probs, title='nearest neighbor'):
    '''
    Plot nearest neighbor statistic cdf for all datapoints in a 2d dataset.

    Arguments:
        samples: non-empty sequence of 2-dimensional points.
        probs: one value per sample, scaled by the sample count to form
            the intensity at that point.
        title: prefix of the figure title; the error is appended to it.

    Each point is drawn at its coordinates and colored by its cdf value.
    '''
    sample_count = len(samples)
    assert sample_count
    dim = len(samples[0])
    assert dim == 2, dim

    pyplot.figure()
    cmap = pyplot.get_cmap('bwr')

    edges = get_edge_stats(samples, probs)
    radii = edges['lengths']
    intensities = sample_count * numpy.array(edges['probs'])

    cdf = numpy.array([
        1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
        for intensity, radius in zip(intensities, radii)
    ])
    error = 2 * (sum(cdf) / sample_count) - 1

    X = [point[0] for point in samples]
    Y = [point[1] for point in samples]

    pyplot.title('{} error = {:0.3g}'.format(title, error))
    pyplot.scatter(X, Y, 50, alpha=0.5, c=cdf, cmap=cmap)
    pyplot.axis('equal')

    pyplot.tight_layout()
    pyplot.show()
201