1
|
|
|
# Copyright (c) 2014, Salesforce.com, Inc. All rights reserved. |
2
|
|
|
# |
3
|
|
|
# Redistribution and use in source and binary forms, with or without |
4
|
|
|
# modification, are permitted provided that the following conditions |
5
|
|
|
# are met: |
6
|
|
|
# |
7
|
|
|
# - Redistributions of source code must retain the above copyright |
8
|
|
|
# notice, this list of conditions and the following disclaimer. |
9
|
|
|
# - Redistributions in binary form must reproduce the above copyright |
10
|
|
|
# notice, this list of conditions and the following disclaimer in the |
11
|
|
|
# documentation and/or other materials provided with the distribution. |
12
|
|
|
# - Neither the name of Salesforce.com nor the names of its contributors |
13
|
|
|
# may be used to endorse or promote products derived from this |
14
|
|
|
# software without specific prior written permission. |
15
|
|
|
# |
16
|
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
17
|
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
18
|
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
19
|
|
|
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
20
|
|
|
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
21
|
|
|
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
22
|
|
|
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
23
|
|
|
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
24
|
|
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR |
25
|
|
|
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
26
|
|
|
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27
|
|
|
|
28
|
|
|
""" |
29
|
|
|
Cook, John D. "Notes on the negative binomial distribution." |
30
|
|
|
Unknown, October 28 (2009): 2009. |
31
|
|
|
|
32
|
|
|
Following http://www.johndcook.com/negative_binomial.pdf |
33
|
|
|
The negative binomial (NB) gives the probability of seeing x |
34
|
|
|
failures before the rth success given a success probability |
35
|
|
|
p: |
36
|
|
|
p(x | p, r) \propto p ^ r * (1 - p) ^ x |
37
|
|
|
For a given r and p, the NB has mean: |
38
|
|
|
mu = r (1 - p) / p |
39
|
|
|
and variance: |
40
|
|
|
sigmasq = mu + (1 / r) * mu ** 2 |
41
|
|
|
""" |
42
|
|
|
from distributions.dbg.special import gammaln |
43
|
|
|
from distributions.dbg.random import sample_beta, sample_negative_binomial |
44
|
|
|
from distributions.mixins import SharedMixin, GroupIoMixin, SharedIoMixin |
45
|
|
|
|
46
|
|
|
|
47
|
|
|
NAME = 'BetaNegativeBinomial' |
48
|
|
|
EXAMPLES = [ |
49
|
|
|
{ |
50
|
|
|
'shared': {'alpha': 1., 'beta': 1., 'r': 1}, |
51
|
|
|
'values': [0, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 2, 3], |
52
|
|
|
}, |
53
|
|
|
] |
54
|
|
|
|
55
|
|
|
|
56
|
|
|
Value = int |
57
|
|
|
|
58
|
|
|
|
59
|
|
|
class Shared(SharedMixin, SharedIoMixin): |
60
|
|
|
def __init__(self): |
61
|
|
|
self.alpha = None |
62
|
|
|
self.beta = None |
63
|
|
|
self.r = None |
64
|
|
|
|
65
|
|
|
def plus_group(self, group): |
66
|
|
|
post = self.__class__() |
67
|
|
|
post.alpha = self.alpha + self.r * group.count |
68
|
|
|
post.beta = self.beta + group.sum |
69
|
|
|
post.r = self.r |
70
|
|
|
return post |
71
|
|
|
|
72
|
|
|
def load(self, raw): |
73
|
|
|
self.alpha = float(raw['alpha']) |
74
|
|
|
self.beta = float(raw['beta']) |
75
|
|
|
self.r = int(raw['r']) |
76
|
|
|
|
77
|
|
|
def dump(self): |
78
|
|
|
return { |
79
|
|
|
'alpha': self.alpha, |
80
|
|
|
'beta': self.beta, |
81
|
|
|
'r': self.r, |
82
|
|
|
} |
83
|
|
|
|
84
|
|
|
def protobuf_load(self, message): |
85
|
|
|
self.alpha = float(message.alpha) |
86
|
|
|
self.beta = float(message.beta) |
87
|
|
|
self.r = int(message.r) |
88
|
|
|
|
89
|
|
|
def protobuf_dump(self, message): |
90
|
|
|
message.Clear() |
91
|
|
|
message.alpha = self.alpha |
92
|
|
|
message.beta = self.beta |
93
|
|
|
message.r = self.r |
94
|
|
|
|
95
|
|
|
|
96
|
|
|
class Group(GroupIoMixin): |
97
|
|
|
def __init__(self): |
98
|
|
|
self.count = None |
99
|
|
|
self.sum = None |
100
|
|
|
|
101
|
|
|
def init(self, shared): |
102
|
|
|
self.count = 0 |
103
|
|
|
self.sum = 0 |
104
|
|
|
|
105
|
|
|
def add_value(self, shared, value): |
106
|
|
|
self.count += 1 |
107
|
|
|
self.sum += int(value) |
108
|
|
|
|
109
|
|
|
def add_repeated_value(self, shared, value, count): |
110
|
|
|
self.count += count |
111
|
|
|
self.sum += count * int(value) |
112
|
|
|
|
113
|
|
|
def remove_value(self, shared, value): |
114
|
|
|
self.count -= 1 |
115
|
|
|
self.sum -= int(value) |
116
|
|
|
|
117
|
|
|
def merge(self, shared, source): |
118
|
|
|
self.count += source.count |
119
|
|
|
self.sum += source.sum |
120
|
|
|
|
121
|
|
|
def score_value(self, shared, value): |
122
|
|
|
post = shared.plus_group(self) |
123
|
|
|
alpha = post.alpha + shared.r |
124
|
|
|
beta = post.beta + value |
125
|
|
|
score = gammaln(post.alpha + post.beta) |
126
|
|
|
score -= gammaln(alpha + beta) |
127
|
|
|
score += gammaln(alpha) - gammaln(post.alpha) |
128
|
|
|
score += gammaln(beta) - gammaln(post.beta) |
129
|
|
|
return score |
130
|
|
|
|
131
|
|
|
def score_data(self, shared): |
132
|
|
|
post = shared.plus_group(self) |
133
|
|
|
score = gammaln(shared.alpha + shared.beta) |
134
|
|
|
score -= gammaln(post.alpha + post.beta) |
135
|
|
|
score += gammaln(post.alpha) - gammaln(shared.alpha) |
136
|
|
|
score += gammaln(post.beta) - gammaln(shared.beta) |
137
|
|
|
return score |
138
|
|
|
|
139
|
|
|
def sample_value(self, shared): |
140
|
|
|
sampler = Sampler() |
141
|
|
|
sampler.init(shared, self) |
142
|
|
|
return sampler.eval(shared) |
143
|
|
|
|
144
|
|
|
def dump(self): |
145
|
|
|
return { |
146
|
|
|
'count': self.count, |
147
|
|
|
'sum': self.sum, |
148
|
|
|
} |
149
|
|
|
|
150
|
|
|
def load(self, raw): |
151
|
|
|
self.count = int(raw['count']) |
152
|
|
|
self.sum = int(raw['sum']) |
153
|
|
|
|
154
|
|
|
def protobuf_load(self, message): |
155
|
|
|
self.count = int(message.count) |
156
|
|
|
self.sum = int(message.sum) |
157
|
|
|
|
158
|
|
|
def protobuf_dump(self, message): |
159
|
|
|
message.count = self.count |
160
|
|
|
message.sum = self.sum |
161
|
|
|
|
162
|
|
|
|
163
|
|
|
class Sampler(object): |
164
|
|
|
def init(self, shared, group=None): |
165
|
|
|
post = shared if group is None else shared.plus_group(group) |
166
|
|
|
self.p = sample_beta(post.alpha, post.beta) |
167
|
|
|
|
168
|
|
|
def eval(self, shared): |
169
|
|
|
return sample_negative_binomial(self.p, shared.r) |
170
|
|
|
|
171
|
|
|
|
172
|
|
|
def sample_group(shared, size): |
173
|
|
|
group = Group() |
174
|
|
|
group.init(shared) |
175
|
|
|
sampler = Sampler() |
176
|
|
|
sampler.init(shared, group) |
177
|
|
|
return [sampler.eval(shared) for _ in xrange(size)] |
178
|
|
|
|