1 | """Evaluate Theano variables on auxiliary data and during training.""" |
||
2 | from functools import partial |
||
3 | import logging |
||
4 | from abc import ABCMeta, abstractmethod |
||
5 | |||
6 | from six import add_metaclass |
||
7 | from theano import tensor |
||
8 | from theano.ifelse import ifelse |
||
9 | |||
10 | from blocks.utils import shared_like |
||
11 | |||
12 | logger = logging.getLogger(__name__) |
||
13 | |||
14 | |||
@add_metaclass(ABCMeta)
class AggregationScheme(object):
    """Recipe for evaluating a Theano variable over many minibatches.

    An aggregation scheme hands out :class:`Aggregator` objects. An
    aggregator combines the partial results computed on individual
    batches into the value of the variable on a full dataset.

    To use a scheme, attach it through the ``aggregation_scheme`` tag to
    the Theano variable which computes the desired value on a single
    batch.

    Parameters
    ----------
    variable : :class:`~tensor.TensorVariable`
        The variable that holds the desired value on a single batch.

    """
    def __init__(self, variable):
        self.variable = variable

    @abstractmethod
    def get_aggregator(self):
        """Return a new :class:`Aggregator` for this variable."""
||
40 | |||
41 | |||
class Aggregator(object):
    """Incrementally evaluates a Theano variable on a dataset.

    .. warning::
        Aggregators should never be created directly. Use the
        :meth:`AggregationScheme.get_aggregator` method instead.

    Example usages are:

    * compute the mean of some value over examples, sequence lengths etc.
    * track a parameter of a model
    * monitor a penalty

    An Aggregator bundles three things: updates that initialize a set of
    Theano shared variables (the accumulators), updates that fold a new
    batch into those accumulators, and a Theano variable that reads the
    accumulators and computes the final value.

    Parameters
    ----------
    aggregation_scheme : :class:`AggregationScheme`
        The aggregation scheme that constructed this Aggregator.
    initialization_updates : list of Theano updates
        Updates that specify how to initialize shared variables of
        this Aggregator. *Can only refer to shared variables and
        constants.* Defaults to an empty list.
    accumulation_updates : list of Theano updates
        Updates that specify how a new batch of data gets processed
        by this Aggregator. *Can refer to model inputs.* Defaults to an
        empty list.
    readout_variable : :class:`~tensor.TensorVariable`
        Theano variable that holds the final value based on aggregated
        partial results. *readout_variable must only consist of shared
        variables and constants.*

    Attributes
    ----------
    All constructor parameters are accessible as attributes.

    """
    def __init__(self, aggregation_scheme, initialization_updates=None,
                 accumulation_updates=None, readout_variable=None):
        self.aggregation_scheme = aggregation_scheme
        self.readout_variable = readout_variable

        # ``None`` stands for "no updates"; a fresh list is created per
        # instance so that instances never share a default list. Lists
        # supplied by the caller are stored as they are, not copied.
        self.initialization_updates = (
            [] if initialization_updates is None else initialization_updates)
        self.accumulation_updates = (
            [] if accumulation_updates is None else accumulation_updates)
||
93 | |||
94 | |||
class Mean(AggregationScheme):
    """Aggregation scheme which computes the mean.

    The numerator and the denominator are summed separately over the
    batches; the readout is the ratio of the two sums.

    Parameters
    ----------
    numerator : :class:`~tensor.TensorVariable`
        Theano variable for the numerator e.g. the likelihood
    denominator : :class:`~tensor.TensorVariable`
        Theano variable for the denominator e.g. the batch size

    """
    def __init__(self, numerator, denominator):
        # NOTE: the base class constructor is not called here, so
        # instances of this scheme have no ``variable`` attribute.
        self.numerator = numerator
        self.denominator = denominator

    def get_aggregator(self):
        """Return an :class:`Aggregator` summing both terms of the ratio."""
        # Reset to zero by the initialization updates and set to one by
        # the accumulation updates.
        seen_batch = shared_like(0.)
        numerator_sum = shared_like(self.numerator)
        denominator_sum = shared_like(self.denominator)

        # Placeholders for the previously aggregated values that have the
        # same shape as the new results; used while ``seen_batch`` is
        # still zero.
        numerator_blank = tensor.as_tensor(self.numerator).zeros_like()
        denominator_blank = tensor.as_tensor(self.denominator).zeros_like()

        numerator_so_far = ifelse(seen_batch, numerator_sum, numerator_blank)
        denominator_so_far = ifelse(seen_batch, denominator_sum,
                                    denominator_blank)

        reset_updates = [
            (numerator_sum, tensor.zeros_like(numerator_sum)),
            (denominator_sum, tensor.zeros_like(denominator_sum)),
            (seen_batch, tensor.zeros_like(seen_batch)),
        ]
        batch_updates = [
            (numerator_sum, self.numerator + numerator_so_far),
            (denominator_sum, self.denominator + denominator_so_far),
            (seen_batch, tensor.ones_like(seen_batch)),
        ]
        return Aggregator(aggregation_scheme=self,
                          initialization_updates=reset_updates,
                          accumulation_updates=batch_updates,
                          readout_variable=numerator_sum / denominator_sum)
||
144 | |||
145 | |||
class Perplexity(Mean):
    """Aggregation scheme reading out ``exp(-mean)``.

    Accumulation is inherited from :class:`Mean`; only the readout
    variable is replaced by the exponential of its negation.

    """
    def get_aggregator(self):
        """Return the :class:`Mean` aggregator with a transformed readout."""
        aggregator = super(Perplexity, self).get_aggregator()
        mean_readout = aggregator.readout_variable
        aggregator.readout_variable = tensor.exp(-mean_readout)
        return aggregator
||
152 | |||
153 | |||
def mean(numerator, denominator=1.):
    """Return ``numerator / denominator`` tagged for mean aggregation.

    The result carries a :class:`Mean` scheme built from the two
    arguments in its ``aggregation_scheme`` tag and takes its name from
    ``numerator``.

    """
    ratio = numerator / denominator
    ratio.tag.aggregation_scheme = Mean(numerator, denominator)
    ratio.name = numerator.name
    return ratio
||
160 | |||
161 | |||
def perplexity(log_likelihood, n_examples):
    """Return ``exp(-log_likelihood / n_examples)`` tagged for aggregation.

    The result is named ``'perplexity'`` and carries a
    :class:`Perplexity` scheme built from the two arguments in its
    ``aggregation_scheme`` tag.

    """
    result = tensor.exp(-log_likelihood / n_examples)
    result.tag.aggregation_scheme = Perplexity(log_likelihood, n_examples)
    result.name = 'perplexity'
    return result
||
168 | |||
169 | |||
class _DataIndependent(AggregationScheme):
    """Dummy aggregation scheme for values that don't depend on data."""
    def get_aggregator(self):
        """Return an aggregator without updates reading the variable itself."""
        no_initialization, no_accumulation = [], []
        return Aggregator(aggregation_scheme=self,
                          initialization_updates=no_initialization,
                          accumulation_updates=no_accumulation,
                          readout_variable=self.variable)
||
177 | |||
178 | |||
class TakeLast(AggregationScheme):
    """Aggregation scheme which remembers only the last value."""
    def get_aggregator(self):
        """Return an aggregator that overwrites its storage on every batch."""
        self.storage = shared_like(self.variable)
        reset_updates = [(self.storage, tensor.zeros_like(self.storage))]
        # Every batch replaces the stored value, so only the value of the
        # most recent batch is read out.
        overwrite_updates = [(self.storage, self.variable)]
        return Aggregator(aggregation_scheme=self,
                          initialization_updates=reset_updates,
                          accumulation_updates=overwrite_updates,
                          readout_variable=self.storage)
||
188 | |||
189 | |||
190 | def _simple_aggregation(scheme, variable): |
||
191 | variable = variable.copy(variable.name) |
||
192 | variable.tag.aggregation_scheme = scheme(variable) |
||
193 | return variable |
||
194 | |||
195 | |||
# ``take_last(variable)`` is ``_simple_aggregation(TakeLast, variable)``:
# a copy of ``variable`` tagged with a :class:`TakeLast` scheme.
take_last = partial(_simple_aggregation, TakeLast)
||
0 ignored issues
–
show
The name
take_last does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$ ).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
Loading history...
|
|||
197 | |||
198 | |||
class Minimum(AggregationScheme):
    """Aggregation scheme which remembers only the minimum value."""
    def _build_aggregator(self, accumulate_update):
        """Return an aggregator applying ``accumulate_update`` per batch.

        ``self.storage`` must have been set before this is called.

        Parameters
        ----------
        accumulate_update : Theano expression
            The new storage value for every batch after the first one.

        """
        # Reset to zero by the initialization updates and set to one by
        # the accumulation updates.
        seen_batch = shared_like(0.)
        # While ``seen_batch`` is zero the batch value is stored as is
        # instead of being combined with the storage.
        next_value = ifelse(seen_batch, accumulate_update, self.variable)
        reset_updates = [
            (self.storage, tensor.zeros_like(self.storage)),
            (seen_batch, tensor.zeros_like(seen_batch)),
        ]
        batch_updates = [
            (self.storage, next_value),
            (seen_batch, tensor.ones_like(seen_batch)),
        ]
        return Aggregator(aggregation_scheme=self,
                          initialization_updates=reset_updates,
                          accumulation_updates=batch_updates,
                          readout_variable=self.storage)

    def get_aggregator(self):
        """Return an aggregator keeping the elementwise minimum."""
        self.storage = shared_like(self.variable)
        smallest = tensor.minimum(self.storage, self.variable)
        return self._build_aggregator(smallest)
||
219 | |||
# ``minimum(variable)`` is ``_simple_aggregation(Minimum, variable)``:
# a copy of ``variable`` tagged with a :class:`Minimum` scheme.
minimum = partial(_simple_aggregation, Minimum)
||
0 ignored issues
–
show
The name
minimum does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$ ).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
Loading history...
|
|||
221 | |||
222 | |||
class Maximum(Minimum):
    """Aggregation scheme which remembers only the maximum value."""
    def get_aggregator(self):
        """Return an aggregator keeping the elementwise maximum."""
        self.storage = shared_like(self.variable)
        largest = tensor.maximum(self.storage, self.variable)
        return self._build_aggregator(largest)
||
229 | |||
# ``maximum(variable)`` is ``_simple_aggregation(Maximum, variable)``:
# a copy of ``variable`` tagged with a :class:`Maximum` scheme.
maximum = partial(_simple_aggregation, Maximum)
||
0 ignored issues
–
show
The name
maximum does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$ ).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
Loading history...
|
|||
231 | |||
232 | |||
class Concatenate(Minimum):
    """Aggregation scheme which remembers values from all batches.

    Parameters
    ----------
    variable : :class:`~tensor.TensorVariable`
        The variable that holds the desired value on a single batch.

    """
    def __init__(self, variable):
        # Add an extra leading axis to concatenate along. It must be
        # non-broadcastable for concatenate to always work.
        padded = tensor.shape_padleft(variable, 1)
        batch_major = tensor.unbroadcast(padded, 0).copy(variable.name)
        super(Concatenate, self).__init__(batch_major)

    def get_aggregator(self):
        """Return an aggregator appending every batch to the storage."""
        self.storage = shared_like(self.variable)
        extended = tensor.concatenate([self.storage, self.variable])
        return self._build_aggregator(extended)
||
253 | |||
# ``concatenate(variable)`` is
# ``_simple_aggregation(Concatenate, variable)``: a copy of ``variable``
# tagged with a :class:`Concatenate` scheme.
concatenate = partial(_simple_aggregation, Concatenate)
||
0 ignored issues
–
show
The name
concatenate does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$ ).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site.
Loading history...
|
|||
255 | |||
256 | |||
@add_metaclass(ABCMeta)
class MonitoredQuantity(object):
    """The base class for monitored-quantities.

    To monitor a non-Theano quantity in Blocks you have to implement this
    interface for it. :meth:`initialize` sets up the accumulators and the
    parameters needed to compute the quantity, :meth:`aggregate`
    aggregates results for every batch, and finally
    :meth:`get_aggregated_value` is called to get the aggregated
    results.

    Attributes
    ----------
    requires : list
        List of Theano variables needed to calculate this quantity.
    name : str
        The name of monitored quantity which appears in the log.

    See Also
    --------
    :class:`~blocks.monitoring.evaluators.DatasetEvaluator`
    :class:`~blocks.extensions.DataStreamMonitoring`

    """
    def __init__(self, requires=None, name=None):
        # ``None`` means that no variables are required; a fresh list is
        # created per instance instead of sharing a default one.
        self.requires = [] if requires is None else requires
        self.name = name

    @abstractmethod
    def initialize(self):
        """Initialize accumulators for this monitored quantity."""

    @abstractmethod
    def aggregate(self, *args):
        r"""Aggregate results for every batch.

        \*args : list of :class:`~numpy.ndarray`
            The values of the variables required to aggregate the
            value of the quantity.

        """

    @abstractmethod
    def get_aggregated_value(self):
        """Obtain the result of aggregation."""
||
306 |
It is generally advisable to initialize the super-class by calling its
__init__
method: