from theano import tensor
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import Initializable, Feedforward, Sequence
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(Initializable):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filters (also called *kernels*).
    num_filters : int
        Number of filters, i.e. the number of output channels (feature
        maps) produced by this layer.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. Each filter extends over all of
        the input channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        the Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # image_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)
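
    # For example, a GPU-backed override might look roughly like the sketch
    # below. This is illustrative only: it assumes that
    # ``theano.sandbox.cuda.dnn.dnn_conv`` is importable and accepts the
    # ``border_mode`` and ``subsample`` keyword arguments, and the wrapper
    # simply swallows the shape hints it does not support.
    #
    #     def dnn_conv2d(input_, filters, image_shape=None, filter_shape=None,
    #                    **kwargs):
    #         from theano.sandbox.cuda.dnn import dnn_conv
    #         return dnn_conv(input_, filters, **kwargs)
    #
    #     Convolutional.conv2d_impl = staticmethod(dnn_conv2d)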

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)
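
    # For instance, with the default ``get_conv_output_shape``, a
    # hypothetical minibatch of 3-channel 32x32 images convolved with
    # sixteen 3x3 filters, 'valid' border mode and (1, 1) strides should
    # give
    #
    #     get_conv_output_shape((None, 3, 32, 32), (16, 3, 3, 3),
    #                           'valid', (1, 1))  # -> (None, 16, 30, 30)
    #
    # since each spatial dimension shrinks to 32 - 3 + 1 = 30.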

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    def _initialize(self):
        if self.use_bias:
            W, b = self.parameters
            self.biases_init.initialize(b, self.rng)
        else:
            W, = self.parameters
        self.weights_init.initialize(W, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.use_bias:
            W, b = self.parameters
        else:
            W, = self.parameters

        if self.image_size == (None, None):
            image_shape = None
        else:
            image_shape = (self.batch_size, self.num_channels)
            image_shape += self.image_size

        output = self.conv2d_impl(
            input_, W,
            image_shape=image_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if self.use_bias:
            if self.tied_biases:
                output += b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            image_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(image_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters
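
# A minimal usage sketch for ``Convolutional`` (illustrative only; the image
# and filter sizes and the initialization schemes below are assumptions, not
# requirements of the brick):
#
#     from blocks.initialization import Constant, IsotropicGaussian
#
#     conv = Convolutional(filter_size=(3, 3), num_filters=16,
#                          num_channels=3, image_size=(32, 32),
#                          weights_init=IsotropicGaussian(0.01),
#                          biases_init=Constant(0))
#     conv.initialize()
#     y = conv.apply(tensor.tensor4('x'))
#     # y has shape (batch, 16, 30, 30) under the default 'valid' border
#     # mode, since each spatial dimension shrinks to 32 - 3 + 1 = 30.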


class ConvolutionalTranspose(Initializable):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filters (also called *kernels*).
    num_filters : int
        Number of filters, i.e. the number of output channels (feature
        maps) produced by this layer.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. Each filter extends over all of
        the input channels.
    original_image_size : tuple
        The height and width of the output (image or feature map).
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        the Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels',
                      'original_image_size'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size, batch_size=None, image_size=(None, None),
                 step=(1, 1), border_mode='valid', tied_biases=False,
                 **kwargs):
        super(ConvolutionalTranspose, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.original_image_size = original_image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        # The AbstractConv2d_gradInputs op takes a kernel that was used for
        # the **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = shared_floatx_nans((self.num_channels, self.num_filters) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    def _initialize(self):
        if self.use_bias:
            W, b = self.parameters
            self.biases_init.initialize(b, self.rng)
        else:
            W, = self.parameters
        self.weights_init.initialize(W, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the transposed convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

        """
        if self.use_bias:
            W, b = self.parameters
        else:
            W, = self.parameters
        imshp = (None,) + self.get_dim('output')
        kshp = (self.num_channels, self.num_filters) + self.filter_size
        output = AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=self.border_mode,
            subsample=self.step)(W, input_, imshp[-2:])
        if self.use_bias:
            if self.tied_biases:
                output += b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters
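
# A minimal usage sketch for ``ConvolutionalTranspose`` (illustrative only;
# all sizes are assumptions, and the initializations are as in the
# ``Convolutional`` sketch above). With the default 'valid' border mode and
# (1, 1) strides, a forward convolution with 4x4 filters maps 16x16 images
# to 13x13 feature maps, so the transpose maps 13x13 back to 16x16:
#
#     conv_t = ConvolutionalTranspose(
#         filter_size=(4, 4), num_filters=8, num_channels=16,
#         original_image_size=(16, 16), image_size=(13, 13),
#         weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
#     conv_t.initialize()
#     up = conv_t.apply(tensor.tensor4('h'))  # (batch, 8, 16, 16)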


class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)
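
# A minimal usage sketch for ``MaxPooling`` (illustrative; the sizes are
# assumptions). With ``ignore_border=True`` (the default), pooling a
# (batch, 16, 5, 5) input with (2, 2) regions and the default step equal to
# the pooling size yields a (batch, 16, 2, 2) output; with
# ``ignore_border=False`` it would be (batch, 16, 3, 3), as described in the
# docstring above:
#
#     pool = MaxPooling((2, 2), input_dim=(16, 5, 5))
#     pooled = pool.apply(tensor.tensor4('features'))
#     pool.get_dim('output')  # (16, 2, 2)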


class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


class _AllocationMixin(object):
    def _push_allocation_config(self):
        for attr in ['filter_size', 'num_filters', 'border_mode',
                     'batch_size', 'num_channels', 'image_size',
                     'tied_biases', 'use_bias']:
            setattr(self.convolution, attr, getattr(self, attr))

    @property
    def num_output_channels(self):
        # Assumes an elementwise activation function. Would need to
        # change to support e.g. maxout, but that would also require
        # a way of querying the activation function for this kind of
        # information.
        return self.num_filters


class ConvolutionalActivation(_AllocationMixin, Sequence, Initializable):
    """A convolution followed by an activation function.

    Parameters
    ----------
    activation : :class:`.BoundApplication`
        The application method to apply after convolution (i.e.
        the nonlinear activation function)

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, activation, filter_size, num_filters, num_channels,
                 batch_size=None, image_size=None, step=(1, 1),
                 border_mode='valid', tied_biases=False, **kwargs):
        self.convolution = Convolutional()

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalActivation, self).__init__(
            application_methods=[self.convolution.apply, activation],
            **kwargs)

    def get_dim(self, name):
        # TODO The name of the activation output doesn't need to be `output`
        return self.convolution.get_dim(name)

    def _push_allocation_config(self):
        super(ConvolutionalActivation, self)._push_allocation_config()
        self.convolution.step = self.step
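
# A minimal usage sketch for ``ConvolutionalActivation`` (illustrative; the
# activation, sizes and initializations are assumptions):
#
#     from blocks.bricks import Rectifier
#     from blocks.initialization import Constant, IsotropicGaussian
#
#     conv_relu = ConvolutionalActivation(
#         Rectifier().apply, filter_size=(5, 5), num_filters=32,
#         num_channels=3, image_size=(28, 28),
#         weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
#     conv_relu.initialize()
#     h = conv_relu.apply(tensor.tensor4('x'))  # (batch, 32, 24, 24)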


class ConvolutionalTransposeActivation(_AllocationMixin, Sequence,
                                       Initializable):
    """A transposed convolution followed by an activation function.

    Parameters
    ----------
    activation : :class:`.BoundApplication`
        The application method to apply after convolution (i.e.
        the nonlinear activation function)

    See Also
    --------
    :class:`ConvolutionalTranspose` : For the documentation of other
        parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels',
                      'original_image_size'])
    def __init__(self, activation, filter_size, num_filters, num_channels,
                 original_image_size, batch_size=None, image_size=None,
                 step=(1, 1), border_mode='valid', tied_biases=False,
                 **kwargs):
        self.convolution = ConvolutionalTranspose()

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.image_size = image_size
        self.original_image_size = original_image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalTransposeActivation, self).__init__(
            application_methods=[self.convolution.apply, activation],
            **kwargs)

    def get_dim(self, name):
        # TODO The name of the activation output doesn't need to be `output`
        return self.convolution.get_dim(name)

    def _push_allocation_config(self):
        super(ConvolutionalTransposeActivation,
              self)._push_allocation_config()
        self.convolution.step = self.step
        self.convolution.original_image_size = self.original_image_size


class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in batch. If given, will be passed to
        Theano's convolution operator resulting in possibly faster
        execution.
    image_size : tuple, optional
        Width and height of the input (image/feature map). If given,
        will be passed to Theano's convolution operator resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).

    Notes
    -----
    The passed convolutional operators should be 'lazily' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=False, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            return self.layers[-1].get_dim(name)
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer._push_allocation_config()

            # Retrieve output dimensions and set them for the next layer
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels
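
# A minimal sketch of composing bricks with ``ConvolutionalSequence``
# (illustrative only; the layer sizes and initializations are assumptions).
# Note that the child bricks are constructed lazily, without num_channels or
# image_size, as recommended in the Notes section above:
#
#     from blocks.bricks import Rectifier
#     from blocks.initialization import Constant, IsotropicGaussian
#
#     conv_net = ConvolutionalSequence(
#         [ConvolutionalActivation(Rectifier().apply, (5, 5), 32),
#          MaxPooling((2, 2)),
#          ConvolutionalActivation(Rectifier().apply, (3, 3), 64),
#          MaxPooling((2, 2))],
#         num_channels=3, image_size=(32, 32),
#         weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
#     conv_net.initialize()
#     features = conv_net.apply(tensor.tensor4('images'))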


class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only two
    dimensional input (batch, features) like MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
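
# A minimal sketch of using ``Flattener`` to feed convolutional feature maps
# into an MLP-style brick (illustrative; ``features`` is assumed to be the
# 4D output of a convolutional brick such as the sequence sketched above):
#
#     flat_features = Flattener().apply(features)  # (batch, channels * h * w)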