from theano import tensor
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import Initializable, Feedforward, Sequence
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(Initializable):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to the
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

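    Examples
    --------
    A minimal usage sketch (``Constant``, the Blocks constant initializer,
    is used only to keep the example self-contained):

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> conv = Convolutional(filter_size=(3, 3), num_filters=16,
    ...                      num_channels=3, image_size=(32, 32),
    ...                      weights_init=Constant(0.),
    ...                      biases_init=Constant(0.))
    >>> conv.initialize()
    >>> conv.get_dim('output')
    (16, 30, 30)
    >>> feature_maps = conv.apply(tensor.tensor4('images'))
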
    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # image_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)
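    # A hypothetical override might look like the following sketch (the
    # ``dnn_conv`` import path comes from the comment above and is not
    # otherwise verified here):
    #
    #     from theano.sandbox.cuda.dnn import dnn_conv
    #
    #     def dnn_conv2d(input_, filters, image_shape=None,
    #                    filter_shape=None, **kwargs):
    #         # image_shape and filter_shape are accepted but ignored;
    #         # border_mode and subsample still pass through via **kwargs.
    #         return dnn_conv(input_, filters, **kwargs)
    #
    #     Convolutional.conv2d_impl = staticmethod(dnn_conv2d)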

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    def _initialize(self):
        if self.use_bias:
            W, b = self.parameters
            self.biases_init.initialize(b, self.rng)
        else:
            W, = self.parameters
        self.weights_init.initialize(W, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.use_bias:
            W, b = self.parameters
        else:
            W, = self.parameters

        if self.image_size == (None, None):
            image_shape = None
        else:
            image_shape = (self.batch_size, self.num_channels)
            image_shape += self.image_size

        output = self.conv2d_impl(
            input_, W,
            image_shape=image_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if self.use_bias:
            if self.tied_biases:
                output += b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            image_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(image_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


class ConvolutionalTranspose(Initializable):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    original_image_size : tuple
        The height and width of the output (image or feature map).
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to the
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

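    Examples
    --------
    A minimal usage sketch (``Constant``, the Blocks constant initializer,
    is used only to keep the example self-contained):

    >>> from blocks.initialization import Constant
    >>> conv_transpose = ConvolutionalTranspose(
    ...     filter_size=(3, 3), num_filters=16, num_channels=8,
    ...     original_image_size=(32, 32), image_size=(30, 30),
    ...     weights_init=Constant(0.), biases_init=Constant(0.))
    >>> conv_transpose.initialize()
    >>> conv_transpose.get_dim('output')
    (16, 32, 32)
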
    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels',
                      'original_image_size'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size, batch_size=None, image_size=(None, None),
                 step=(1, 1), border_mode='valid', tied_biases=False,
                 **kwargs):
        super(ConvolutionalTranspose, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.original_image_size = original_image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        # The AbstractConv2d_gradInputs op takes a kernel that was used for
        # the **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = shared_floatx_nans((self.num_channels, self.num_filters) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    def _initialize(self):
        if self.use_bias:
            W, b = self.parameters
            self.biases_init.initialize(b, self.rng)
        else:
            W, = self.parameters
        self.weights_init.initialize(W, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the transposed convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

        """
        if self.use_bias:
            W, b = self.parameters
        else:
            W, = self.parameters
        out_shape = tensor.stack(
            *((input_.shape[0],) + self.get_dim('output')))
        output = AbstractConv2d_gradInputs(
            imshp=out_shape, kshp=W.shape, border_mode=self.border_mode,
            subsample=self.step)(W, input_, out_shape[-2:])
        if self.use_bias:
            if self.tied_biases:
                output += b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater or equal to 2. The last two
            dimensions will be downsampled. For example, with images this
            means that the last two dimensions should represent the height
            and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

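    Examples
    --------
    A minimal usage sketch (only this module and Theano are assumed):

    >>> from theano import tensor
    >>> pool = MaxPooling(pooling_size=(2, 2), input_dim=(16, 32, 32))
    >>> pool.get_dim('output')
    (16, 16, 16)
    >>> pooled = pool.apply(tensor.tensor4('features'))
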
    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)


class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

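    Examples
    --------
    A small sketch (only this module is assumed):

    >>> pool = AveragePooling(pooling_size=(2, 2), input_dim=(3, 8, 8))
    >>> pool.mode
    'average_exc_pad'
    >>> pool.get_dim('output')
    (3, 4, 4)
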
    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


class _AllocationMixin(object):
    def _push_allocation_config(self):
        for attr in ['filter_size', 'num_filters', 'border_mode',
                     'batch_size', 'num_channels', 'image_size',
                     'tied_biases', 'use_bias']:
            setattr(self.convolution, attr, getattr(self, attr))

    @property
    def num_output_channels(self):
        # Assumes an elementwise activation function. Would need to
        # change to support e.g. maxout, but that would also require
        # a way of querying the activation function for this kind of
        # information.
        return self.num_filters


class ConvolutionalActivation(_AllocationMixin, Sequence, Initializable):
    """A convolution followed by an activation function.

    Parameters
    ----------
    activation : :class:`.BoundApplication`
        The application method to apply after convolution (i.e.
        the nonlinear activation function)

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

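    Examples
    --------
    A minimal sketch (``Rectifier`` and ``Constant`` are standard Blocks
    bricks/initializers, assumed here only to keep the example
    self-contained):

    >>> from blocks.bricks import Rectifier
    >>> from blocks.initialization import Constant
    >>> conv_relu = ConvolutionalActivation(
    ...     Rectifier().apply, filter_size=(3, 3), num_filters=8,
    ...     num_channels=3, image_size=(16, 16),
    ...     weights_init=Constant(0.), biases_init=Constant(0.))
    >>> conv_relu.initialize()
    >>> conv_relu.get_dim('output')
    (8, 14, 14)
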
    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, activation, filter_size, num_filters, num_channels,
                 batch_size=None, image_size=None, step=(1, 1),
                 border_mode='valid', tied_biases=False, **kwargs):
        self.convolution = Convolutional()

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalActivation, self).__init__(
            application_methods=[self.convolution.apply, activation],
            **kwargs)

    def get_dim(self, name):
        # TODO The name of the activation output doesn't need to be `output`
        return self.convolution.get_dim(name)

    def _push_allocation_config(self):
        super(ConvolutionalActivation, self)._push_allocation_config()
        self.convolution.step = self.step


class ConvolutionalTransposeActivation(_AllocationMixin, Sequence,
                                       Initializable):
    """A transposed convolution followed by an activation function.

    Parameters
    ----------
    activation : :class:`.BoundApplication`
        The application method to apply after convolution (i.e.
        the nonlinear activation function)

    See Also
    --------
    :class:`ConvolutionalTranspose` : For the documentation of other
        parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels',
                      'original_image_size'])
    def __init__(self, activation, filter_size, num_filters, num_channels,
                 original_image_size, batch_size=None, image_size=None,
                 step=(1, 1), border_mode='valid', tied_biases=False,
                 **kwargs):
        self.convolution = ConvolutionalTranspose()

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.image_size = image_size
        self.original_image_size = original_image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalTransposeActivation, self).__init__(
            application_methods=[self.convolution.apply, activation],
            **kwargs)

    def get_dim(self, name):
        # TODO The name of the activation output doesn't need to be `output`
        return self.convolution.get_dim(name)

    def _push_allocation_config(self):
        super(ConvolutionalTransposeActivation,
              self)._push_allocation_config()
        self.convolution.step = self.step
        self.convolution.original_image_size = self.original_image_size


class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in batch. If given, will be passed to
        Theano's convolution operator, resulting in possibly faster
        execution.
    image_size : tuple, optional
        Width and height of the input (image/feature map). If given,
        will be passed to Theano's convolution operator, resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

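    Examples
    --------
    A minimal usage sketch: the child bricks are constructed lazily and
    receive their dimensions when the allocation configuration is pushed
    (``Constant``, the Blocks constant initializer, and the layer names are
    choices made only for this example):

    >>> from blocks.initialization import Constant
    >>> conv_seq = ConvolutionalSequence(
    ...     [Convolutional(filter_size=(3, 3), num_filters=16, name='conv1'),
    ...      MaxPooling(pooling_size=(2, 2), name='pool1'),
    ...      Convolutional(filter_size=(3, 3), num_filters=32,
    ...                    name='conv2')],
    ...     num_channels=3, image_size=(32, 32),
    ...     weights_init=Constant(0.), biases_init=Constant(0.))
    >>> conv_seq.initialize()
    >>> conv_seq.get_dim('output')
    (32, 13, 13)
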
    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=False, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            return self.layers[-1].get_dim(name)
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer._push_allocation_config()

            # Retrieve output dimensions
            # and set them for the next layer
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only
    two-dimensional input (batch, features), such as an MLP.

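    Examples
    --------
    A small sketch (only Theano is assumed):

    >>> from theano import tensor
    >>> images = tensor.tensor4('images')
    >>> Flattener().apply(images).ndim
    2
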
    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)