1
|
|
|
from theano.tensor.nnet import conv2d |
2
|
|
|
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs, |
3
|
|
|
get_conv_output_shape) |
4
|
|
|
from theano.tensor.signal.pool import pool_2d, Pool |
5
|
|
|
|
6
|
|
|
from blocks.bricks import (Initializable, Feedforward, Sequence, Activation, |
7
|
|
|
LinearLike) |
8
|
|
|
from blocks.bricks.base import application, Brick, lazy |
9
|
|
|
from blocks.roles import add_role, FILTER, BIAS |
10
|
|
|
from blocks.utils import shared_floatx_nans |
11
|
|
|
|
12
|
|
|
|
13
|
|
|
class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Defaults to ``True``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=True, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        """Allocate the filter bank ``W`` and, unless disabled, the bias ``b``.

        ``W`` has shape ``(num_filters, num_channels) + filter_size``. The
        bias is skipped when a ``use_bias`` attribute exists and is false.
        Tied biases have shape ``(num_filters,)``; untied biases take the
        shape returned by ``get_dim('output')``.

        Raises
        ------
        ValueError
            If untied biases are requested but the bias shape cannot be
            fully determined, either because `image_size` is
            ``(None, None)`` or because the computed output shape still
            contains ``None``.

        """
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                unknown_size_message = ('Cannot infer bias size without '
                                        'image_size specified. If you use '
                                        'variable image_size, you should use '
                                        'tied_biases=True.')
                # This error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size.
                if self.image_size == (None, None):
                    raise ValueError(unknown_size_message)
                bias_shape = self.get_dim('output')
                # A partially specified image size (e.g. ``(None, 32)``)
                # passes the check above; refuse to allocate a parameter
                # whose shape still has an unknown entry.
                if any(dim is None for dim in bias_shape):
                    raise ValueError(unknown_size_message)
                b = shared_floatx_nans(bias_shape, name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        # Only hand a static input shape to the convolution implementation
        # when an image size has been configured.
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                # One bias per filter, broadcast over batch and both
                # spatial axes.
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                # One bias per output location, broadcast over the batch.
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        """Return the per-example shape of the named input or output.

        ``'input_'`` gives ``(num_channels,) + image_size``. ``'output'``
        gives the last three entries of ``get_output_shape(...)``, i.e. the
        result without its batch entry. Any other name is delegated to the
        parent class.

        """
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        """Number of channels produced by this brick (``num_filters``)."""
        return self.num_filters
168
|
|
|
|
169
|
|
|
|
170
|
|
|
class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. It is forwarded
        unchanged to :class:`Convolutional` and is needed whenever
        `original_image_size` has to be inferred.
    unused_edge : tuple, optional
        Tuple of pixels added to the inferred height and width of the
        output image, whose values would be ignored in the corresponding
        forward convolution. Must be such that 0 <= ``unused_edge[i]`` <=
        ``step[i]``. Note that this parameter is **ignored** if
        ``original_image_size`` is specified in the constructor or manually
        set as an attribute.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, unused_edge=(0, 0),
                 **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        # Stored through the property setter below; ``None`` means "infer".
        self.original_image_size = original_image_size
        self.unused_edge = unused_edge

    @property
    def original_image_size(self):
        """Size of the transposed convolution's output image.

        Returns the explicitly stored value when there is one. Otherwise
        the size is computed per axis as
        ``step * (image_size - 1) + filter_size - 2 * border + unused_edge``
        where ``border`` depends on `border_mode`.

        Raises
        ------
        ValueError
            If no explicit value is stored and every entry of
            `image_size` is ``None``.

        """
        explicit = self._original_image_size
        if explicit is not None:
            return explicit

        if all(extent is None for extent in self.image_size):
            raise ValueError("can't infer original_image_size, "
                             "no image_size set")

        # Per-axis padding applied by the corresponding forward convolution.
        mode = self.border_mode
        if isinstance(mode, tuple):
            padding = mode
        elif mode == 'full':
            padding = tuple(extent - 1 for extent in self.filter_size)
        elif mode == 'half':
            padding = tuple(extent // 2 for extent in self.filter_size)
        else:
            padding = [0] * len(self.image_size)

        inferred = []
        for size, stride, kernel, pad, extra in zip(
                self.image_size, self.step, self.filter_size, padding,
                self.unused_edge):
            inferred.append(stride * (size - 1) + kernel - 2 * pad + extra)
        return tuple(inferred)

    @original_image_size.setter
    def original_image_size(self, value):
        self._original_image_size = value

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        """Apply the transposed convolution via ``AbstractConv2d_gradInputs``.

        `input_shape` is accepted for signature compatibility with the
        parent's ``conv2d_impl`` call but is not used here.

        """
        # The AbstractConv2d_gradInputs op takes a kernel that was used for
        # the **convolution**. We therefore have to swap the num_channels
        # and num_filters axes of W (and of the declared kernel shape).
        swapped_kernel = W.transpose(1, 0, 2, 3)
        output_dim = self.get_dim('output')
        grad_inputs = AbstractConv2d_gradInputs(
            imshp=(None,) + output_dim,
            kshp=(filter_shape[1], filter_shape[0]) + filter_shape[2:],
            border_mode=border_mode,
            subsample=subsample)
        # The last argument is the spatial (height, width) part of the
        # output shape.
        return grad_inputs(swapped_kernel, input_, output_dim[1:])

    def get_dim(self, name):
        """Return ``(num_filters,) + original_image_size`` for ``'output'``.

        Every other name is resolved by :class:`Convolutional`.

        """
        if name != 'output':
            return super(ConvolutionalTranspose, self).get_dim(name)
        return (self.num_filters,) + self.original_image_size
274
|
|
|
|
275
|
|
|
|
276
|
|
|
class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    The constructor stores its arguments as attributes of the same name.
    When `input_dim` is ``None`` it is replaced by ``(None, None, None)``
    so that the `image_size` and `num_channels` properties can always
    slice it.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        """The last two entries of `input_dim`."""
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        """The first entry of `input_dim`."""
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater or equal to 2. The last two
            dimensions will be downsampled. For example, with images this
            means that the last two dimensions should represent the height
            and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        """Return the shape of the named input or output.

        ``'input_'`` gives `input_dim`; ``'output'`` gives the result of
        ``Pool.out_shape`` for the configured pooling parameters, as a
        tuple. Any other name is delegated to the parent class, as the
        other bricks in this module do, instead of silently returning
        ``None``.

        """
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))
        return super(Pooling, self).get_dim(name)

    @property
    def num_output_channels(self):
        """Pooling keeps the channel count: the first entry of `input_dim`."""
        return self.input_dim[0]
345
|
|
|
|
346
|
|
|
|
347
|
|
|
class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        # The pooling mode is fixed to 'max'; everything else is forwarded.
        super(MaxPooling, self).__init__(
            'max', pooling_size,
            step=step,
            input_dim=input_dim,
            ignore_border=ignore_border,
            padding=padding,
            **kwargs)

    def __setstate__(self, state):
        """Restore pickled state, filling in attributes it may lack."""
        self.__dict__.update(state)
        # Fix objects created before pull request #899: give them a value
        # for every attribute they do not already have. Note that the
        # fallback for ``ignore_border`` here is False, unlike the
        # constructor default.
        legacy_defaults = (
            ('mode', 'max'),
            ('padding', (0, 0)),
            ('ignore_border', False),
        )
        for attribute, fallback in legacy_defaults:
            setattr(self, attribute, getattr(self, attribute, fallback))
407
|
|
|
|
408
|
|
|
|
409
|
|
|
class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True` (this brick does not check that itself).
        `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        # Translate the boolean flag into the pooling mode name that is
        # handed to the parent class.
        if include_padding:
            mode = 'average_inc_pad'
        else:
            mode = 'average_exc_pad'
        super(AveragePooling, self).__init__(
            mode, pooling_size,
            step=step,
            input_dim=input_dim,
            ignore_border=ignore_border,
            padding=padding,
            **kwargs)
435
|
|
|
|
436
|
|
|
|
437
|
|
|
class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks),
        or application methods from such bricks. :class:`Activation`
        bricks that operate elementwise can also be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in batch. If given, will be passed to
        theano's convolution operator resulting in possibly faster
        execution.
    image_size : tuple, optional
        Height and width of the input (image/featuremap). If given,
        will be passed to theano's convolution operator resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~bricks.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None,
                 image_size=(None, None), border_mode=None, tied_biases=None,
                 **kwargs):
        # Entries may be bricks or application methods; keep the brick that
        # owns each entry.
        bricks = []
        for entry in layers:
            bricks.append(entry if isinstance(entry, Brick) else entry.brick)
        self.layers = bricks
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalSequence, self).__init__(
            application_methods=layers, **kwargs)

    def get_dim(self, name):
        """Return the shape of the sequence's input or output.

        ``'input_'`` gives ``(num_channels,) + image_size``. ``'output'``
        asks the layers from last to first and returns the first answer
        that does not raise ``ValueError``. Other names are delegated to
        the parent class.

        """
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name != 'output':
            return super(ConvolutionalSequence, self).get_dim(name)

        for layer in reversed(self.layers):
            try:
                return layer.get_dim(name)
            except ValueError:
                continue
        # The output shape of an empty ConvolutionalSequence or one
        # consisting only of Activations is the input shape.
        return self.get_dim('input_')

    def _push_allocation_config(self):
        """Propagate shapes and explicitly set options to the child layers.

        Each non-activation layer receives the current channel count,
        image size and batch size, then reports the channel count (and,
        when its image size is fully known, the image size) that the next
        layer should receive.

        """
        current_channels = self.num_channels
        current_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue

            # Only explicitly specified values override the child's own.
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = current_size
            layer.num_channels = current_channels
            layer.batch_size = self.batch_size
            use_bias = getattr(self, 'use_bias', None)
            if use_bias is not None:
                layer.use_bias = use_bias

            # Push input dimensions to children.
            layer.push_allocation_config()

            # Retrieve output dimensions and set them for the next layer.
            if None not in layer.image_size:
                current_size = layer.get_dim('output')[1:]
            current_channels = layer.num_output_channels
549
|
|
|
|
550
|
|
|
|
551
|
|
|
class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only two
    dimensional input (batch, features) like MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Return `input_` flattened with ``flatten(ndim=2)``."""
        flattened = input_.flatten(ndim=2)
        return flattened
562
|
|
|
|