from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filters (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to the
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Defaults to ``True``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)
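    # For illustration, such an override might look like the following
    # sketch, where ``alternative_conv2d`` stands for any drop-in
    # convolution implementation that does not understand every keyword
    # argument listed above:
    #
    #     def conv2d_with_kwargs(input_, filters, **kwargs):
    #         # Forward only the supported arguments and silently drop
    #         # the rest (e.g. input_shape and filter_shape).
    #         return alternative_conv2d(
    #             input_, filters,
    #             border_mode=kwargs.get('border_mode', 'valid'),
    #             subsample=kwargs.get('subsample', (1, 1)))
    #
    #     Convolutional.conv2d_impl = staticmethod(conv2d_with_kwargs)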

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)
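    # For instance (illustrative numbers):
    #     get_output_shape((None, 3, 28, 28), (16, 3, 5, 5), 'valid', (1, 1))
    # returns (None, 16, 24, 24).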

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 batch_size=None, image_size=(None, None), step=(1, 1),
                 border_mode='valid', tied_biases=True, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters
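
# Example (illustrative) of constructing and applying the brick above;
# Uniform and Constant are assumed to be imported from
# blocks.initialization, and ``x`` to be a 4D Theano tensor:
#
#     conv = Convolutional(filter_size=(5, 5), num_filters=16,
#                          num_channels=3, image_size=(32, 32),
#                          weights_init=Uniform(width=0.02),
#                          biases_init=Constant(0))
#     conv.initialize()
#     y = conv.apply(x)
#     conv.get_dim('output')   # (16, 28, 28) with the default 'valid' mode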


class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for tied
        biases. Defaults to ``None``.
    unused_edge : tuple, optional
        Tuple of pixels added to the inferred height and width of the
        output image, whose values would be ignored in the corresponding
        forward convolution. Must be such that 0 <= ``unused_edge[i]`` <=
        ``step[i]``. Note that this parameter is **ignored** if
        ``original_image_size`` is specified in the constructor or manually
        set as an attribute.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.

    """

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, unused_edge=(0, 0),
                 **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self.original_image_size = original_image_size
        self.unused_edge = unused_edge

    @property
    def original_image_size(self):
        if self._original_image_size is None:
            if all(s is None for s in self.image_size):
                raise ValueError("can't infer original_image_size, "
                                 "no image_size set")
            if isinstance(self.border_mode, tuple):
                border = self.border_mode
            elif self.border_mode == 'full':
                border = tuple(k - 1 for k in self.filter_size)
            elif self.border_mode == 'half':
                border = tuple(k // 2 for k in self.filter_size)
            else:
                border = [0] * len(self.image_size)
            tups = zip(self.image_size, self.step, self.filter_size, border,
                       self.unused_edge)
            return tuple(s * (i - 1) + k - 2 * p + u
                         for i, s, k, p, u in tups)
        else:
            return self._original_image_size

    @original_image_size.setter
    def original_image_size(self, value):
        self._original_image_size = value

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        # The AbstractConv2d_gradInputs op takes a kernel that was used
        # for the **convolution**. We therefore have to invert
        # num_channels and num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)


class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, stride=self.step,
                         mode=self.mode, pad=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, stride=self.step,
                ignore_border=self.ignore_border, pad=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the
        factor by which your input's last two dimensions will be
        downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether to discard pooling regions whose extent reaches beyond
        the edge of the image, instead of pooling over the partial
        region. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)


class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks),
        or application methods from such bricks. :class:`Activation`
        bricks that operate elementwise can also be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in a batch. If given, this will be passed to
        Theano's convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        Width and height of the input (image or feature map). If given,
        this will be passed to Theano's convolution operator, possibly
        resulting in faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~bricks.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None,
                 image_size=(None, None), border_mode=None, tied_biases=None,
                 **kwargs):
        self.layers = [a if isinstance(a, Brick) else a.brick for a in layers]
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalSequence, self).__init__(
            application_methods=layers, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return ((self.num_channels,) + self.image_size)
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer.push_allocation_config()

            # Retrieve the output dimensions and set them for the next layer
            if None not in layer.image_size:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only
    two-dimensional input (batch, features), such as an MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
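
# Example (illustrative): flattening the output of a convolutional stack
# before an MLP; ``conv_sequence`` and ``x`` are assumed to be the objects
# from the illustrative example above. A (batch, 32, 14, 14) feature map
# becomes a (batch, 32 * 14 * 14) = (batch, 6272) matrix:
#
#     features = Flattener().apply(conv_sequence.apply(x))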