Completed
Pull Request — master (#977)
by Frédéric
02:51 queued 01:10
created

blocks.bricks.Convolutional   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 155
Duplicated Lines 0 %
Metric Value
dl 0
loc 155
rs 10
wmc 15

5 Methods

Rating   Name   Duplication   Size   Complexity  
A num_output_channels() 0 3 1
B _allocate() 0 23 5
A __init__() 0 14 1
A get_dim() 0 12 4
B apply() 0 41 4
1
from theano.tensor.nnet import conv2d
2
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
3
                                              get_conv_output_shape)
4
from theano.tensor.signal.pool import pool_2d, Pool
5
6
from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
7
                           LinearLike)
8
from blocks.bricks.base import application, Brick, lazy
9
from blocks.roles import add_role, FILTER, BIAS
10
from blocks.utils import shared_floatx_nans
11
12
13
class Convolutional(LinearLike):
    """Brick that applies a 2D convolution to its input.

    Parameters
    ----------
    filter_size : tuple
        Height and width of the filters (also known as *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of channels of the input. The first layer of a network
        typically sees 1 channel for grayscale images or 3 for color
        (RGB) images; deeper layers see as many channels as the previous
        convolutional layer has filters. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples in a batch. When provided it is forwarded to
        the Theano convolution operator, which may speed up execution.
    image_size : tuple, optional
        Height and width of the input (image or feature map). When
        provided it is forwarded to the Theano convolution operator,
        which may speed up execution. Defaults to ``(None, None)``.
    step : tuple, optional
        Step (stride) used when sliding the filters over the image.
        Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        Border mode of the convolution, see
        :func:`scipy.signal.convolve2d` for details. Defaults to 'valid'.
    tied_biases : bool
        When ``True``, each filter has a single bias shared by all the
        locations at which the filter is applied. When ``False``, the
        biases are untied: there is a separate bias for every location
        at which the filter is applied. Defaults to ``False``.

    """
    # Hook for swapping the convolution implementation, e.g. to call
    # theano.sandbox.cuda.dnn.dnn_conv directly and benefit from features
    # that Theano's standard conv2d does not offer yet. A replacement must
    # take at least the input and the kernels positionally, plus the
    # keyword arguments input_shape, subsample, border_mode and
    # filter_shape. Unsupported keywords must still be accepted and
    # ignored, e.g. through a wrapper that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)

    # Hook for swapping the output shape computation to match a custom
    # conv2d_impl. It is called with 4 positional arguments: the shape of
    # an image minibatch (batch size, number of channels, height, width),
    # the shape of the filter bank (number of filters, number of input
    # channels, filter height, filter width), the border mode, and the
    # step (vertical and horizontal strides). It must return a 4-tuple
    # (batch size, number of channels, output height, output width); this
    # brick discards the first element of that tuple.
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        # Geometry of the filter bank.
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels

        # Optional shape hints for the convolution operator.
        self.batch_size = batch_size
        self.image_size = image_size

        # Convolution and bias options.
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        """Create the filter bank and, when biases are used, the biases.

        Raises
        ------
        ValueError
            If untied biases are requested while `image_size` is still
            ``(None, None)``, since the bias shape then cannot be
            derived.

        """
        filter_shape = (self.num_filters, self.num_channels) + self.filter_size
        W = shared_floatx_nans(filter_shape, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')

        if not self.use_bias:
            return

        if self.tied_biases:
            # One bias per filter.
            bias_shape = (self.num_filters,)
        else:
            # This check lives here rather than in the constructor
            # because ConvolutionalSequence may specify the image size
            # after construction.
            if self.image_size == (None, None):
                raise ValueError('Cannot infer bias size without '
                                 'image_size specified. If you use '
                                 'variable image_size, you should use '
                                 'tied_biases=True.')
            # One bias per output location of every filter.
            bias_shape = self.get_dim('output')

        b = shared_floatx_nans(bias_shape, name='b')
        add_role(b, BIAS)
        self.parameters.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Convolve the input with the filters of this brick.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor whose axes are batch size, number of channels,
            image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) whose axes are
            batch size, number of filters, feature map height, and
            feature map width.

            The feature map height and width depend on the border mode:
            ``image_size - filter_size + 1`` for 'valid' and
            ``image_size + filter_size - 1`` for 'full'.

        """
        # The shape hint is only passed on when the image size is known.
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = ((self.batch_size, self.num_channels) +
                           self.image_size)
        filter_shape = (self.num_filters, self.num_channels) + self.filter_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=filter_shape)

        if self.use_bias:
            # Tied biases are broadcast over the batch and both spatial
            # axes; untied biases only over the batch axis.
            if self.tied_biases:
                pattern = ('x', 0, 'x', 'x')
            else:
                pattern = ('x', 0, 1, 2)
            output += self.b.dimshuffle(*pattern)
        return output

    def get_dim(self, name):
        """Return the per-example shape of the input or the output.

        For ``'input_'`` this is ``(num_channels,) + image_size``; for
        ``'output'`` it is the result of `get_output_shape` without its
        leading batch entry. Other names are delegated to the parent
        class.

        """
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name != 'output':
            return super(Convolutional, self).get_dim(name)

        # The batch size has no influence on the per-example shape.
        batch_shape = (None, self.num_channels) + self.image_size
        filter_shape = (self.num_filters, self.num_channels) + self.filter_size
        out_shape = self.get_output_shape(batch_shape, filter_shape,
                                          self.border_mode, self.step)
        assert len(out_shape) == 4
        return out_shape[1:]

    @property
    def num_output_channels(self):
        """Number of channels of the output, i.e. `num_filters`."""
        return self.num_filters
168
169
170
class ConvolutionalTranspose(Convolutional):
    """Brick that applies the transpose of a 2D convolution.

    Parameters
    ----------
    original_image_size : tuple
        Height and width of the image produced by the transpose
        operation, i.e. of the input of the original (non-transposed)
        convolution.
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        Step (stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. The inherited
        allocation code requires it whenever the biases are untied
        (``tied_biases=False``). Defaults to ``(None, None)``.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['original_image_size', 'filter_size', 'num_filters',
                      'num_channels'])
    def __init__(self, original_image_size, filter_size, num_filters,
                 num_channels, **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self.original_image_size = original_image_size

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        """Apply the transposed convolution through the gradient op.

        `input_shape` is deliberately unused; it is accepted only because
        :meth:`Convolutional.apply` passes it to every `conv2d_impl`.

        """
        output_dim = self.get_dim('output')
        # The AbstractConv2d_gradInputs op expects the kernel that was
        # used for the **convolution**, so the num_filters and
        # num_channels axes of W (and of its shape) are swapped.
        kernels = W.transpose(1, 0, 2, 3)
        kernel_shape = ((filter_shape[1], filter_shape[0]) +
                        filter_shape[2:])
        grad_inputs = AbstractConv2d_gradInputs(
            imshp=(None,) + output_dim, kshp=kernel_shape,
            border_mode=border_mode, subsample=subsample)
        # The last argument is the spatial shape of the result.
        return grad_inputs(kernels, input_, output_dim[1:])

    def get_dim(self, name):
        """Return the per-example shape of the input or the output.

        The output shape is ``(num_filters,) + original_image_size``;
        every other name is resolved by :class:`Convolutional`.

        """
        if name != 'output':
            return super(ConvolutionalTranspose, self).get_dim(name)
        return (self.num_filters,) + self.original_image_size
223
224
225
class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    Parameters
    ----------
    mode : str
        Pooling mode, forwarded as ``mode`` to Theano's ``pool_2d``.
    pooling_size : tuple
        Size of the pooling region, forwarded to ``pool_2d``.
    step : tuple
        Shift between pooling regions, forwarded as ``st`` to
        ``pool_2d``.
    input_dim : tuple
        Shape of the input. Its first entry is exposed as
        `num_channels` and its last two entries as `image_size`.
        ``None`` is replaced by ``(None, None, None)``.
    ignore_border : bool
        Forwarded as ``ignore_border`` to ``pool_2d``.
    padding : tuple
        Forwarded as ``padding`` to ``pool_2d``.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        # Always keep a 3-tuple so that the `image_size` and
        # `num_channels` properties below can slice and index it.
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        """The last two entries of `input_dim`."""
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        """The first entry of `input_dim`."""
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater or equal to 2. The last two
            dimensions will be downsampled. For example, with images this
            means that the last two dimensions should represent the height
            and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        """Return the shape of the input or the output of the pooling.

        ``'input_'`` maps to `input_dim` and ``'output'`` to the shape
        computed by ``Pool.out_shape``. Any other name is delegated to
        the parent implementation instead of silently yielding ``None``,
        which is how the other bricks in this module behave.

        """
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))
        return super(Pooling, self).get_dim(name)

    @property
    def num_output_channels(self):
        """Pooling leaves the number of channels untouched."""
        return self.input_dim[0]
294
295
296
class MaxPooling(Pooling):
    """Pooling brick that keeps the maximum of every pooling region.

    Parameters
    ----------
    pooling_size : tuple
        Height and width of the pooling region, i.e. the factor by which
        the last two dimensions of the input are downscaled.
    step : tuple, optional
        Vertical and horizontal shift (stride) between pooling regions.
        Equal to `pooling_size` by default. A lower value results in
        overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers giving the shape of the input. Its last two
        dimensions are used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers giving the vertical and horizontal
        zero-padding applied to each of the top and bottom (vertical)
        and left and right (horizontal) edges. For example, (4, 3) pads
        the top edge with 4 pixels, the bottom edge with 4 pixels, and
        the left and right edges with 3 pixels each. No padding is
        performed by default.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN]: `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__(
            'max', pooling_size,
            step=step,
            input_dim=input_dim,
            ignore_border=ignore_border,
            padding=padding,
            **kwargs)

    def __setstate__(self, state):
        """Restore the state, filling in attributes old pickles lack."""
        self.__dict__.update(state)
        # Objects created before pull request #899 do not carry these
        # attributes; give them a value without overriding existing ones.
        legacy_defaults = (('mode', 'max'),
                           ('padding', (0, 0)),
                           ('ignore_border', False))
        for attribute, fallback in legacy_defaults:
            setattr(self, attribute, getattr(self, attribute, fallback))
356
357
358
class AveragePooling(Pooling):
    """Pooling brick that averages every pooling region.

    Parameters
    ----------
    include_padding : bool, optional
        Whether the zeros introduced by the `padding` argument take
        part in the computation of the average. A value of `True` is
        only accepted if `ignore_border` is also `True`. `False` by
        default.

    Notes
    -----
    The remaining arguments of this class are documented in
    :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        # Select the pooling mode that matches the padding policy.
        if include_padding:
            mode = 'average_inc_pad'
        else:
            mode = 'average_exc_pad'
        super(AveragePooling, self).__init__(
            mode, pooling_size,
            step=step,
            input_dim=input_dim,
            ignore_border=ignore_border,
            padding=padding,
            **kwargs)
384
385
386
class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A chain of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
        :class:`Activation` bricks that operate elementwise can also
        be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in a batch. If given, it is passed to Theano's
        convolution operator, possibly resulting in faster execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If
        given, it is passed to Theano's convolution operator, possibly
        resulting in faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Value pushed as `tied_biases` to every child brick that is not
        an :class:`Activation` at allocation time. Defaults to ``False``.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=False, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        # The sequence chains the `apply` method of every layer.
        super(ConvolutionalSequence, self).__init__(
            application_methods=[layer.apply for layer in layers], **kwargs)

    def get_dim(self, name):
        """Return the per-example shape of the input or the output.

        The output shape is taken from the last layer that can report
        one; the input shape is ``(num_channels,) + image_size``.

        """
        if name == 'input_':
            # NOTE(review): with the constructor default of
            # ``image_size=None`` this concatenation raises a TypeError;
            # confirm that callers always set an image size first.
            return (self.num_channels,) + self.image_size
        if name == 'output':
            # Walk the layers backwards, skipping those that signal
            # through a ValueError that they have no output dimension.
            for layer in reversed(self.layers):
                try:
                    return layer.get_dim(name)
                except ValueError:
                    continue
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        """Configure every layer from the output of its predecessor."""
        channels = self.num_channels
        size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue

            # Only an explicitly requested border mode is imposed.
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            layer.tied_biases = self.tied_biases
            layer.image_size = size
            layer.num_channels = channels
            layer.batch_size = self.batch_size
            layer.use_bias = self.use_bias

            # Let the layer configure its own children.
            layer.push_allocation_config()

            # The output of this layer is the input of the next one.
            if layer.image_size is not None:
                size = layer.get_dim('output')[1:]
            channels = layer.num_output_channels
489
490
491
class Flattener(Brick):
    """Brick that flattens its input.

    It can be used to feed multidimensional objects, such as images or
    the feature maps produced by convolutional bricks, into bricks that
    only accept two dimensional input (batch, features), e.g. an MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Collapse `input_` with ``input_.flatten(ndim=2)``."""
        flattened = input_.flatten(ndim=2)
        return flattened
502