Completed · Pull Request — master (#1041) · by David · created 01:37
blocks.bricks.ConvolutionalTranspose.conv2d_impl() — rated A

Complexity: 1 condition
Size: 11 total lines
Duplication: 0 lines (0 %)

Metric   Value
cc       1
dl       0
loc      11
rs       9.4285
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        the Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

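    # Illustrative sketch (not part of the original file) of the wrapper
    # pattern described above, assuming theano.sandbox.cuda.dnn is
    # importable; keyword arguments dnn_conv does not need are swallowed:
    #
    #     def dnn_conv2d_impl(input_, filters, border_mode='valid',
    #                         subsample=(1, 1), **kwargs):
    #         from theano.sandbox.cuda.dnn import dnn_conv
    #         return dnn_conv(input_, filters, border_mode=border_mode,
    #                         subsample=subsample)
    #
    #     Convolutional.conv2d_impl = staticmethod(dnn_conv2d_impl)
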
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
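        # For example (illustrative numbers): a 28x28 input convolved with a
        # 5x5 filter at step (1, 1) gives a 24x24 feature map in 'valid'
        # mode (28 - 5 + 1) and a 32x32 one in 'full' mode (28 + 5 - 1).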
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for tied
        biases. Defaults to ``None``.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self._original_image_size = original_image_size

    @property
    def original_image_size(self):
        if self._original_image_size is None:
            if all(s is None for s in self.image_size):
                raise ValueError("can't infer original_image_size, "
                                 "no image_size set")
            if isinstance(self.border_mode, tuple):
                border = self.border_mode
            elif self.border_mode == 'full':
                border = tuple(k - 1 for k in self.filter_size)
            elif self.border_mode == 'half':
                border = tuple(k // 2 for k in self.filter_size)
            else:
                border = [0] * len(self.image_size)
            tups = zip(self.image_size, self.step, self.filter_size, border)
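            # This inverts the convolution output-shape formula:
            # original = step * (image_size - 1) + filter_size - 2 * border.
            # For example (illustrative numbers): image_size=4, step=2,
            # filter_size=3 and border=0 give 2 * 3 + 3 = 9, and a 'valid'
            # stride-2 convolution of 9 pixels with a size-3 filter does
            # indeed produce 4 outputs.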
            return tuple(s * (i - 1) + k - 2 * p for i, s, k, p in tups)
        else:
            return self._original_image_size

    @original_image_size.setter
    def original_image_size(self, value):
        self._original_image_size = value

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        # `input_shape` is accepted for compatibility with the conv2d_impl
        # contract described above, but it is not used here.
        # The AbstractConv2d_gradInputs op takes a kernel that was used for the
        # **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)


class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)

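# Illustrative sketch (assumed shapes, not part of the original file): with
# the default ignore_border=True, pooling a 5x5 feature map with (2, 2)
# regions and the default (2, 2) step gives a 2x2 output; with
# ignore_border=False it would give 3x3.
#
#     pool = MaxPooling(pooling_size=(2, 2), input_dim=(16, 5, 5))
#     pool.get_dim('output')  # expected: (16, 2, 2)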


class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
        :class:`Activation` bricks that operate elementwise can also
        be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images per batch. If given, this will be passed to
        Theano's convolution operator, resulting in possibly faster
        execution.
    image_size : tuple, optional
        Width and height of the input (image or feature map). If given,
        this will be passed to Theano's convolution operator, resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
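    # Illustrative sketch of the 'lazy' construction described in the Notes
    # above (assumed sizes, not from the original file): the children are
    # built without num_channels or image_size, which push_allocation_config
    # later fills in from this sequence's own configuration.
    #
    #     seq = ConvolutionalSequence(
    #         layers=[Convolutional(filter_size=(3, 3), num_filters=4),
    #                 MaxPooling(pooling_size=(2, 2))],
    #         num_channels=3, image_size=(32, 32))
    #     seq.push_allocation_config()
    #     seq.layers[0].image_size   # expected: (32, 32)
    #     seq.layers[1].input_dim    # expected: (4, 30, 30)
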
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=None, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer.push_allocation_config()

            # Retrieve the output dimensions and set them for the next layer
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only
    two-dimensional input (batch, features), such as an MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
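

# Illustrative usage sketch (assumed shapes and hyperparameters, not part of
# the original file): chaining the bricks defined above into a small
# pipeline. `images` stands for a 4D input tensor supplied by the caller,
# and the initialization schemes are placeholders.
#
#     from blocks.initialization import Constant, IsotropicGaussian
#
#     convnet = ConvolutionalSequence(
#         layers=[Convolutional(filter_size=(5, 5), num_filters=8),
#                 MaxPooling(pooling_size=(2, 2)),
#                 Convolutional(filter_size=(3, 3), num_filters=16)],
#         num_channels=1, image_size=(28, 28),
#         weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
#     convnet.initialize()
#     features = Flattener().apply(convnet.apply(images))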