Completed: Pull Request — master (#1041), created by David at 01:26

blocks.bricks.ConvolutionalTranspose   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 94
Duplicated Lines 0 %
Metric Value
dl 0
loc 94
rs 10
wmc 15

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 6 1
A conv2d_impl() 0 11 1
A original_image_size() 0 3 1
A get_dim() 0 4 2
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        the Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs, as sketched
    # below.
    conv2d_impl = staticmethod(conv2d)
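    # A hedged sketch of such a wrapper (``dnn_conv`` and its exact keyword
    # arguments are assumptions about the cuDNN wrapper, not something this
    # brick provides):
    #
    #     def _dnn_conv_impl(input_, kernels, border_mode='valid',
    #                        subsample=(1, 1), **kwargs):
    #         # input_shape and filter_shape are swallowed by **kwargs
    #         return dnn_conv(input_, kernels, border_mode=border_mode,
    #                         subsample=subsample)
    #
    #     Convolutional.conv2d_impl = staticmethod(_dnn_conv_impl)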
    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of output channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters

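# A minimal usage sketch for Convolutional (hypothetical values; assumes
# blocks' IsotropicGaussian and Constant initialization schemes). With the
# default 'valid' border mode the output spatial size follows
# image_size - filter_size + 1:
#
#     conv = Convolutional(filter_size=(3, 3), num_filters=16, num_channels=3,
#                          image_size=(32, 32),
#                          weights_init=IsotropicGaussian(0.01),
#                          biases_init=Constant(0))
#     conv.initialize()
#     y = conv.apply(x)  # x: (batch, 3, 32, 32) -> y: (batch, 16, 30, 30)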
class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for tied
        biases. Defaults to ``None``.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.

    """
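    # A worked example of the default inference implemented in the
    # original_image_size property below (hypothetical numbers): with
    # image_size=(4, 4), step=(2, 2), filter_size=(5, 5) and the default
    # 'valid' border mode (border 0), each side is inferred as
    # step * (image_size - 1) + filter_size - 2 * border = 2 * 3 + 5 = 11,
    # so original_image_size is (11, 11).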
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self._original_image_size = original_image_size

    @property
    def original_image_size(self):
        if self._original_image_size is None:
            if all(s is None for s in self.image_size):
                raise ValueError("can't infer original_image_size, "
                                 "no image_size set")
            if isinstance(self.border_mode, tuple):
                border = self.border_mode
            elif self.border_mode == 'full':
                border = tuple(k - 1 for k in self.filter_size)
            elif self.border_mode == 'half':
                border = tuple(k // 2 for k in self.filter_size)
            else:
                border = [0] * len(self.image_size)
            tups = zip(self.image_size, self.step, self.filter_size, border)
            return tuple(s * (i - 1) + k - 2 * p for i, s, k, p in tups)
        else:
            return self._original_image_size

    @original_image_size.setter
    def original_image_size(self, value):
        self._original_image_size = value

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        # Note: input_shape is accepted to match the conv2d_impl interface
        # but is unused here; the output shape is derived from
        # get_dim('output') instead.
        # The AbstractConv2d_gradInputs op takes a kernel that was used for the
        # **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)

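# A minimal usage sketch for ConvolutionalTranspose (hypothetical values;
# again assuming blocks' IsotropicGaussian and Constant schemes). This is the
# transpose of a (3, 3), step-(2, 2) convolution whose output feature maps
# were 8x8, so the inferred original_image_size is 2 * 7 + 3 = 17 per side:
#
#     deconv = ConvolutionalTranspose(filter_size=(3, 3), num_filters=3,
#                                     num_channels=16, step=(2, 2),
#                                     image_size=(8, 8),
#                                     weights_init=IsotropicGaussian(0.01),
#                                     biases_init=Constant(0))
#     deconv.initialize()
#     y = deconv.apply(x)  # x: (batch, 16, 8, 8) -> y: (batch, 3, 17, 17)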
class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN]: `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)

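# A minimal usage sketch for MaxPooling (hypothetical shapes):
#
#     pool = MaxPooling(pooling_size=(2, 2))
#     y = pool.apply(x)  # x: (batch, 16, 30, 30) -> y: (batch, 16, 15, 15)
#
# With ignore_border=False, partial pooling regions at the border are kept,
# so a (5, 5) input with (2, 2) pooling and (2, 2) step becomes (3, 3)
# instead of (2, 2).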
class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)

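# A minimal usage sketch for AveragePooling (hypothetical shapes). With
# include_padding=True the zeros introduced by `padding` are counted in
# the mean ('average_inc_pad'):
#
#     pool = AveragePooling(pooling_size=(2, 2), padding=(1, 1),
#                           include_padding=True)
#     y = pool.apply(x)  # x: (batch, 16, 30, 30) -> y: (batch, 16, 16, 16)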
class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
        :class:`Activation` bricks that operate elementwise can also
        be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in batch. If given, will be passed to
        Theano's convolution operator, resulting in possibly faster
        execution.
    image_size : tuple, optional
        Width and height of the input (image/feature map). If given,
        will be passed to Theano's convolution operator, resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually 'valid')
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
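    # A minimal sketch of the lazy-construction pattern described above
    # (hypothetical layer sizes; assumes blocks' Rectifier activation and
    # the IsotropicGaussian/Constant initialization schemes):
    #
    #     seq = ConvolutionalSequence(
    #         [Convolutional(filter_size=(3, 3), num_filters=16),
    #          Rectifier(),
    #          MaxPooling(pooling_size=(2, 2)),
    #          Convolutional(filter_size=(3, 3), num_filters=32)],
    #         num_channels=3, image_size=(32, 32),
    #         weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
    #     seq.initialize()
    #     # With the children's default 'valid' borders:
    #     # x: (batch, 3, 32, 32) -> y: (batch, 32, 13, 13)
    #     y = seq.apply(x)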
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=None, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer.push_allocation_config()

            # Retrieve output dimensions
            # and set them for the next layer
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only two
    dimensional input (batch, features) like MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
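# A minimal usage sketch for Flattener (hypothetical shapes):
#
#     flat = Flattener()
#     y = flat.apply(x)  # x: (batch, 16, 15, 15) -> y: (batch, 3600)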