Completed: Pull Request — master (#1041), created by David at 01:27

blocks.bricks.ConvolutionalTranspose._allocate()   B

Complexity    Conditions 5
Size          Total Lines 9
Duplication   Lines 0, Ratio 0 %

Metric   Value
cc       5
dl       0
loc      9
rs       8.5454
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        the Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of output channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)
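
    # A minimal sketch of such an override, assuming a hypothetical backend
    # function `fast_conv(input_, filters, border_mode, subsample)` that does
    # not understand the shape hints:
    #
    #     def fast_conv_wrapper(input_, filters, input_shape=None,
    #                           filter_shape=None, border_mode='valid',
    #                           subsample=(1, 1), **kwargs):
    #         # Accept and ignore input_shape/filter_shape (and anything
    #         # else) so the wrapper matches the interface described above.
    #         return fast_conv(input_, filters, border_mode=border_mode,
    #                          subsample=subsample)
    #
    #     Convolutional.conv2d_impl = staticmethod(fast_conv_wrapper)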

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # This error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size.
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if self.use_bias:
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


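# A minimal usage sketch of the brick above (illustrative, not part of this
# module): the sizes are arbitrary assumptions, and IsotropicGaussian and
# Constant are assumed to be the standard blocks.initialization helpers.
# Wrapped in a function so that nothing runs at import time.
def _example_convolutional_usage():
    from theano import tensor
    from blocks.initialization import Constant, IsotropicGaussian

    x = tensor.tensor4('features')
    conv = Convolutional(filter_size=(3, 3), num_filters=16, num_channels=3,
                         image_size=(32, 32), border_mode='valid',
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0))
    conv.initialize()
    # With 'valid' border mode the feature maps are image_size - filter_size
    # + 1 on each side, i.e. (32 - 3 + 1, 32 - 3 + 1) = (30, 30).
    assert conv.get_dim('output') == (16, 30, 30)
    return conv.apply(x)

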
class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for tied
        biases. Defaults to ``None``.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self.original_image_size = original_image_size

    def _allocate(self):
        if self.original_image_size is None:
            if self.image_size is None:
                raise ValueError("can't infer original_image_size, "
                                 "no image_size set")
            last_edge = [d - s for d, s in zip(self.filter_size, self.step)]
            tups = zip(self.image_size, self.step, last_edge)
            self.original_image_size = tuple(i * s + e for i, s, e in tups)
        super(ConvolutionalTranspose, self)._allocate()

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
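        # Note that `input_shape` is accepted only to match the interface of
        # `Convolutional.conv2d_impl`; it is not used here, since the shapes
        # are reconstructed from `get_dim('output')` below.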
        # The AbstractConv2d_gradInputs op takes a kernel that was used for the
        # **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)


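# A small sketch of the default `original_image_size` inference described in
# the Notes above (illustrative, not part of this module): with
# image_size=(8, 8), step=(2, 2) and filter_size=(5, 5), _allocate infers
# image_size * step + (filter_size - step) = (19, 19). `use_bias` is assumed
# to be the usual Initializable keyword.
def _example_transpose_output_size():
    transpose = ConvolutionalTranspose(filter_size=(5, 5), num_filters=3,
                                       num_channels=16, image_size=(8, 8),
                                       step=(2, 2), use_bias=False)
    transpose.allocate()
    assert transpose.original_image_size == (19, 19)
    assert transpose.get_dim('output') == (3, 19, 19)

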
class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last two
            dimensions will be downsampled. For example, with images this
            means that the last two dimensions should represent the height
            and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)


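# A minimal sketch (not part of this module) of the `ignore_border` behaviour
# described in the docstring above, using the same (5, 5) input and (2, 2)
# pooling size; the leading channel dimension of 1 is an arbitrary assumption.
def _example_max_pooling_dims():
    pool = MaxPooling(pooling_size=(2, 2), input_dim=(1, 5, 5))
    assert pool.get_dim('output') == (1, 2, 2)   # ignore_border=True (default)
    pool = MaxPooling(pooling_size=(2, 2), input_dim=(1, 5, 5),
                      ignore_border=False)
    assert pool.get_dim('output') == (1, 3, 3)

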
class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
        :class:`Activation` bricks that operate elementwise can also
        be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in batch. If given, will be passed to
        theano's convolution operator resulting in possibly faster
        execution.
    image_size : tuple, optional
        Width and height of the input (image/featuremap). If given,
        will be passed to theano's convolution operator resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=False, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            layer.use_bias = self.use_bias

            # Push input dimensions to children.
            layer.push_allocation_config()

            # Retrieve the output dimensions and set them for the next layer.
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


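# An illustrative sketch (not part of this module) of how the sequence pushes
# dimensions through its children. Rectifier, IsotropicGaussian and Constant
# are assumed to be the standard Blocks bricks/initialization helpers; the
# sizes are arbitrary.
def _example_convolutional_sequence():
    from blocks.bricks import Rectifier
    from blocks.initialization import Constant, IsotropicGaussian

    seq = ConvolutionalSequence(
        [Convolutional(filter_size=(5, 5), num_filters=8),
         Rectifier(),
         MaxPooling(pooling_size=(2, 2)),
         Convolutional(filter_size=(3, 3), num_filters=16),
         Rectifier()],
        num_channels=1, image_size=(28, 28),
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
    seq.initialize()
    # 28x28 -> conv 5x5 ('valid') -> 24x24 -> pool 2x2 -> 12x12
    #       -> conv 3x3 ('valid') -> 10x10, with 16 filters in the last layer.
    assert seq.get_dim('output') == (16, 10, 10)
    return seq

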
class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only two-dimensional
    input (batch, features), such as :class:`MLP`.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
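

# A small sketch (not part of this module) of the typical use described above:
# flattening (batch, channels, height, width) feature maps into a
# (batch, features) matrix before a fully-connected brick such as an MLP.
def _example_flattener_usage():
    from theano import tensor

    features = tensor.tensor4('features')   # (batch, channels, height, width)
    return Flattener().apply(features)      # -> (batch, channels*height*width)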