blocks/bricks/conv.py (4 issues)

from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filters (also called *kernels*).
    num_filters : int
        Number of filters (i.e. output feature maps).
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. Each filter is applied across
        all input channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to the
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        If ``True``, the biases of every filter in this layer are shared
        amongst all applications of that filter. Defaults to ``True``.
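
    Examples
    --------
    A minimal usage sketch, added here for illustration: it assumes the
    default 'valid' border mode and the standard initializers from
    :mod:`blocks.initialization`.

    >>> from theano import tensor
    >>> from blocks.initialization import IsotropicGaussian, Constant
    >>> x = tensor.tensor4('features')
    >>> conv = Convolutional(filter_size=(5, 5), num_filters=32,
    ...                      num_channels=3, image_size=(28, 28),
    ...                      weights_init=IsotropicGaussian(0.01),
    ...                      biases_init=Constant(0))
    >>> conv.initialize()
    >>> output = conv.apply(x)  # (batch, 32, 24, 24) feature maps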

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)
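
    # For instance, an override that forwards to Theano's conv2d while
    # discarding the keyword arguments it does not need could look like the
    # following sketch (illustrative only, not part of the original module):
    #
    #     def conv2d_dropping_shapes(input_, filters, **kwargs):
    #         return conv2d(input_, filters,
    #                       border_mode=kwargs.get('border_mode', 'valid'),
    #                       subsample=kwargs.get('subsample', (1, 1)))
    #
    #     Convolutional.conv2d_impl = staticmethod(conv2d_dropping_shapes)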

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of output channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

    # Review note: this code seems to be duplicated elsewhere in the project.
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=True, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for untied
        biases. Defaults to ``None``.
    unused_edge : tuple, optional
        Tuple of pixels added to the inferred height and width of the
        output image, whose values would be ignored in the corresponding
        forward convolution. Must be such that 0 <= ``unused_edge[i]`` <=
        ``step[i]``. Note that this parameter is **ignored** if
        ``original_image_size`` is specified in the constructor or manually
        set as an attribute.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.
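
    As a worked example of the default inference rule implemented by this
    brick: with ``image_size=(8, 8)``, ``step=(2, 2)``,
    ``filter_size=(4, 4)``, the default 'valid' border mode and
    ``unused_edge=(0, 0)``, each dimension of the inferred
    ``original_image_size`` is ``2 * (8 - 1) + 4 - 2 * 0 + 0 = 18``,
    i.e. ``original_image_size == (18, 18)``.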

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, unused_edge=(0, 0),
                 **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self.original_image_size = original_image_size
        self.unused_edge = unused_edge

    @property
    def original_image_size(self):
        if self._original_image_size is None:
            if all(s is None for s in self.image_size):
                raise ValueError("can't infer original_image_size, "
                                 "no image_size set")
            if isinstance(self.border_mode, tuple):
                border = self.border_mode
            elif self.border_mode == 'full':
                border = tuple(k - 1 for k in self.filter_size)
            elif self.border_mode == 'half':
                border = tuple(k // 2 for k in self.filter_size)
            else:
                border = [0] * len(self.image_size)
            tups = zip(self.image_size, self.step, self.filter_size, border,
                       self.unused_edge)
            return tuple(s * (i - 1) + k - 2 * p + u for i, s, k, p, u in tups)
        else:
            return self._original_image_size

    @original_image_size.setter
    def original_image_size(self, value):
        self._original_image_size = value

    # Review note: the input_shape argument is accepted but unused; it is
    # part of the conv2d_impl interface defined by Convolutional.
    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        # The AbstractConv2d_gradInputs op takes a kernel that was used for
        # the **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)


class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, stride=self.step,
                         mode=self.mode, pad=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, stride=self.step,
                ignore_border=self.ignore_border, pad=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.
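
    Examples
    --------
    A brief usage sketch, added for illustration; the brick has no
    parameters, so no initialization is needed:

    >>> from theano import tensor
    >>> x = tensor.tensor4('features')
    >>> pool = MaxPooling((2, 2))
    >>> pooled = pool.apply(x)  # last two dimensions halved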

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)


class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.
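
    Examples
    --------
    A brief usage sketch, added for illustration; including the padding in
    the average requires ``ignore_border=True`` (the default):

    >>> from theano import tensor
    >>> x = tensor.tensor4('features')
    >>> pool = AveragePooling((2, 2), padding=(1, 1), include_padding=True)
    >>> averaged = pool.apply(x)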

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks),
        or application methods from such bricks.  :class:`Activation`
        bricks that operate elementwise can also be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images per batch. If given, will be passed to Theano's
        convolution operator, resulting in possibly faster execution.
    image_size : tuple, optional
        Width and height of the input (image or feature map). If given,
        will be passed to Theano's convolution operator, resulting in
        possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually 'valid')
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~bricks.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.
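
    Examples
    --------
    A sketch of a small convolutional stack, added for illustration;
    ``Rectifier`` comes from :mod:`blocks.bricks` and the initializers
    from :mod:`blocks.initialization`. The child bricks are constructed
    lazily and receive their dimensions from the sequence:

    >>> from theano import tensor
    >>> from blocks.bricks import Rectifier
    >>> from blocks.initialization import IsotropicGaussian, Constant
    >>> x = tensor.tensor4('features')
    >>> convnet = ConvolutionalSequence(
    ...     [Convolutional((3, 3), 16, name='conv'), Rectifier(),
    ...      MaxPooling((2, 2), name='pool')],
    ...     num_channels=3, image_size=(32, 32),
    ...     weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
    >>> convnet.initialize()
    >>> output = convnet.apply(x)  # (batch, 16, 15, 15) feature maps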

    """
    # Review note: this code seems to be duplicated elsewhere in the project.
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None,
                 image_size=(None, None), border_mode=None, tied_biases=None,
                 **kwargs):
        self.layers = [a if isinstance(a, Brick) else a.brick for a in layers]
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalSequence, self).__init__(
            application_methods=layers, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer.push_allocation_config()

            # Retrieve output dimensions
            # and set them for the next layer
            if None not in layer.image_size:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only two
    dimensional input (batch, features) like MLP.
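
    Examples
    --------
    A brief sketch, added for illustration: flattening a 4D feature-map
    tensor into a (batch, features) matrix.

    >>> from theano import tensor
    >>> x = tensor.tensor4('features')
    >>> flat = Flattener().apply(x)  # (batch, channels * height * width)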

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)