Completed
Pull Request: master (#1041), created by David, 01:30

original_image_size()   rated B

Complexity:   6 conditions
Size:         11 total lines
Duplication:  0 lines (0 %)

Metric   Value
cc       6
dl       0
loc      11
rs       8
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans

class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filters (also called *kernels*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to
        the Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, it indicates that the biases of every filter in this
        layer should be shared amongst all applications of that filter.
        Setting this to ``False`` will untie the biases, yielding a
        separate bias for every location at which the filter is applied.
        Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of output channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters
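To make the allocation and application steps above concrete, here is a minimal usage sketch. It is not part of the module; it assumes the standard Blocks initialization bricks (IsotropicGaussian, Constant) and purely illustrative sizes.

# Usage sketch (illustrative, not part of this module).
import numpy
import theano
from theano import tensor
from blocks.initialization import Constant, IsotropicGaussian

x = tensor.tensor4('images')
# Eight 3x3 filters over 3-channel 28x28 images, default 'valid' border mode.
conv = Convolutional(filter_size=(3, 3), num_filters=8, num_channels=3,
                     image_size=(28, 28),
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
conv.initialize()   # allocates W and b, then applies the initialization schemes
y = conv.apply(x)   # (batch, 8, 26, 26) for the 'valid' border mode
f = theano.function([x], y)
print(f(numpy.ones((2, 3, 28, 28), dtype=theano.config.floatX)).shape)

With tied_biases=False (the default) the bias has the full shape of the output map, which is why image_size must be known at allocation time, as the error raised in _allocate points out.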

class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for tied
        biases. Defaults to ``None``.
    original_image_size : tuple, optional
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution. By default, this is inferred
        from `image_size` to be the size that has each pixel of the
        original image touched by at least one filter application
        in the original convolution. Degenerate cases with dropped
        border pixels (in the original convolution) are possible, and can
        be manually specified via this argument. See notes below.

    Notes
    -----
    By default, `original_image_size` is inferred from `image_size`
    as being the *minimum* size of image that could have produced this
    output. Let ``hanging[i] = original_image_size[i] - image_size[i]
    * step[i]``. Any value of ``hanging[i]`` greater than
    ``filter_size[i] - step[i]`` will result in border pixels that are
    ignored by the original convolution. With this brick, any
    ``original_image_size`` such that ``filter_size[i] - step[i] <=
    hanging[i] < filter_size[i]`` for all ``i`` can be validly specified.
    However, no value will be output by the transposed convolution
    itself for these extra hanging border pixels, and they will be
    determined entirely by the bias.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels,
                 original_image_size=None, **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self._original_image_size = original_image_size

    @property
    def original_image_size(self):
        if self._original_image_size is None:
            if all(s is None for s in self.image_size):
                raise ValueError("can't infer original_image_size, "
                                 "no image_size set")
            last_edge = [d - s for d, s in zip(self.filter_size, self.step)]
            tups = zip(self.image_size, self.step, last_edge)
            return tuple(i * s + e for i, s, e in tups)
        else:
            return self._original_image_size

    @original_image_size.setter
    def original_image_size(self, value):
        self._original_image_size = value

    # Note: input_shape is accepted but unused (as flagged by the review).
    # The conv2d_impl interface defined on Convolutional requires that it
    # be accepted, and allows implementations to ignore it.
    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        # The AbstractConv2d_gradInputs op takes a kernel that was used for the
        # **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)
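A small numeric sketch of the default `original_image_size` inference described in the Notes above, using illustrative sizes: last_edge is filter_size - step = (1, 1), so the inferred size is image_size * step + last_edge = 4 * 2 + 1 = 9 per dimension.

# Sketch of the default original_image_size inference (illustrative sizes).
conv_t = ConvolutionalTranspose(filter_size=(3, 3), num_filters=16,
                                num_channels=8, image_size=(4, 4),
                                step=(2, 2))
print(conv_t.original_image_size)   # (9, 9)
print(conv_t.get_dim('output'))     # (16, 9, 9)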

class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. this is the factor
        by which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN]: `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)
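The ignore_border behaviour described in the parameter list can be checked directly through the shape machinery inherited from Pooling. A short sketch with illustrative sizes:

# A (5, 5) single-channel input with (2, 2) pooling regions and (2, 2) step.
pool_strict = MaxPooling((2, 2), step=(2, 2), input_dim=(1, 5, 5),
                         ignore_border=True)
pool_partial = MaxPooling((2, 2), step=(2, 2), input_dim=(1, 5, 5),
                          ignore_border=False)
print(pool_strict.get_dim('output'))    # (1, 2, 2): the trailing row/column is dropped
print(pool_partial.get_dim('output'))   # (1, 3, 3): a partial pool covers the border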

class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)
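A brief sketch of how include_padding selects the underlying Theano pooling mode (illustrative sizes only):

# With zero padding, the padded zeros are either excluded from or included in the mean.
avg_exc = AveragePooling((2, 2), padding=(1, 1))
avg_inc = AveragePooling((2, 2), padding=(1, 1), include_padding=True)
print(avg_exc.mode, avg_inc.mode)   # average_exc_pad average_inc_pad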

class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
        :class:`Activation` bricks that operate elementwise can also
        be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in the batch. If given, this will be passed to
        Theano's convolution operator, resulting in possibly faster
        execution.
    image_size : tuple, optional
        Height and width of the input (image or feature map). If given,
        this will be passed to Theano's convolution operator, resulting
        in possibly faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually valid)
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be lazily constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=None, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer.push_allocation_config()

            # Retrieve output dimensions
            # and set them for the next layer
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels
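The lazy-construction pattern described in the Notes can be sketched as follows (illustrative layer sizes; Rectifier is assumed to be the elementwise Activation brick from blocks.bricks):

from blocks.bricks import Rectifier

# Child bricks are constructed lazily: no num_channels or image_size given here.
conv_seq = ConvolutionalSequence(
    layers=[Convolutional(filter_size=(5, 5), num_filters=16),
            Rectifier(),
            MaxPooling(pooling_size=(2, 2))],
    num_channels=3, image_size=(32, 32))
conv_seq.push_allocation_config()   # propagates channel counts and image sizes
print(conv_seq.get_dim('output'))   # (16, 14, 14): 32 -> 28 ('valid' conv) -> 14 (pooling)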

class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only
    two-dimensional input (batch, features), such as an MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
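Finally, a short sketch of the use case described in the docstring: flattening convolutional feature maps before a fully connected brick. The MLP dimensions are illustrative.

from theano import tensor
from blocks.bricks import MLP, Rectifier, Softmax

x = tensor.tensor4('features')          # (batch, channels, height, width)
flat = Flattener().apply(x)             # (batch, channels * height * width)
mlp = MLP(activations=[Rectifier(), Softmax()], dims=[16 * 14 * 14, 100, 10])
probabilities = mlp.apply(flat)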