Completed
Pull Request to master (#1035), by David, created 01:30

_push_allocation_config()   C

Complexity
    Conditions    7

Size
    Total Lines   27

Duplication
    Lines         0
    Ratio         0 %

Metric   Value
cc       7
dl       0
loc      27
rs       5.5
from theano.tensor.nnet import conv2d
from theano.tensor.nnet.abstract_conv import (AbstractConv2d_gradInputs,
                                              get_conv_output_shape)
from theano.tensor.signal.pool import pool_2d, Pool

from blocks.bricks import (Initializable, Feedforward, Sequence, Activation,
                           LinearLike)
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(LinearLike):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filters (also called *kernels*).
    num_filters : int
        Number of filters (i.e. output feature maps).
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. Each filter spans all of the
        input channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to the
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, the biases of every filter in this layer are shared
        amongst all applications of that filter. Setting this to ``False``
        unties the biases, yielding a separate bias for every location at
        which the filter is applied. Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positionals, and the keyword arguments
    # input_shape, subsample, border_mode, and filter_shape. If some of
    # these are unsupported they should still be accepted and ignored,
    # e.g. with a wrapper function that swallows **kwargs.
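    #
    # A hypothetical override might look like the sketch below (the name
    # `my_backend_conv2d` is illustrative and not a real Theano or Blocks
    # function):
    #
    #     def conv2d_wrapper(input_, filters, border_mode='valid',
    #                        subsample=(1, 1), **kwargs):
    #         # input_shape and filter_shape arrive via **kwargs and are
    #         # deliberately ignored by this backend.
    #         return my_backend_conv2d(input_, filters,
    #                                  border_mode=border_mode,
    #                                  subsample=subsample)
    #
    #     Convolutional.conv2d_impl = staticmethod(conv2d_wrapper)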
    conv2d_impl = staticmethod(conv2d)

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept 4 positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides). It is expected
    # to return a 4-tuple of (batch size, number of channels, output
    # height, output width). The first element of this tuple is not used
    # for anything by this brick.
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # this error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size
                if self.image_size == (None, None) and not self.tied_biases:
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1`` (assuming a
            unit step).

        """
        if self.image_size == (None, None):
            input_shape = None
        else:
            input_shape = (self.batch_size, self.num_channels)
            input_shape += self.image_size

        output = self.conv2d_impl(
            input_, self.W,
            input_shape=input_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if getattr(self, 'use_bias', True):
            if self.tied_biases:
                output += self.b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += self.b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            input_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            out_shape = self.get_output_shape(input_shape, kernel_shape,
                                              self.border_mode, self.step)
            assert len(out_shape) == 4
            return out_shape[1:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


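# A minimal usage sketch (not part of the reviewed module): it shows how a
# Convolutional brick might be allocated, initialized and applied. The helper
# name `_demo_convolutional` and the constants used are illustrative only.
def _demo_convolutional():
    import numpy
    import theano
    from theano import tensor
    from blocks.initialization import Constant

    x = tensor.tensor4('x')
    conv = Convolutional(filter_size=(3, 3), num_filters=8, num_channels=3,
                         image_size=(32, 32), tied_biases=True,
                         weights_init=Constant(0.1), biases_init=Constant(0))
    conv.initialize()
    f = theano.function([x], conv.apply(x))
    batch = numpy.ones((2, 3, 32, 32), dtype=theano.config.floatX)
    # 'valid' border mode: 32 - 3 + 1 = 30, so the result is (2, 8, 30, 30).
    return f(batch).shape

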
class ConvolutionalTranspose(Convolutional):
    """Performs the transpose of a 2D convolution.

    Parameters
    ----------
    original_image_size : tuple
        The height and width of the image that forms the output of
        the transpose operation, which is the input of the original
        (non-transposed) convolution.
    num_filters : int
        Number of filters at the *output* of the transposed convolution,
        i.e. the number of channels in the corresponding convolution.
    num_channels : int
        Number of channels at the *input* of the transposed convolution,
        i.e. the number of output filters in the corresponding
        convolution.
    step : tuple, optional
        The step (or stride) of the corresponding *convolution*.
        Defaults to (1, 1).
    image_size : tuple, optional
        Image size of the input to the *transposed* convolution, i.e.
        the output of the corresponding convolution. Required for tied
        biases. Defaults to ``None``.

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['original_image_size', 'filter_size', 'num_filters',
                      'num_channels'])
    def __init__(self, original_image_size, filter_size, num_filters,
                 num_channels, **kwargs):
        super(ConvolutionalTranspose, self).__init__(
            filter_size, num_filters, num_channels, **kwargs)
        self.original_image_size = original_image_size

    def conv2d_impl(self, input_, W, input_shape, subsample, border_mode,
                    filter_shape):
        # input_shape is accepted only to match the signature expected by
        # Convolutional.apply; it is not used by this implementation.
        # The AbstractConv2d_gradInputs op takes a kernel that was used for the
        # **convolution**. We therefore have to invert num_channels and
        # num_filters for W.
        W = W.transpose(1, 0, 2, 3)
        imshp = (None,) + self.get_dim('output')
        kshp = (filter_shape[1], filter_shape[0]) + filter_shape[2:]
        return AbstractConv2d_gradInputs(
            imshp=imshp, kshp=kshp, border_mode=border_mode,
            subsample=subsample)(W, input_, self.get_dim('output')[1:])

    def get_dim(self, name):
        if name == 'output':
            return (self.num_filters,) + self.original_image_size
        return super(ConvolutionalTranspose, self).get_dim(name)


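# A minimal sketch (not part of the reviewed module) of how the transposed
# brick might map 30x30 feature maps back to 32x32 images; the helper name
# `_demo_convolutional_transpose` and all sizes are illustrative only.
def _demo_convolutional_transpose():
    import numpy
    import theano
    from theano import tensor
    from blocks.initialization import Constant

    x = tensor.tensor4('x')
    # The corresponding forward convolution would take 3-channel 32x32 images
    # and produce 8 feature maps of size 30x30 (valid mode, 3x3 filters).
    conv_t = ConvolutionalTranspose(
        original_image_size=(32, 32), filter_size=(3, 3), num_filters=3,
        num_channels=8, image_size=(30, 30), tied_biases=True,
        weights_init=Constant(0.1), biases_init=Constant(0))
    conv_t.initialize()
    f = theano.function([x], conv_t.apply(x))
    batch = numpy.ones((2, 8, 30, 30), dtype=theano.config.floatX)
    return f(batch).shape  # expected to be (2, 3, 32, 32)

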
class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with dimension greater than or equal to 2. The last
            two dimensions will be downsampled. For example, with images
            this means that the last two dimensions should represent the
            height and width of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = pool_2d(input_, self.pooling_size, st=self.step,
                         mode=self.mode, padding=self.padding,
                         ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(Pool.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. the factor by
        which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edge. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on CPU rather than GPU, even if you have specified
        a GPU as your computation device. Additionally, Theano will only
        use [cuDNN]_ (if available) for pooling computations with
        `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN]: `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)


class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


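# A minimal sketch (not part of the reviewed module) contrasting the two
# ignore_border behaviours described in the MaxPooling docstring; the helper
# name `_demo_max_pooling` is illustrative only.
def _demo_max_pooling():
    import numpy
    import theano
    from theano import tensor

    x = tensor.tensor4('x')
    batch = numpy.ones((1, 1, 5, 5), dtype=theano.config.floatX)

    strict = MaxPooling((2, 2), ignore_border=True)
    f_strict = theano.function([x], strict.apply(x))
    # 5x5 input, (2, 2) pool, (2, 2) step: the partial border row/column is
    # dropped, giving a (1, 1, 2, 2) result.
    strict_shape = f_strict(batch).shape

    lenient = MaxPooling((2, 2), ignore_border=False)
    f_lenient = theano.function([x], lenient.apply(x))
    # The partial border region is pooled as well, giving (1, 1, 3, 3).
    lenient_shape = f_lenient(batch).shape

    return strict_shape, lenient_shape

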
class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional (or pooling) operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional`,
        :class:`ConvolutionalActivation`, or :class:`Pooling` bricks).
        :class:`Activation` bricks that operate elementwise can also
        be included.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in a batch. If given, this will be passed to
        Theano's convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        Height and width of the input (image or feature map). If given,
        this will be passed to Theano's convolution operator, possibly
        resulting in faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use, see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case
        need to rely on either a default border mode (usually 'valid')
        or one provided at construction and/or after construction
        (but before allocation).
    tied_biases : bool, optional
        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
        in which case no value is pushed to child :class:`Convolutional`
        bricks.

    Notes
    -----
    The passed convolutional operators should be lazily constructed, that
    is, without specifying the batch_size, num_channels and image_size. The
    main feature of :class:`ConvolutionalSequence` is that it will set the
    input dimensions of a layer to the output dimensions of the previous
    layer by the :meth:`~.Brick.push_allocation_config` method.

    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
    initialization configuration: only an explicitly specified value is
    pushed down the hierarchy. `border_mode` also has this behaviour.
    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=None, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            last = len(self.layers) - 1
            while last >= 0:
                try:
                    return self.layers[last].get_dim(name)
                except ValueError:
                    last -= 1
            # The output shape of an empty ConvolutionalSequence or one
            # consisting only of Activations is the input shape.
            return self.get_dim('input_')
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if isinstance(layer, Activation):
                # Activations operate elementwise; nothing to set.
                layer.push_allocation_config()
                continue
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            if self.tied_biases is not None:
                layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias

            # Push input dimensions to children
            layer.push_allocation_config()

            # Retrieve the output dimensions and use them as the input
            # dimensions of the next layer.
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


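# A minimal sketch (not part of the reviewed module) of how
# ConvolutionalSequence pushes shapes through lazily constructed children;
# the helper name `_demo_convolutional_sequence` is illustrative only.
def _demo_convolutional_sequence():
    from blocks.bricks import Rectifier
    from blocks.initialization import Constant

    # The children are constructed without num_channels or image_size;
    # those are filled in by _push_allocation_config.
    seq = ConvolutionalSequence(
        layers=[Convolutional(filter_size=(3, 3), num_filters=8),
                Rectifier(),
                MaxPooling((2, 2)),
                Convolutional(filter_size=(3, 3), num_filters=16)],
        num_channels=3, image_size=(32, 32), border_mode='valid',
        tied_biases=True, weights_init=Constant(0.1), biases_init=Constant(0))
    seq.push_allocation_config()
    # 32x32 -> conv 3x3 -> 30x30 -> pool 2x2 -> 15x15 -> conv 3x3 -> 13x13,
    # so this returns (16, 13, 13).
    return seq.get_dim('output')

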
class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which accept only
    two-dimensional input (batch, features), such as an MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
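

# A minimal sketch (not part of the reviewed module) showing Flattener turning
# convolutional feature maps into a two-dimensional representation; the helper
# name `_demo_flattener` is illustrative only.
def _demo_flattener():
    import numpy
    import theano
    from theano import tensor

    x = tensor.tensor4('x')
    f = theano.function([x], Flattener().apply(x))
    batch = numpy.ones((2, 8, 15, 15), dtype=theano.config.floatX)
    # Each (8, 15, 15) feature map is flattened to 8 * 15 * 15 = 1800 values,
    # ready to be fed to an MLP, so this returns (2, 1800).
    return f(batch).shape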