Completed
Pull Request — master (#921)
by David

blocks.bricks.ConvolutionalLayer.get_dim()  (rated A)

Complexity
    Conditions: 3

Size
    Total Lines: 6

Duplication
    Lines: 0
    Ratio: 0 %

Metric   Value
cc       3
dl       0
loc      6
rs       9.4286
from theano.tensor.nnet.conv import conv2d, get_conv_output_shape
from theano.tensor.signal.downsample import max_pool_2d, DownsampleFactorMax

from blocks.bricks import Initializable, Feedforward, Sequence
from blocks.bricks.base import application, Brick, lazy
from blocks.roles import add_role, FILTER, BIAS
from blocks.utils import shared_floatx_nans


class Convolutional(Initializable):
    """Performs a 2D convolution.

    Parameters
    ----------
    filter_size : tuple
        The height and width of the filter (also called *kernel*).
    num_filters : int
        Number of filters per channel.
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer. The filters are pooled over the
        channels.
    batch_size : int, optional
        Number of examples per batch. If given, this will be passed to the
        Theano convolution operator, possibly resulting in faster
        execution.
    image_size : tuple, optional
        The height and width of the input (image or feature map). If given,
        this will be passed to the Theano convolution operator, resulting
        in possibly faster execution times.
    step : tuple, optional
        The step (or stride) with which to slide the filters over the
        image. Defaults to (1, 1).
    border_mode : {'valid', 'full'}, optional
        The border mode to use; see :func:`scipy.signal.convolve2d` for
        details. Defaults to 'valid'.
    tied_biases : bool
        If ``True``, the biases of every filter in this layer are shared
        amongst all applications of that filter. Setting this to ``False``
        will untie the biases, yielding a separate bias for every location
        at which the filter is applied. Defaults to ``False``.

    """
    # Make it possible to override the implementation of conv2d that gets
    # used, i.e. to use theano.sandbox.cuda.dnn.dnn_conv directly in order
    # to leverage features not yet available in Theano's standard conv2d.
    # The function you override with here should accept at least the
    # input and the kernels as positional arguments, and the keyword
    # arguments image_shape, subsample, border_mode, and filter_shape. If
    # some of these are unsupported, they should still be accepted and
    # ignored, e.g. with a wrapper function that swallows **kwargs.
    conv2d_impl = staticmethod(conv2d)

    # Used to override the output shape computation for a given value of
    # conv2d_impl. Should accept four positional arguments: the shape of an
    # image minibatch (with 4 elements: batch size, number of channels,
    # height, and width), the shape of the filter bank (number of filters,
    # number of input channels, filter height, filter width), the border
    # mode, and the step (vertical and horizontal strides).
    get_output_shape = staticmethod(get_conv_output_shape)

    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, filter_size, num_filters, num_channels, batch_size=None,
                 image_size=(None, None), step=(1, 1), border_mode='valid',
                 tied_biases=False, **kwargs):
        super(Convolutional, self).__init__(**kwargs)

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.num_channels) +
                               self.filter_size, name='W')
        add_role(W, FILTER)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            if self.tied_biases:
                b = shared_floatx_nans((self.num_filters,), name='b')
            else:
                # This error is raised here instead of during initialization
                # because ConvolutionalSequence may specify the image size.
                if self.image_size == (None, None):
                    raise ValueError('Cannot infer bias size without '
                                     'image_size specified. If you use '
                                     'variable image_size, you should use '
                                     'tied_biases=True.')

                b = shared_floatx_nans(self.get_dim('output'), name='b')
            add_role(b, BIAS)

            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    def _initialize(self):
        if self.use_bias:
            # Static analysis flags these unpackings of self.parameters as
            # unbalanced; the list is populated in _allocate, with W always
            # present and b appended only when use_bias is True.
            W, b = self.parameters
            self.biases_init.initialize(b, self.rng)
        else:
            W, = self.parameters
        self.weights_init.initialize(W, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Perform the convolution.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A 4D tensor with the axes representing batch size, number of
            channels, image height, and image width.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A 4D tensor of filtered images (feature maps) with dimensions
            representing batch size, number of filters, feature map height,
            and feature map width.

            The height and width of the feature map depend on the border
            mode. For 'valid' it is ``image_size - filter_size + 1`` while
            for 'full' it is ``image_size + filter_size - 1``.

        """
        if self.use_bias:
            W, b = self.parameters
        else:
            W, = self.parameters

        if self.image_size == (None, None):
            image_shape = None
        else:
            image_shape = (self.batch_size, self.num_channels)
            image_shape += self.image_size

        output = self.conv2d_impl(
            input_, W,
            image_shape=image_shape,
            subsample=self.step,
            border_mode=self.border_mode,
            filter_shape=((self.num_filters, self.num_channels) +
                          self.filter_size))
        if self.use_bias:
            if self.tied_biases:
                output += b.dimshuffle('x', 0, 'x', 'x')
            else:
                output += b.dimshuffle('x', 0, 1, 2)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            image_shape = (None, self.num_channels) + self.image_size
            kernel_shape = ((self.num_filters, self.num_channels) +
                            self.filter_size)
            return self.get_output_shape(image_shape, kernel_shape,
                                         self.border_mode, self.step)[-3:]
        return super(Convolutional, self).get_dim(name)

    @property
    def num_output_channels(self):
        return self.num_filters


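# A minimal usage sketch (editorial addition; names are illustrative),
# assuming the standard blocks.initialization schemes. With the default
# 'valid' border mode, 32x32 inputs and 5x5 filters yield 28x28 feature
# maps (32 - 5 + 1 = 28), as described in Convolutional.apply above.
def _example_convolutional():
    from theano import tensor
    from blocks.initialization import Constant, IsotropicGaussian

    conv = Convolutional(filter_size=(5, 5), num_filters=16, num_channels=3,
                         image_size=(32, 32), tied_biases=True,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0))
    conv.initialize()
    x = tensor.tensor4('features')
    return conv.apply(x)  # 4D output: (batch, 16, 28, 28)

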
class Pooling(Initializable, Feedforward):
    """Base Brick for pooling operations.

    This should generally not be instantiated directly; see
    :class:`MaxPooling`.

    """
    @lazy(allocation=['mode', 'pooling_size'])
    def __init__(self, mode, pooling_size, step, input_dim, ignore_border,
                 padding, **kwargs):
        super(Pooling, self).__init__(**kwargs)
        self.pooling_size = pooling_size
        self.mode = mode
        self.step = step
        self.input_dim = input_dim if input_dim is not None else (None,) * 3
        self.ignore_border = ignore_border
        self.padding = padding

    @property
    def image_size(self):
        return self.input_dim[-2:]

    @image_size.setter
    def image_size(self, value):
        self.input_dim = self.input_dim[:-2] + value

    @property
    def num_channels(self):
        return self.input_dim[0]

    @num_channels.setter
    def num_channels(self, value):
        self.input_dim = (value,) + self.input_dim[1:]

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the pooling (subsampling) transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            A tensor with 2 or more dimensions. The last two dimensions
            will be downsampled. For example, with images this means that
            the last two dimensions should represent the height and width
            of your image.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            A tensor with the same number of dimensions as `input_`, but
            with the last two dimensions downsampled.

        """
        output = max_pool_2d(input_, self.pooling_size, st=self.step,
                             mode=self.mode, padding=self.padding,
                             ignore_border=self.ignore_border)
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return tuple(DownsampleFactorMax.out_shape(
                self.input_dim, self.pooling_size, st=self.step,
                ignore_border=self.ignore_border, padding=self.padding))

    @property
    def num_output_channels(self):
        return self.input_dim[0]


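# A small sketch of the input_dim bookkeeping above: the image_size and
# num_channels setters rewrite slices of input_dim, which is what lets
# ConvolutionalSequence (further below) push shapes into pooling bricks.
# MaxPooling is defined just below; the name resolves at call time.
def _example_pooling_dims():
    pool = MaxPooling((2, 2))
    pool.num_channels = 16
    pool.image_size = (8, 8)
    assert pool.input_dim == (16, 8, 8)

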
class MaxPooling(Pooling):
    """Max pooling layer.

    Parameters
    ----------
    pooling_size : tuple
        The height and width of the pooling region, i.e. the factor by
        which your input's last two dimensions will be downscaled.
    step : tuple, optional
        The vertical and horizontal shift (stride) between pooling regions.
        By default this is equal to `pooling_size`. Setting this to a lower
        number results in overlapping pooling regions.
    input_dim : tuple, optional
        A tuple of integers representing the shape of the input. The last
        two dimensions will be used to calculate the output dimension.
    padding : tuple, optional
        A tuple of integers representing the vertical and horizontal
        zero-padding to be applied to each of the top and bottom
        (vertical) and left and right (horizontal) edges. For example,
        an argument of (4, 3) will apply 4 pixels of padding to the
        top edge, 4 pixels of padding to the bottom edge, and 3 pixels
        each for the left and right edges. By default, no padding is
        performed.
    ignore_border : bool, optional
        Whether or not to do partial downsampling based on borders where
        the extent of the pooling region reaches beyond the edge of the
        image. If `True`, a (5, 5) image with (2, 2) pooling regions
        and (2, 2) step will be downsampled to shape (2, 2), otherwise
        it will be downsampled to (3, 3). `True` by default.

    Notes
    -----
    .. warning::
        As of this writing, setting `ignore_border` to `False` with a step
        not equal to the pooling size will force Theano to perform pooling
        computations on the CPU rather than the GPU, even if you have
        specified a GPU as your computation device. Additionally, Theano
        will only use [cuDNN]_ (if available) for pooling computations
        with `ignore_border` set to `True`. You can ensure that the entire
        input is captured by at least one pool by using the `padding`
        argument to add zero padding prior to pooling being performed.

    .. [cuDNN] `NVIDIA cuDNN <https://developer.nvidia.com/cudnn>`_.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 **kwargs):
        super(MaxPooling, self).__init__('max', pooling_size,
                                         step=step, input_dim=input_dim,
                                         ignore_border=ignore_border,
                                         padding=padding, **kwargs)

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Fix objects created before pull request #899.
        self.mode = getattr(self, 'mode', 'max')
        self.padding = getattr(self, 'padding', (0, 0))
        self.ignore_border = getattr(self, 'ignore_border', False)


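# A sketch of the ignore_border behaviour documented above, checked via
# get_dim without compiling any Theano function.
def _example_max_pooling():
    pool = MaxPooling((2, 2), input_dim=(16, 5, 5))
    assert pool.get_dim('output') == (16, 2, 2)  # ignore_border=True default
    pool = MaxPooling((2, 2), input_dim=(16, 5, 5), ignore_border=False)
    assert pool.get_dim('output') == (16, 3, 3)  # partial regions kept

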
class AveragePooling(Pooling):
    """Average pooling layer.

    Parameters
    ----------
    include_padding : bool, optional
        When calculating an average, include zeros that are the
        result of zero padding added by the `padding` argument.
        A value of `True` is only accepted if `ignore_border`
        is also `True`. `False` by default.

    Notes
    -----
    For documentation on the remainder of the arguments to this
    class, see :class:`MaxPooling`.

    """
    @lazy(allocation=['pooling_size'])
    def __init__(self, pooling_size, step=None, input_dim=None,
                 ignore_border=True, padding=(0, 0),
                 include_padding=False, **kwargs):
        mode = 'average_inc_pad' if include_padding else 'average_exc_pad'
        super(AveragePooling, self).__init__(mode, pooling_size,
                                             step=step, input_dim=input_dim,
                                             ignore_border=ignore_border,
                                             padding=padding, **kwargs)


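# A small construction sketch: include_padding only selects which Theano
# pooling mode is requested, as the __init__ above shows.
def _example_average_pooling():
    assert AveragePooling((2, 2)).mode == 'average_exc_pad'
    pool = AveragePooling((2, 2), include_padding=True)
    assert pool.mode == 'average_inc_pad'

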
class _AllocationMixin(object):
    def _push_allocation_config(self):
        for attr in ['filter_size', 'num_filters', 'border_mode',
                     'batch_size', 'num_channels', 'image_size',
                     'tied_biases', 'use_bias']:
            setattr(self.convolution, attr, getattr(self, attr))

    @property
    def num_output_channels(self):
        # Assumes an elementwise activation function. Would need to
        # change to support e.g. maxout, but that would also require
        # a way of querying the activation function for this kind of
        # information.
        return self.num_filters


class ConvolutionalActivation(_AllocationMixin, Sequence, Initializable):
    """A convolution followed by an activation function.

    Parameters
    ----------
    activation : :class:`.BoundApplication`
        The application method to apply after convolution (i.e.
        the nonlinear activation function).

    See Also
    --------
    :class:`Convolutional` : For the documentation of other parameters.

    """
    @lazy(allocation=['filter_size', 'num_filters', 'num_channels'])
    def __init__(self, activation, filter_size, num_filters, num_channels,
                 batch_size=None, image_size=None, step=(1, 1),
                 border_mode='valid', tied_biases=False, **kwargs):
        self.convolution = Convolutional()

        self.filter_size = filter_size
        self.num_filters = num_filters
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.image_size = image_size
        self.step = step
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        super(ConvolutionalActivation, self).__init__(
            application_methods=[self.convolution.apply, activation],
            **kwargs)

    def get_dim(self, name):
        # TODO The name of the activation output doesn't need to be `output`
        return self.convolution.get_dim(name)

    def _push_allocation_config(self):
        super(ConvolutionalActivation, self)._push_allocation_config()
        self.convolution.step = self.step


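# A minimal construction sketch, assuming blocks.bricks.Rectifier and the
# standard initialization schemes; it mirrors the Convolutional example
# above with a nonlinearity appended.
def _example_conv_activation():
    from blocks.bricks import Rectifier
    from blocks.initialization import Constant, IsotropicGaussian

    conv = ConvolutionalActivation(
        Rectifier().apply, filter_size=(3, 3), num_filters=8,
        num_channels=3, image_size=(28, 28),
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
    conv.initialize()
    return conv.get_dim('output')  # (8, 26, 26) with the 'valid' default

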
class ConvolutionalSequence(Sequence, Initializable, Feedforward):
    """A sequence of convolutional operations.

    Parameters
    ----------
    layers : list
        List of convolutional bricks (i.e. :class:`Convolutional` or
        :class:`ConvolutionalActivation`).
    num_channels : int
        Number of input channels in the image. For the first layer this is
        normally 1 for grayscale images and 3 for color (RGB) images. For
        subsequent layers this is equal to the number of filters output by
        the previous convolutional layer.
    batch_size : int, optional
        Number of images in batch. If given, will be passed to Theano's
        convolution operator, resulting in possibly faster execution.
    image_size : tuple, optional
        Width and height of the input (image/feature map). If given, will
        be passed to Theano's convolution operator, resulting in possibly
        faster execution.
    border_mode : 'valid', 'full' or None, optional
        The border mode to use; see :func:`scipy.signal.convolve2d` for
        details. Unlike with :class:`Convolutional`, this defaults to
        None, in which case no default value is pushed down to child
        bricks at allocation time. Child bricks will in this case need
        to rely on either a default border mode (usually 'valid') or one
        provided at construction and/or after construction (but before
        allocation).
    tied_biases : bool
        Same meaning as in :class:`Convolutional`. Defaults to ``False``
        and is pushed down to all child bricks.

    Notes
    -----
    The passed convolutional operators should be 'lazy' constructed, that
    is, without specifying the batch_size, num_channels and image_size.
    The main feature of :class:`ConvolutionalSequence` is that it will
    set the input dimensions of a layer to the output dimensions of the
    previous layer by the :meth:`~.Brick.push_allocation_config` method
    (a usage sketch follows this class definition).

    The reason the `border_mode` parameter behaves the way it does is that
    pushing a single default `border_mode` makes it very difficult to
    have child bricks with different border modes. Normally, such things
    would be overridden after `push_allocation_config()`, but this is
    a particular hassle as the border mode affects the allocation
    parameters of every subsequent child brick in the sequence. Thus, only
    an explicitly specified border mode will be pushed down the hierarchy.

    """
    @lazy(allocation=['num_channels'])
    def __init__(self, layers, num_channels, batch_size=None, image_size=None,
                 border_mode=None, tied_biases=False, **kwargs):
        self.layers = layers
        self.image_size = image_size
        self.num_channels = num_channels
        self.batch_size = batch_size
        self.border_mode = border_mode
        self.tied_biases = tied_biases

        application_methods = [brick.apply for brick in layers]
        super(ConvolutionalSequence, self).__init__(
            application_methods=application_methods, **kwargs)

    def get_dim(self, name):
        if name == 'input_':
            return (self.num_channels,) + self.image_size
        if name == 'output':
            return self.layers[-1].get_dim(name)
        return super(ConvolutionalSequence, self).get_dim(name)

    def _push_allocation_config(self):
        num_channels = self.num_channels
        image_size = self.image_size
        for layer in self.layers:
            if self.border_mode is not None:
                layer.border_mode = self.border_mode
            layer.tied_biases = self.tied_biases
            layer.image_size = image_size
            layer.num_channels = num_channels
            layer.batch_size = self.batch_size
            layer.use_bias = self.use_bias

            # Push input dimensions to children. Calling the protected
            # method directly is flagged by static analysis, but is done
            # here so each child's output shape can be queried in turn.
            layer._push_allocation_config()

            # Retrieve the output dimensions and set them for the next layer
            if layer.image_size is not None:
                output_shape = layer.get_dim('output')
                image_size = output_shape[1:]
            num_channels = layer.num_output_channels


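# A sketch of the lazy construction recommended in the Notes above: only
# filter sizes and counts are specified per layer; push_allocation_config
# (triggered by initialize) fills in each child's num_channels and
# image_size. Rectifier and the initialization schemes are assumptions
# from the wider Blocks library; the numbers are arbitrary.
def _example_conv_sequence():
    from blocks.bricks import Rectifier
    from blocks.initialization import Constant, IsotropicGaussian

    seq = ConvolutionalSequence(
        [ConvolutionalActivation(Rectifier().apply, (5, 5), 16),
         MaxPooling((2, 2)),
         ConvolutionalActivation(Rectifier().apply, (5, 5), 32)],
        num_channels=3, image_size=(32, 32),
        weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
    seq.initialize()
    return seq.get_dim('output')  # (32, 10, 10): 32 -> 28 -> 14 -> 10

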
class Flattener(Brick):
    """Flattens the input.

    It may be used to pass multidimensional objects like images or feature
    maps of convolutional bricks into bricks which allow only
    two-dimensional input (batch, features), such as an MLP.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_.flatten(ndim=2)
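

# A minimal sketch of the intended use described in the docstring: collapse
# 4D convolutional output into (batch, features) for an MLP-style brick.
def _example_flattener():
    from theano import tensor

    x = tensor.tensor4('features')
    return Flattener().apply(x)  # 2D: (batch, channels * height * width)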