Issues (119)

blocks/bricks/simple.py (2 issues)

"""Some of the simplest individual bricks."""
import logging

from theano import tensor

from blocks.bricks.base import application, Brick, lazy
from blocks.bricks.interfaces import Activation, Feedforward, Initializable
from blocks.bricks.interfaces import LinearLike, Random  # noqa

from blocks.bricks.wrappers import WithExtraDims
from blocks.roles import add_role, WEIGHT, BIAS
from blocks.utils import shared_floatx_nans

logger = logging.getLogger(__name__)


class Linear(LinearLike, Feedforward):
    r"""A linear transformation with optional bias.

    Brick which applies a linear (affine) transformation by multiplying
    the input with a weight matrix. By default, a bias term is added
    (see :class:`Initializable` for information on disabling this).

    Parameters
    ----------
    input_dim : int
        The dimension of the input. Required by :meth:`~.Brick.allocate`.
    output_dim : int
        The dimension of the output. Required by :meth:`~.Brick.allocate`.

    Notes
    -----
    See :class:`Initializable` for initialization parameters.

    A linear transformation with bias is a matrix multiplication followed
    by a vector summation.

    .. math:: f(\mathbf{x}) = \mathbf{W}\mathbf{x} + \mathbf{b}

    """
    @lazy(allocation=['input_dim', 'output_dim'])
    def __init__(self, input_dim, output_dim, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim

    def _allocate(self):
        W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
        add_role(W, WEIGHT)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            b = shared_floatx_nans((self.output_dim,), name='b')
            add_role(b, BIAS)
            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the linear transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input plus optional bias

        """
        output = tensor.dot(input_, self.W)
        if getattr(self, 'use_bias', True):
            output += self.b
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return self.output_dim
        super(Linear, self).get_dim(name)
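
A minimal usage sketch for the ``Linear`` brick above; the sizes and the
``IsotropicGaussian``/``Constant`` initialization schemes are illustrative
choices, not something this module prescribes:

    from theano import tensor
    from blocks.initialization import Constant, IsotropicGaussian

    x = tensor.matrix('x')                       # a batch of input vectors
    linear = Linear(input_dim=16, output_dim=8,
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0))
    linear.initialize()                          # allocate W and b, then initialize them
    y = linear.apply(x)                          # symbolic x.dot(W) + b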


class Bias(Feedforward, Initializable):
    """Add a bias (i.e. sum with a vector)."""
    @lazy(allocation=['dim'])
    def __init__(self, dim, **kwargs):
        super(Bias, self).__init__(**kwargs)
        self.dim = dim

    def _allocate(self):
        b = shared_floatx_nans((self.output_dim,), name='b')
        add_role(b, BIAS)
        self.parameters.append(b)

    def _initialize(self):
        b, = self.parameters

Issue: The tuple unpacking with sequence defined at line 614 of
blocks.bricks.base seems to be unbalanced; 1 value(s) for 0 label(s).
This happens when the number of values does not equal the number of
labels, for example:

    a, b = ("a", "b", "c")  # only 2 labels for 3 values

        self.biases_init.initialize(b, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Add the bias to the input.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The input with the bias added

        """
        b, = self.parameters

Issue: The tuple unpacking with sequence defined at line 614 of
blocks.bricks.base seems to be unbalanced; 1 value(s) for 0 label(s)
(same check as above).

        return input_ + b

    def get_dim(self, name):
        if name in ['input_', 'output']:
            return self.dim
        super(Bias, self).get_dim(name)

    def _get_dim(self):
        return self.dim

    def _set_dim(self, value):
        self.dim = value

    input_dim = output_dim = property(_get_dim, _set_dim)


class Maxout(Brick):
    """Maxout pooling transformation.

    A brick that does max pooling over groups of input units. If you use
    this code in a research project, please cite [GWFM13]_.

    .. [GWFM13] Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron
       Courville, and Yoshua Bengio, *Maxout networks*, ICML (2013), pp.
       1319-1327.

    Parameters
    ----------
    num_pieces : int
        The size of the groups the maximum is taken over.

    Notes
    -----
    Maxout applies a set of linear transformations to a vector and selects
    for each output dimension the result with the highest value.

    """
    @lazy(allocation=['num_pieces'])
    def __init__(self, num_pieces, **kwargs):
        super(Maxout, self).__init__(**kwargs)
        self.num_pieces = num_pieces

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the maxout transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input

        """
        last_dim = input_.shape[-1]
        output_dim = last_dim // self.num_pieces
        new_shape = ([input_.shape[i] for i in range(input_.ndim - 1)] +
                     [output_dim, self.num_pieces])
        output = tensor.max(input_.reshape(new_shape, ndim=input_.ndim + 1),
                            axis=input_.ndim)
        return output
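
The reshape-and-max in ``apply`` above takes the maximum over groups of
``num_pieces`` consecutive input units. A rough numpy rendering of the same
bookkeeping, as an illustration of the semantics rather than the brick itself:

    import numpy

    x = numpy.array([[1., 5., 2., 4., 3., 6.]])      # one example, 6 units
    num_pieces = 2
    output_dim = x.shape[-1] // num_pieces           # 3 output units
    grouped = x.reshape(x.shape[:-1] + (output_dim, num_pieces))
    print(grouped.max(axis=-1))                      # [[5. 4. 6.]]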


class LinearMaxout(Initializable, Feedforward):
    """Maxout pooling following a linear transformation.

    This code combines the :class:`Linear` brick with a :class:`Maxout`
    brick.

    Parameters
    ----------
    input_dim : int
        The dimension of the input. Required by :meth:`~.Brick.allocate`.
    output_dim : int
        The dimension of the output. Required by :meth:`~.Brick.allocate`.
    num_pieces : int
        The number of linear functions. Required by
        :meth:`~.Brick.allocate`.

    Notes
    -----
    See :class:`Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_dim', 'output_dim', 'num_pieces'])
    def __init__(self, input_dim, output_dim, num_pieces, **kwargs):
        self.linear = Linear()
        self.maxout = Maxout()
        children = [self.linear, self.maxout]
        kwargs.setdefault('children', []).extend(children)
        super(LinearMaxout, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_pieces = num_pieces

    @property
    def input_dim(self):
        return self.linear.input_dim

    @input_dim.setter
    def input_dim(self, value):
        self.linear.input_dim = value

    def _push_allocation_config(self):
        self.linear.output_dim = self.output_dim * self.num_pieces
        self.maxout.num_pieces = self.num_pieces

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the linear transformation followed by maxout.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input

        """
        pre_activation = self.linear.apply(input_)
        output = self.maxout.apply(pre_activation)
        return output
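
The dimension bookkeeping is done in ``_push_allocation_config`` above: the
child ``Linear`` has to produce ``output_dim * num_pieces`` activations so
that ``Maxout`` can reduce them back to ``output_dim``. A small sketch,
assuming made-up sizes and the public ``push_allocation_config`` wrapper
around that hook:

    lm = LinearMaxout(input_dim=16, output_dim=8, num_pieces=4)
    lm.push_allocation_config()          # propagates dimensions to the children
    assert lm.linear.output_dim == 8 * 4
    assert lm.maxout.num_pieces == 4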


class Identity(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_


class Tanh(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.tanh(input_)


class Logistic(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.sigmoid(input_)


class Softplus(Activation):
    r"""Softplus brick.

    The softplus is defined as :math:`\zeta(x) = \log(1+e^x)`.

    .. Dugas, C., Bengio, Y., Belisle, F., Nadeau, C., and Garcia,
       R. (2001). Incorporating second-order functional knowledge
       for better option pricing. In NIPS 13. MIT Press.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.softplus(input_)


class Rectifier(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.relu(input_)


class LeakyRectifier(Activation):
    r"""Leaky ReLU.

    Like Rectifier, but inputs are scaled by a small constant for negative
    inputs.

    .. math:: f(x) = \text{max}(x, ax)

    Parameters
    ----------
    leak : float, optional
        The scalar to multiply negative values by. Named 'a' above.

    .. Maas, Andrew L., Awni Y. Hannun, and Andrew Y. Ng. Rectifier
       nonlinearities improve neural network acoustic models. Proc.
       ICML. Vol. 30. 2013.

    """
    def __init__(self, leak=0.01, **kwargs):
        super(LeakyRectifier, self).__init__(**kwargs)
        self._leak = leak

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.relu(input_, alpha=self._leak)


class Softmax(Brick):
    """A softmax brick.

    Works with 2-dimensional inputs only. If you need more,
    see :class:`NDimensionalSoftmax`.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Standard softmax.

        Parameters
        ----------
        input_ : :class:`~theano.Variable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        output_ : :class:`~theano.Variable`
            A matrix with probabilities in each row for each distribution
            from `input_`.

        """
        return tensor.nnet.softmax(input_)

    @application(inputs=['input_'], outputs=['output'])
    def log_probabilities(self, input_):
        """Normalize log-probabilities.

        Converts unnormalized log-probabilities (exponents of which do not
        sum to one) into actual log-probabilities (exponents of which sum
        to one).

        Parameters
        ----------
        input_ : :class:`~theano.Variable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        output : :class:`~theano.Variable`
            A matrix with normalized log-probabilities in each row for each
            distribution from `input_`.

        """
        shifted = input_ - input_.max(axis=1, keepdims=True)
        return shifted - tensor.log(
            tensor.exp(shifted).sum(axis=1, keepdims=True))
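
``log_probabilities`` above uses the usual shift-by-the-row-maximum
(log-sum-exp) trick so that the exponentials cannot overflow. The same
computation in numpy, as a sketch with a made-up function name:

    import numpy

    def log_probabilities_np(energies):
        # subtract the row maximum before exponentiating, exactly as above
        shifted = energies - energies.max(axis=1, keepdims=True)
        return shifted - numpy.log(numpy.exp(shifted).sum(axis=1, keepdims=True))

    energies = numpy.array([[1000., 1001., 1002.]])
    print(numpy.exp(log_probabilities_np(energies)).sum())   # ~1.0, no overflow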

    @application(inputs=['y', 'x'], outputs=['output'])
    def categorical_cross_entropy(self, application_call, y, x):
        """Computationally stable cross-entropy for pre-softmax values.

        Parameters
        ----------
        y : :class:`~tensor.TensorVariable`
            In the case of a matrix argument, each row represents a
            probability distribution. In the vector case, each element
            represents a distribution by specifying the position of 1 in a
            1-hot vector.
        x : :class:`~tensor.TensorVariable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        cost : :class:`~tensor.TensorVariable`
            A vector of cross-entropies between respective distributions
            from y and x.

        """
        x = self.log_probabilities(x)
        application_call.add_auxiliary_variable(
            x.copy(name='log_probabilities'))
        if y.ndim == x.ndim - 1:
            indices = tensor.arange(y.shape[0]) * x.shape[1] + y
            cost = -x.flatten()[indices]
        elif y.ndim == x.ndim:
            cost = -(x * y).sum(axis=1)
        else:
            raise TypeError('rank mismatch between x and y')
        return cost
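
``categorical_cross_entropy`` accepts the targets ``y`` either as a vector of
class indices or as a matrix of full distributions; for one-hot targets the
two branches agree. A numpy sketch with made-up values:

    import numpy

    logits = numpy.array([[2.0, 0.5, -1.0],
                          [0.0, 1.0, 3.0]])
    log_p = logits - logits.max(axis=1, keepdims=True)
    log_p -= numpy.log(numpy.exp(log_p).sum(axis=1, keepdims=True))

    y_idx = numpy.array([0, 2])                      # vector case: class indices
    cost_idx = -log_p[numpy.arange(2), y_idx]

    y_dist = numpy.array([[1.0, 0.0, 0.0],           # matrix case: full distributions
                          [0.0, 0.0, 1.0]])
    cost_dist = -(log_p * y_dist).sum(axis=1)

    assert numpy.allclose(cost_idx, cost_dist)       # one-hot rows give the same cost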


class NDimensionalSoftmax(Softmax):
    decorators = [WithExtraDims()]
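
``WithExtraDims`` wraps each application of :class:`Softmax` so that it also
accepts an ``extra_ndim`` argument: the extra leading dimensions are flattened
away, the two-dimensional computation is applied, and the result is reshaped
back. A usage sketch with an illustrative tensor shape:

    from theano import tensor

    x = tensor.tensor3('x')                    # e.g. (time, batch, features)
    softmax = NDimensionalSoftmax()
    probs = softmax.apply(x, extra_ndim=1)     # softmax over the last dimension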