Completed
Pull Request — master (#977) by Frédéric
02:51 (queued 01:10)

blocks.bricks.Linear.b()   A

Complexity: Conditions 1
Size: Total Lines 3
Duplication: Lines 0, Ratio 0 %

Metric  Value
cc      1
dl      0
loc     3
rs      10
"""Some of the simplest individual bricks."""
import logging

from theano import tensor

from blocks.bricks.base import application, Brick, lazy
from blocks.bricks.interfaces import Activation, Feedforward, Initializable
from blocks.bricks.interfaces import LinearLike, Random  # noqa
Unused Code introduced by: Unused Random imported from blocks.bricks.interfaces

from blocks.bricks.wrappers import WithExtraDims
from blocks.roles import add_role, WEIGHT, BIAS
from blocks.utils import shared_floatx_nans

logger = logging.getLogger(__name__)

class Linear(LinearLike, Feedforward):
    r"""A linear transformation with optional bias.

    Brick which applies a linear (affine) transformation by multiplying
    the input with a weight matrix. By default, a bias term is added
    (see :class:`Initializable` for information on disabling this).

    Parameters
    ----------
    input_dim : int
        The dimension of the input. Required by :meth:`~.Brick.allocate`.
    output_dim : int
        The dimension of the output. Required by :meth:`~.Brick.allocate`.

    Notes
    -----
    See :class:`Initializable` for initialization parameters.

    A linear transformation with bias is a matrix multiplication followed
    by a vector summation.

    .. math:: f(\mathbf{x}) = \mathbf{W}\mathbf{x} + \mathbf{b}

    """
    @lazy(allocation=['input_dim', 'output_dim'])
    def __init__(self, input_dim, output_dim, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim

    def _allocate(self):
        W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
        add_role(W, WEIGHT)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if self.use_bias:
            b = shared_floatx_nans((self.output_dim,), name='b')
            add_role(b, BIAS)
            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the linear transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input plus optional bias

        """
        output = tensor.dot(input_, self.W)
        if self.use_bias:
            output += self.b
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return self.output_dim
        super(Linear, self).get_dim(name)
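Usage sketch (not part of the reviewed file; it assumes the standard initialization schemes from blocks.initialization):

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.matrix('features')
linear = Linear(input_dim=10, output_dim=5,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))
linear.initialize()   # allocates W and b, then applies the init schemes
y = linear.apply(x)   # symbolic dot(x, W) + b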


class Bias(Feedforward, Initializable):
    """Add a bias (i.e. sum with a vector)."""
    @lazy(allocation=['dim'])
    def __init__(self, dim, **kwargs):
        super(Bias, self).__init__(**kwargs)
        self.dim = dim

    def _allocate(self):
        b = shared_floatx_nans((self.output_dim,), name='b')
        add_role(b, BIAS)
        self.parameters.append(b)

    def _initialize(self):
        b, = self.parameters
Bug introduced by: The tuple unpacking with the sequence defined at line 617 of blocks.bricks.base seems to be unbalanced; 1 value(s) for 0 label(s).
This happens when the number of values does not equal the number of labels, for example:
a, b = ("a", "b", "c")  # only 2 labels for 3 values
        self.biases_init.initialize(b, self.rng)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the bias (add it to the input).

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The input with the bias added

        """
        b, = self.parameters
Bug introduced by: the same unbalanced tuple unpacking warning as above (sequence defined at line 617 of blocks.bricks.base; 1 value(s) for 0 label(s)).
        return input_ + b

    def get_dim(self, name):
        if name in ['input_', 'output']:
            return self.dim
        super(Bias, self).get_dim(name)

    def _get_dim(self):
        return self.dim

    def _set_dim(self, value):
        self.dim = value

    input_dim = output_dim = property(_get_dim, _set_dim)


class Maxout(Brick):
    """Maxout pooling transformation.

    A brick that does max pooling over groups of input units. If you use
    this code in a research project, please cite [GWFM13]_.

    .. [GWFM13] Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron
       Courville, and Yoshua Bengio, *Maxout networks*, ICML (2013), pp.
       1319-1327.

    Parameters
    ----------
    num_pieces : int
        The size of the groups the maximum is taken over.

    Notes
    -----
    Maxout applies a set of linear transformations to a vector and selects
    for each output dimension the result with the highest value.

    """
    @lazy(allocation=['num_pieces'])
    def __init__(self, num_pieces, **kwargs):
        super(Maxout, self).__init__(**kwargs)
        self.num_pieces = num_pieces

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the maxout transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input

        """
        last_dim = input_.shape[-1]
        output_dim = last_dim // self.num_pieces
        new_shape = ([input_.shape[i] for i in range(input_.ndim - 1)] +
                     [output_dim, self.num_pieces])
        output = tensor.max(input_.reshape(new_shape, ndim=input_.ndim + 1),
                            axis=input_.ndim)
        return output
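A numpy sketch of what the reshape-and-max above computes (an illustration, not part of the reviewed file): for a (2, 6) input and num_pieces=2 the input is viewed as (2, 3, 2) and the maximum is taken over the trailing group axis:

import numpy

input_ = numpy.arange(12.).reshape(2, 6)    # batch of 2, last_dim = 6
num_pieces = 2
grouped = input_.reshape(2, 6 // num_pieces, num_pieces)
output = grouped.max(axis=-1)               # shape (2, 3), one value per group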


class LinearMaxout(Initializable, Feedforward):
    """Maxout pooling following a linear transformation.

    This code combines the :class:`Linear` brick with a :class:`Maxout`
    brick.

    Parameters
    ----------
    input_dim : int
        The dimension of the input. Required by :meth:`~.Brick.allocate`.
    output_dim : int
        The dimension of the output. Required by :meth:`~.Brick.allocate`.
    num_pieces : int
        The number of linear functions. Required by
        :meth:`~.Brick.allocate`.

    Notes
    -----
    See :class:`Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_dim', 'output_dim', 'num_pieces'])
    def __init__(self, input_dim, output_dim, num_pieces, **kwargs):
        super(LinearMaxout, self).__init__(**kwargs)
        self.linear = Linear()
        self.maxout = Maxout()
        self.children = [self.linear,
                         self.maxout]

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_pieces = num_pieces

    @property
    def input_dim(self):
        return self.linear.input_dim

    @input_dim.setter
    def input_dim(self, value):
        self.linear.input_dim = value

    def _push_allocation_config(self):
        self.linear.output_dim = self.output_dim * self.num_pieces
        self.maxout.num_pieces = self.num_pieces

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the linear transformation followed by maxout.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input

        """
        pre_activation = self.linear.apply(input_)
        output = self.maxout.apply(pre_activation)
        return output
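Usage sketch (not part of the reviewed file; same blocks.initialization assumption as above). Note how _push_allocation_config gives the child Linear output_dim * num_pieces units, which Maxout then reduces back to output_dim:

from theano import tensor
from blocks.initialization import IsotropicGaussian, Constant

x = tensor.matrix('features')
brick = LinearMaxout(input_dim=10, output_dim=5, num_pieces=3,
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
brick.initialize()
y = brick.apply(x)   # the child Linear outputs 15 units; Maxout keeps the max of each group of 3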


class Identity(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_


class Tanh(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.tanh(input_)


class Logistic(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.sigmoid(input_)


class Softplus(Activation):
    r"""Softplus brick.

    The softplus is defined as :math:`\zeta(x) = \log(1+e^x)`.

    .. Dugas, C., Bengio, Y., Belisle, F., Nadeau, C., and Garcia,
       R. (2001). Incorporating second-order functional knowledge
       for better option pricing. In NIPS 13. MIT Press.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.softplus(input_)


class Rectifier(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.switch(input_ > 0, input_, 0)
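Activation bricks carry no parameters, so a minimal sketch of their use is direct application to a symbolic variable (illustration only, not part of the reviewed file):

from theano import tensor

pre_activation = tensor.matrix('pre_activation')
hidden = Rectifier().apply(pre_activation)   # elementwise max(x, 0)
gate = Logistic().apply(pre_activation)      # elementwise sigmoid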


class Softmax(Brick):
    """A softmax brick.

    Works with 2-dimensional inputs only. If you need more,
    see :class:`NDimensionalSoftmax`.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Standard softmax.

        Parameters
        ----------
        input_ : :class:`~theano.Variable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        output : :class:`~theano.Variable`
            A matrix with probabilities in each row for each distribution
            from `input_`.

        """
        return tensor.nnet.softmax(input_)

    @application(inputs=['input_'], outputs=['output'])
    def log_probabilities(self, input_):
        """Normalize log-probabilities.

        Converts unnormalized log-probabilities (exponents of which do not
        sum to one) into actual log-probabilities (exponents of which sum
        to one).

        Parameters
        ----------
        input_ : :class:`~theano.Variable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        output : :class:`~theano.Variable`
            A matrix with normalized log-probabilities in each row for each
            distribution from `input_`.

        """
        shifted = input_ - input_.max(axis=1, keepdims=True)
        return shifted - tensor.log(
            tensor.exp(shifted).sum(axis=1, keepdims=True))
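A numpy sketch of the max-shift used above (illustration only): subtracting the row maximum before exponentiating avoids overflow, and the exponents of the returned log-probabilities sum to one per row:

import numpy

x = numpy.array([[1000., 1001., 1002.]])   # naive exp(x) would overflow
shifted = x - x.max(axis=1, keepdims=True)
log_probs = shifted - numpy.log(numpy.exp(shifted).sum(axis=1, keepdims=True))
assert numpy.allclose(numpy.exp(log_probs).sum(axis=1), 1.0)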

    @application(inputs=['y', 'x'], outputs=['output'])
    def categorical_cross_entropy(self, application_call, y, x):
        """Computationally stable cross-entropy for pre-softmax values.

        Parameters
        ----------
        y : :class:`~tensor.TensorVariable`
            In the case of a matrix argument, each row represents a
            probability distribution. In the vector case, each element
            represents a distribution by specifying the position of 1 in a
            1-hot vector.
        x : :class:`~tensor.TensorVariable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        cost : :class:`~tensor.TensorVariable`
            A vector of cross-entropies between respective distributions
            from `y` and `x`.

        """
        x = self.log_probabilities(x)
        application_call.add_auxiliary_variable(
            x.copy(name='log_probabilities'))
        if y.ndim == x.ndim - 1:
            indices = tensor.arange(y.shape[0]) * x.shape[1] + y
            cost = -x.flatten()[indices]
        elif y.ndim == x.ndim:
            cost = -(x * y).sum(axis=1)
        else:
            raise TypeError('rank mismatch between x and y')
        return cost
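A brief usage sketch (not from the reviewed file) of the two target formats accepted above: an integer vector of class indices (the flattened-index branch) or a matrix of full distributions (the (x * y).sum branch):

from theano import tensor

softmax = Softmax()
energies = tensor.matrix('energies')   # pre-softmax values, one row per example
labels = tensor.lvector('labels')      # class indices
targets = tensor.matrix('targets')     # one-hot or soft distributions
cost_from_labels = softmax.categorical_cross_entropy(labels, energies).mean()
cost_from_targets = softmax.categorical_cross_entropy(targets, energies).mean()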


class NDimensionalSoftmax(Softmax):
    decorators = [WithExtraDims()]
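The WithExtraDims decorator wraps every application of Softmax so it can be applied to inputs with leading extra dimensions; a hedged sketch, assuming the extra_ndim keyword that the wrapper adds:

from theano import tensor

softmax = NDimensionalSoftmax()
energies = tensor.tensor3('energies')            # e.g. (time, batch, classes)
probs = softmax.apply(energies, extra_ndim=1)    # softmax over the last axis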