Completed
Pull Request — master (#1030)
created by unknown (12:36 queued, 07:51)

Class: Bias    Rating: A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 42
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
dl 0
loc 42
rs 10
c 0
b 0
f 0
wmc 7

6 Methods

Rating   Name   Duplication   Size   Complexity  
A _allocate() 0 4 1
A __init__() 0 4 1
A _get_dim() 0 2 1
A get_dim() 0 4 2
A _set_dim() 0 2 1
A apply() 0 17 1
"""Some of the simplest individual bricks."""
import logging

from theano import tensor

from blocks.bricks.base import application, Brick, lazy
from blocks.bricks.interfaces import Activation, Feedforward, Initializable
from blocks.bricks.interfaces import LinearLike, Random  # noqa

    [Issue: Unused Code] Unused Random imported from blocks.bricks.interfaces.

from blocks.bricks.wrappers import WithExtraDims
from blocks.roles import add_role, WEIGHT, BIAS
from blocks.utils import shared_floatx_nans

logger = logging.getLogger(__name__)

class Linear(LinearLike, Feedforward):
    r"""A linear transformation with optional bias.

    Brick which applies a linear (affine) transformation by multiplying
    the input with a weight matrix. By default, a bias term is added
    (see :class:`Initializable` for information on disabling this).

    Parameters
    ----------
    input_dim : int
        The dimension of the input. Required by :meth:`~.Brick.allocate`.
    output_dim : int
        The dimension of the output. Required by :meth:`~.Brick.allocate`.

    Notes
    -----
    See :class:`Initializable` for initialization parameters.

    A linear transformation with bias is a matrix multiplication followed
    by a vector summation.

    .. math:: f(\mathbf{x}) = \mathbf{W}\mathbf{x} + \mathbf{b}

    """
    @lazy(allocation=['input_dim', 'output_dim'])
    def __init__(self, input_dim, output_dim, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim

    def _allocate(self):
        W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
        add_role(W, WEIGHT)
        self.parameters.append(W)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        if getattr(self, 'use_bias', True):
            b = shared_floatx_nans((self.output_dim,), name='b')
            add_role(b, BIAS)
            self.parameters.append(b)
            self.add_auxiliary_variable(b.norm(2), name='b_norm')

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the linear transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input plus optional bias

        """
        output = tensor.dot(input_, self.W)
        if getattr(self, 'use_bias', True):
            output += self.b
        return output

    def get_dim(self, name):
        if name == 'input_':
            return self.input_dim
        if name == 'output':
            return self.output_dim
        super(Linear, self).get_dim(name)
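For illustration (not part of the file under review), a minimal sketch of how this brick is typically constructed and applied, assuming the IsotropicGaussian and Constant schemes from blocks.initialization:

    import numpy
    import theano
    from theano import tensor
    from blocks.bricks import Linear
    from blocks.initialization import Constant, IsotropicGaussian

    x = tensor.matrix('x')
    # input_dim and output_dim must be known before allocation (lazy allocation).
    linear = Linear(input_dim=10, output_dim=5,
                    weights_init=IsotropicGaussian(0.01),
                    biases_init=Constant(0))
    linear.initialize()              # allocates W (10 x 5) and b (5,) and fills them
    y = linear.apply(x)              # y = dot(x, W) + b
    f = theano.function([x], y)
    print(f(numpy.ones((2, 10), dtype=theano.config.floatX)).shape)  # (2, 5)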
class Bias(Feedforward, Initializable):
    """Add a bias (i.e. sum with a vector)."""
    @lazy(allocation=['dim'])
    def __init__(self, dim, **kwargs):
        super(Bias, self).__init__(**kwargs)
        self.dim = dim

    def _allocate(self):
        b = shared_floatx_nans((self.output_dim,), name='b')
        add_role(b, BIAS)
        self.parameters.append(b)

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the bias (add it to the input).

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The input with the bias added

        """
        b, = self.parameters

    [Issue: Bug] The tuple unpacking with sequence defined at line 620 of blocks.bricks.base seems to be unbalanced; 1 value(s) for 0 label(s). This happens when the number of values does not equal the number of labels, e.g. a, b = ("a", "b", "c")  # only 2 labels for 3 values.

        return input_ + b

    def get_dim(self, name):
        if name in ['input_', 'output']:
            return self.dim
        super(Bias, self).get_dim(name)

    def _get_dim(self):
        return self.dim

    def _set_dim(self, value):
        self.dim = value

    input_dim = output_dim = property(_get_dim, _set_dim)
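A small sketch (not part of the file) of how the input_dim/output_dim aliasing set up by the property above behaves; it only exercises attribute access, so no allocation is needed:

    from blocks.bricks import Bias

    bias = Bias(dim=5)
    # input_dim and output_dim are both aliases for dim (see the property above),
    # which is what the Feedforward interface expects.
    assert bias.input_dim == bias.output_dim == 5
    bias.output_dim = 3       # writing either alias rewrites dim itself
    assert bias.dim == 3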
class Maxout(Brick):
    """Maxout pooling transformation.

    A brick that does max pooling over groups of input units. If you use
    this code in a research project, please cite [GWFM13]_.

    .. [GWFM13] Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron
       Courville, and Yoshua Bengio, *Maxout networks*, ICML (2013), pp.
       1319-1327.

    Parameters
    ----------
    num_pieces : int
        The size of the groups the maximum is taken over.

    Notes
    -----
    Maxout applies a set of linear transformations to a vector and selects
    for each output dimension the result with the highest value.

    """
    @lazy(allocation=['num_pieces'])
    def __init__(self, num_pieces, **kwargs):
        super(Maxout, self).__init__(**kwargs)
        self.num_pieces = num_pieces

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the maxout transformation.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformation

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input

        """
        last_dim = input_.shape[-1]
        output_dim = last_dim // self.num_pieces
        new_shape = ([input_.shape[i] for i in range(input_.ndim - 1)] +
                     [output_dim, self.num_pieces])
        output = tensor.max(input_.reshape(new_shape, ndim=input_.ndim + 1),
                            axis=input_.ndim)
        return output
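A short worked sketch (not part of the file) of the reshape-and-max logic above: the last axis is split into groups of num_pieces consecutive units and the maximum of each group is kept.

    import numpy
    import theano
    from theano import tensor
    from blocks.bricks import Maxout

    x = tensor.matrix('x')
    maxout = Maxout(num_pieces=3)
    f = theano.function([x], maxout.apply(x))
    # Six input units form two groups of three consecutive units; the maximum
    # of each group survives, so the output has 6 // 3 = 2 units per row.
    inp = numpy.array([[1, 5, 2, -1, 0, 3]], dtype=theano.config.floatX)
    print(f(inp))  # [[ 5.  3.]]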
class LinearMaxout(Initializable, Feedforward):
    """Maxout pooling following a linear transformation.

    This code combines the :class:`Linear` brick with a :class:`Maxout`
    brick.

    Parameters
    ----------
    input_dim : int
        The dimension of the input. Required by :meth:`~.Brick.allocate`.
    output_dim : int
        The dimension of the output. Required by :meth:`~.Brick.allocate`.
    num_pieces : int
        The number of linear functions. Required by
        :meth:`~.Brick.allocate`.

    Notes
    -----
    See :class:`Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_dim', 'output_dim', 'num_pieces'])
    def __init__(self, input_dim, output_dim, num_pieces, **kwargs):
        self.linear = Linear()
        self.maxout = Maxout()
        children = [self.linear, self.maxout]
        kwargs.setdefault('children', []).extend(children)
        super(LinearMaxout, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_pieces = num_pieces

    @property
    def input_dim(self):
        return self.linear.input_dim

    @input_dim.setter
    def input_dim(self, value):
        self.linear.input_dim = value

    def _push_allocation_config(self):
        self.linear.output_dim = self.output_dim * self.num_pieces
        self.maxout.num_pieces = self.num_pieces

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Apply the linear transformation followed by maxout.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input

        """
        pre_activation = self.linear.apply(input_)
        output = self.maxout.apply(pre_activation)
        return output
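An illustrative sketch (not part of the file) of how _push_allocation_config sizes the inner Linear brick; it assumes the same blocks.initialization schemes as the Linear example above:

    from theano import tensor
    from blocks.bricks import LinearMaxout
    from blocks.initialization import Constant, IsotropicGaussian

    lm = LinearMaxout(input_dim=10, output_dim=5, num_pieces=4,
                      weights_init=IsotropicGaussian(0.01),
                      biases_init=Constant(0))
    lm.initialize()
    # _push_allocation_config gives the inner Linear 5 * 4 = 20 output units;
    # Maxout then reduces each group of 4 back to one unit, yielding 5 outputs.
    print(lm.linear.output_dim)           # 20
    y = lm.apply(tensor.matrix('x'))      # shape (batch, 5)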
class Identity(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return input_


class Tanh(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.tanh(input_)


class Logistic(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.sigmoid(input_)


class Softplus(Activation):
    r"""Softplus brick.

    The softplus is defined as :math:`\zeta(x) = \log(1+e^x)`.

    .. Dugas, C., Bengio, Y., Belisle, F., Nadeau, C., and Garcia,
       R. (2001). Incorporating second-order functional knowledge
       for better option pricing. In NIPS 13. MIT Press.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.softplus(input_)


class Rectifier(Activation):
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.relu(input_)


class LeakyRectifier(Activation):
    r"""Leaky ReLU.

    Like Rectifier, but inputs are scaled by a small constant for negative
    inputs.

    .. math:: f(x) = \text{max}(x, ax)

    Parameters
    ----------
    leak : float, optional
        The scalar to multiply negative values by. Named 'a' above.

    .. Maas, Andrew L., Awni Y. Hannun, and Andrew Y. Ng. Rectifier
       nonlinearities improve neural network acoustic models. Proc.
       ICML. Vol. 30. 2013.

    """
    def __init__(self, leak=0.01, **kwargs):
        super(LeakyRectifier, self).__init__(**kwargs)
        self._leak = leak

    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        return tensor.nnet.relu(input_, alpha=self._leak)
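A brief numerical sketch (not part of the file) of the rectifier formulas, f(x) = max(x, 0) and f(x) = max(x, ax) with a = leak:

    import numpy
    import theano
    from theano import tensor
    from blocks.bricks import LeakyRectifier, Rectifier

    x = tensor.vector('x')
    f = theano.function(
        [x], [Rectifier().apply(x), LeakyRectifier(leak=0.01).apply(x)])
    relu_out, leaky_out = f(numpy.array([3., -2.], dtype=theano.config.floatX))
    print(relu_out)   # [ 3.  0.]
    print(leaky_out)  # [ 3.   -0.02]  i.e. max(x, 0.01 * x) for the negative entry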
class Softmax(Brick):
    """A softmax brick.

    Works with 2-dimensional inputs only. If you need more,
    see :class:`NDimensionalSoftmax`.

    """
    @application(inputs=['input_'], outputs=['output'])
    def apply(self, input_):
        """Standard softmax.

        Parameters
        ----------
        input_ : :class:`~theano.Variable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        output : :class:`~theano.Variable`
            A matrix with probabilities in each row for each distribution
            from `input_`.

        """
        return tensor.nnet.softmax(input_)

    @application(inputs=['input_'], outputs=['output'])
    def log_probabilities(self, input_):
        """Normalize log-probabilities.

        Converts unnormalized log-probabilities (exponents of which do not
        sum to one) into actual log-probabilities (exponents of which sum
        to one).

        Parameters
        ----------
        input_ : :class:`~theano.Variable`
            A matrix, each row contains unnormalized log-probabilities of a
            distribution.

        Returns
        -------
        output : :class:`~theano.Variable`
            A matrix with normalized log-probabilities in each row for each
            distribution from `input_`.

        """
        shifted = input_ - input_.max(axis=1, keepdims=True)
        return shifted - tensor.log(
            tensor.exp(shifted).sum(axis=1, keepdims=True))

    @application(inputs=['y', 'x'], outputs=['output'])
    def categorical_cross_entropy(self, application_call, y, x):
        """Computationally stable cross-entropy for pre-softmax values.

        Parameters
        ----------
        y : :class:`~tensor.TensorVariable`
            In the case of a matrix argument, each row represents a
            probability distribution. In the vector case, each element
            represents a distribution by specifying the position of 1 in a
            1-hot vector.
        x : :class:`~tensor.TensorVariable`
            A matrix, each row contains unnormalized probabilities of a
            distribution.

        Returns
        -------
        cost : :class:`~tensor.TensorVariable`
            A vector of cross-entropies between respective distributions
            from y and x.

        """
        x = self.log_probabilities(x)
        application_call.add_auxiliary_variable(
            x.copy(name='log_probabilities'))
        if y.ndim == x.ndim - 1:
            indices = tensor.arange(y.shape[0]) * x.shape[1] + y
            cost = -x.flatten()[indices]
        elif y.ndim == x.ndim:
            cost = -(x * y).sum(axis=1)
        else:
            raise TypeError('rank mismatch between x and y')
        return cost
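For reference, log_probabilities implements the numerically stable log-softmax, log softmax(x)_i = (x_i - m) - log(sum_j exp(x_j - m)) with m = max_j x_j. A small sketch (not part of the file) checking categorical_cross_entropy on two rows of logits:

    import numpy
    import theano
    from theano import tensor
    from blocks.bricks import Softmax

    x = tensor.matrix('x')          # rows of unnormalized log-probabilities
    y = tensor.lvector('y')         # target class indices (the 1-hot positions)
    cost = Softmax().categorical_cross_entropy(y, x)
    f = theano.function([y, x], cost)
    logits = numpy.array([[0., 0., 0.],
                          [0., 10., 0.]], dtype=theano.config.floatX)
    # The first row is uniform, so its cost is log(3), about 1.0986; the second
    # row puts nearly all mass on class 1, so its cost is close to 0.
    print(f(numpy.array([0, 1], dtype='int64'), logits))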
class NDimensionalSoftmax(Softmax):
    decorators = [WithExtraDims()]
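A usage sketch (not part of the file) of the wrapped brick, assuming the extra_ndim keyword that the WithExtraDims wrapper adds to each application:

    from theano import tensor
    from blocks.bricks import NDimensionalSoftmax

    x = tensor.tensor3('x')                 # e.g. (time, batch, features)
    softmax = NDimensionalSoftmax()
    # The leading extra_ndim axes are flattened into the batch axis, the 2D
    # Softmax is applied, and the original shape is restored.
    probs = softmax.apply(x, extra_ndim=1)  # softmax over the last axis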