Completed
Push — master ( 8e81bf...e89cf9 )
by Dmitry
99:20 queued 42:52
created

blocks/bricks/parallel.py (2 issues)

1
"""Generic transformations with multiple inputs and/or outputs."""
2
import copy
3
4
from picklable_itertools.extras import equizip
5
6
from blocks.bricks.base import lazy, application
7
from blocks.bricks.simple import Initializable, Linear
8
from blocks.utils import pack, extract_args
9
10
11
class Parallel(Initializable):
    """Apply similar transformations to several inputs.

    Given a prototype brick, a :class:`Parallel` brick makes several
    copies of it (each with its own parameters). At application time
    each copy is applied to its respective input.

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> x, y = tensor.matrix('x'), tensor.matrix('y')
    >>> parallel = Parallel(
    ...     prototype=Linear(use_bias=False),
    ...     input_names=['x', 'y'], input_dims=[2, 3], output_dims=[4, 5],
    ...     weights_init=Constant(2))
    >>> parallel.initialize()
    >>> new_x, new_y = parallel.apply(x=x, y=y)
    >>> new_x.eval({x: [[1, 1]]}) # doctest: +ELLIPSIS
    array([[ 4.,  4.,  4.,  4.]]...
    >>> new_y.eval({y: [[1, 1, 1]]}) # doctest: +ELLIPSIS
    array([[ 6.,  6.,  6.,  6.,  6.]]...

    Parameters
    ----------
    input_names : list
        The input names.
    input_dims : list
        List of input dimensions, given in the same order as `input_names`.
    output_dims : list
        List of output dimensions.
    prototype : :class:`~blocks.bricks.Feedforward`
        The transformation prototype. A copy is created for every input.
    child_prefix : str, optional
        The prefix for children names. By default "transform" is used.

    Attributes
    ----------
    input_names : list
        The input names.
    input_dims : list
        Input dimensions.
    output_dims : list
        Output dimensions.

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_names', 'input_dims', 'output_dims'])
    def __init__(self, input_names, input_dims, output_dims,
                 prototype, child_prefix=None, **kwargs):
        super(Parallel, self).__init__(**kwargs)
        if not child_prefix:
            child_prefix = "transform"

        self.input_names = input_names
        self.input_dims = input_dims
        self.output_dims = output_dims
        self.prototype = prototype

        # One independently-parametrized copy of the prototype per input,
        # named after the input it will transform.
        children = []
        for name in input_names:
            child = copy.deepcopy(self.prototype)
            child.name = "{}_{}".format(child_prefix, name)
            children.append(child)
        self.children = children

    def _push_allocation_config(self):
        # Propagate the configured dimensions to the child bricks.
        # equizip raises if the three lists have unequal lengths.
        triples = equizip(self.input_dims, self.output_dims, self.children)
        for input_dim, output_dim, child in triples:
            child.input_dim = input_dim
            child.output_dim = output_dim

    @application
    def apply(self, *args, **kwargs):
        # Route positional/keyword arguments to inputs by name, then
        # apply each child to its own input.
        routed_args = extract_args(self.input_names, *args, **kwargs)
        outputs = []
        for name, child in equizip(self.input_names, self.children):
            outputs.append(child.apply(routed_args[name]))
        return outputs

    @apply.property('inputs')
    def apply_inputs(self):
        return self.input_names

    @apply.property('outputs')
    def apply_outputs(self):
        return self.input_names
96
97
98
class Fork(Parallel):
    """Several outputs from one input by applying similar transformations.

    Given a prototype brick, a :class:`Fork` brick makes several
    copies of it (each with its own parameters). At the application time
    the copies are applied to the input to produce different outputs.

    A typical usecase for this brick is to produce inputs for gates
    of gated recurrent bricks, such as
    :class:`~blocks.bricks.GatedRecurrent`.

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> x = tensor.matrix('x')
    >>> fork = Fork(output_names=['y', 'z'],
    ...             input_dim=2, output_dims=[3, 4],
    ...             weights_init=Constant(2), biases_init=Constant(1))
    >>> fork.initialize()
    >>> y, z = fork.apply(x)
    >>> y.eval({x: [[1, 1]]}) # doctest: +ELLIPSIS
    array([[ 5.,  5.,  5.]]...
    >>> z.eval({x: [[1, 1]]}) # doctest: +ELLIPSIS
    array([[ 5.,  5.,  5.,  5.]]...

    Parameters
    ----------
    output_names : list of str
        Names of the outputs to produce.
    input_dim : int
        The input dimension.
    prototype : :class:`~blocks.bricks.Feedforward`, optional
        The transformation prototype. A copy will be created for every
        input. By default an affine transformation is used.

    Attributes
    ----------
    input_dim : int
        The input dimension.
    output_dims : list
        The output dimensions as a list of integers, corresponding to
        `output_names`.

    See Also
    --------
    :class:`Parallel` for other parameters.

    :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_dim'])
    def __init__(self, output_names, input_dim, prototype=None, **kwargs):
        if not prototype:
            prototype = Linear()

        self.output_names = output_names
        self.input_dim = input_dim

        kwargs.setdefault('child_prefix', 'fork')
        super(Fork, self).__init__(output_names, prototype=prototype,
                                   **kwargs)
        # Every child consumes the same single input, so the per-child
        # input dimension list is only built in _push_allocation_config.
        self.input_dims = None

    def _push_allocation_config(self):
        # Fork feeds one input to every child: all input dims are equal.
        self.input_dims = [self.input_dim for _ in self.output_names]
        super(Fork, self)._push_allocation_config()

    @application(inputs=['input_'])
    def apply(self, input_):
        # Route the single input to all children by name.
        return super(Fork, self).apply(**{name: input_
                                          for name in self.input_names})

    @apply.property('outputs')
    def apply_outputs(self):
        return super(Fork, self).apply.outputs
172
173
174
class Distribute(Fork):
    """Transform an input and add it to other inputs.

    This brick is designed for the following scenario: one has a group of
    variables and another separate variable, and one needs to somehow
    distribute information from the latter across the former. We call that
    "to distribute a variable across other variables", and refer to the
    separate variable as "the source" and to the variables from the group
    as "the targets".

    Given a prototype brick, a :class:`Parallel` brick makes several copies
    of it (each with its own parameters). At the application time the
    copies are applied to the source and the transformation results
    are added to the targets (in the literal sense).

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> x = tensor.matrix('x')
    >>> y = tensor.matrix('y')
    >>> z = tensor.matrix('z')
    >>> distribute = Distribute(target_names=['x', 'y'], source_name='z',
    ...                         target_dims=[2, 3], source_dim=3,
    ...                         weights_init=Constant(2))
    >>> distribute.initialize()
    >>> new_x, new_y = distribute.apply(x=x, y=y, z=z)
    >>> new_x.eval({x: [[2, 2]], z: [[1, 1, 1]]}) # doctest: +ELLIPSIS
    array([[ 8.,  8.]]...
    >>> new_y.eval({y: [[1, 1, 1]], z: [[1, 1, 1]]}) # doctest: +ELLIPSIS
    array([[ 7.,  7.,  7.]]...

    Parameters
    ----------
    target_names : list
        The names of the targets.
    source_name : str
        The name of the source.
    target_dims : list
        A list of target dimensions, corresponding to `target_names`.
    source_dim : int
        The dimension of the source input.
    prototype : :class:`~blocks.bricks.Feedforward`, optional
        The transformation prototype. A copy will be created for every
        input. By default a linear transformation is used.

    Attributes
    ----------
    target_dims : list
    source_dim : int

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['source_name', 'target_dims', 'source_dim'])
    def __init__(self, target_names, source_name, target_dims, source_dim,
                 prototype=None, **kwargs):
        if not prototype:
            prototype = Linear(use_bias=False)

        self.target_names = target_names
        self.source_name = source_name
        self.target_dims = target_dims
        self.source_dim = source_dim

        # In Fork's terms: the source is the single input, the targets
        # are the outputs.
        super(Distribute, self).__init__(
            output_names=target_names, output_dims=target_dims,
            input_dim=source_dim, prototype=prototype, **kwargs)

    def _push_allocation_config(self):
        # Mirror the Distribute-level attributes into the names that
        # Fork._push_allocation_config expects.
        self.input_dim = self.source_dim
        self.output_dims = self.target_dims
        super(Distribute, self)._push_allocation_config()

    @application
    def apply(self, **kwargs):
        r"""Distribute the source across the targets.

        Parameters
        ----------
        \*\*kwargs : dict
            The source and the target variables.

        Returns
        -------
        output : list
            The new target variables.

        Raises
        ------
        ValueError
            If a keyword argument is given that matches neither the
            source name nor any target name.

        """
        result = super(Distribute, self).apply(kwargs.pop(self.source_name),
                                               as_list=True)
        for i, name in enumerate(self.target_names):
            result[i] += kwargs.pop(name)
        # Anything left over was not a recognized input; name the
        # offenders instead of raising a bare ValueError.
        if kwargs:
            raise ValueError("unknown inputs: {}".format(
                ", ".join(sorted(kwargs))))
        return result

    @apply.property('inputs')
    def apply_inputs(self):
        return [self.source_name] + self.target_names

    @apply.property('outputs')
    def apply_outputs(self):
        return self.target_names
278
279
280
class Merge(Parallel):
    """Merges several variables by applying a transformation and summing.

    Parameters
    ----------
    input_names : list
        The input names.
    input_dims : list
        The dictionary of input dimensions, keys are input names, values
        are dimensions.
    output_dim : int
        The output dimension of the merged variables.
    prototype : :class:`~blocks.bricks.Feedforward`, optional
        A transformation prototype. A copy will be created for every
        input.  If ``None``, a linear transformation is used.
    child_prefix : str, optional
        A prefix for children names. By default "transform" is used.

    .. warning::

       Note that if you want to have a bias you can pass a :class:`.Linear`
       brick as a `prototype`, but this will result in several redundant
       biases. It is a better idea to use ``merge.children[0].use_bias =
       True``.

    Attributes
    ----------
    input_names : list
        The input names.
    input_dims : list
        List of input dimensions corresponding to `input_names`.
    output_dim : int
        The output dimension.

    Examples
    --------
    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> a = tensor.matrix('a')
    >>> b = tensor.matrix('b')
    >>> merge = Merge(input_names=['a', 'b'], input_dims=[3, 4],
    ...               output_dim=2, weights_init=Constant(1.))
    >>> merge.initialize()
    >>> c = merge.apply(a=a, b=b)
    >>> c.eval({a: [[1, 1, 1]], b: [[2, 2, 2, 2]]})  # doctest: +ELLIPSIS
    array([[ 11.,  11.]]...

    """
    @lazy(allocation=['input_dims', 'output_dim'])
    def __init__(self, input_names, input_dims, output_dim, prototype=None,
                 **kwargs):
        if not prototype:
            prototype = Linear(use_bias=False)
        self.output_dim = output_dim
        # Every child projects its input into the common output space.
        projected_dims = [output_dim for _ in input_names]
        super(Merge, self).__init__(
            input_names, input_dims, projected_dims, prototype, **kwargs
        )

    @application(outputs=['output'])
    def apply(self, *args, **kwargs):
        projections = pack(super(Merge, self).apply(*args, **kwargs))
        # Sum is often faster than tensor.sum(outputs, axis=0) for a
        # small number of outputs
        return sum(projections)

    @apply.property('inputs')
    def apply_inputs(self):
        return self.input_names

    def _push_allocation_config(self):
        # All children share the single merged output dimension.
        self.output_dims = [self.output_dim] * len(self.input_names)
        super(Merge, self)._push_allocation_config()
354