Checks congruence of parent and child method arguments
1 | """Generic transformations with multiple inputs and/or outputs.""" |
||
2 | import copy |
||
3 | |||
4 | from picklable_itertools.extras import equizip |
||
5 | |||
6 | from blocks.bricks.base import lazy, application |
||
7 | from blocks.bricks.simple import Initializable, Linear |
||
8 | from blocks.utils import pack, extract_args |
||
9 | |||
10 | |||
class Parallel(Initializable):
    """Apply similar transformations to several inputs.

    One deep copy of the prototype brick is created per input name,
    each holding its own parameters. When applied, every copy
    transforms its corresponding input.

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> x, y = tensor.matrix('x'), tensor.matrix('y')
    >>> parallel = Parallel(
    ...     prototype=Linear(use_bias=False),
    ...     input_names=['x', 'y'], input_dims=[2, 3], output_dims=[4, 5],
    ...     weights_init=Constant(2))
    >>> parallel.initialize()
    >>> new_x, new_y = parallel.apply(x=x, y=y)
    >>> new_x.eval({x: [[1, 1]]})  # doctest: +ELLIPSIS
    array([[ 4.,  4.,  4.,  4.]]...
    >>> new_y.eval({y: [[1, 1, 1]]})  # doctest: +ELLIPSIS
    array([[ 6.,  6.,  6.,  6.,  6.]]...

    Parameters
    ----------
    input_names : list
        The input names.
    input_dims : list
        List of input dimensions, given in the same order as
        `input_names`.
    output_dims : list
        List of output dimensions.
    prototype : :class:`~blocks.bricks.Feedforward`
        The transformation prototype. A copy will be created for every
        input.
    child_prefix : str, optional
        The prefix for children names. By default "transform" is used.

    Attributes
    ----------
    input_names : list
        The input names.
    input_dims : list
        Input dimensions.
    output_dims : list
        Output dimensions.

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_names', 'input_dims', 'output_dims'])
    def __init__(self, input_names, input_dims, output_dims,
                 prototype, child_prefix=None, **kwargs):
        super(Parallel, self).__init__(**kwargs)
        if not child_prefix:
            child_prefix = "transform"

        self.input_names = input_names
        self.input_dims = input_dims
        self.output_dims = output_dims
        self.prototype = prototype

        # One independent copy of the prototype per input, named after
        # the input it will transform.
        self.children = []
        for input_name in input_names:
            child = copy.deepcopy(self.prototype)
            child.name = "{}_{}".format(child_prefix, input_name)
            self.children.append(child)

    def _push_allocation_config(self):
        # Propagate the configured dimensions down to each child copy.
        triples = equizip(self.input_dims, self.output_dims, self.children)
        for in_dim, out_dim, child in triples:
            child.input_dim = in_dim
            child.output_dim = out_dim

    @application
    def apply(self, *args, **kwargs):
        # Route positional/keyword arguments to their named inputs,
        # then apply each child to its own input.
        routed_args = extract_args(self.input_names, *args, **kwargs)
        return [child.apply(routed_args[name])
                for name, child in equizip(self.input_names, self.children)]

    @apply.property('inputs')
    def apply_inputs(self):
        return self.input_names

    @apply.property('outputs')
    def apply_outputs(self):
        # Outputs are named after the inputs they were produced from.
        return self.input_names
96 | |||
97 | |||
class Fork(Parallel):
    """Several outputs from one input by applying similar transformations.

    Creates one parameterized copy of the prototype brick per output
    name; at application time every copy is applied to the single
    input, yielding one output per copy.

    A typical usecase for this brick is to produce inputs for gates
    of gated recurrent bricks, such as
    :class:`~blocks.bricks.GatedRecurrent`.

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> x = tensor.matrix('x')
    >>> fork = Fork(output_names=['y', 'z'],
    ...             input_dim=2, output_dims=[3, 4],
    ...             weights_init=Constant(2), biases_init=Constant(1))
    >>> fork.initialize()
    >>> y, z = fork.apply(x)
    >>> y.eval({x: [[1, 1]]})  # doctest: +ELLIPSIS
    array([[ 5.,  5.,  5.]]...
    >>> z.eval({x: [[1, 1]]})  # doctest: +ELLIPSIS
    array([[ 5.,  5.,  5.,  5.]]...

    Parameters
    ----------
    output_names : list of str
        Names of the outputs to produce.
    input_dim : int
        The input dimension.
    prototype : :class:`~blocks.bricks.Feedforward`, optional
        The transformation prototype. A copy will be created for every
        input. By default an affine transformation is used.

    Attributes
    ----------
    input_dim : int
        The input dimension.
    output_dims : list
        The output dimensions as a list of integers, corresponding to
        `output_names`.

    See Also
    --------
    :class:`Parallel` for other parameters.

    :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['input_dim'])
    def __init__(self, output_names, input_dim, prototype=None, **kwargs):
        if not prototype:
            prototype = Linear()

        self.output_names = output_names
        self.input_dim = input_dim

        kwargs.setdefault('child_prefix', 'fork')
        super(Fork, self).__init__(output_names, prototype=prototype,
                                   **kwargs)
        # The shared input dimension is expanded to a per-child list
        # lazily, in _push_allocation_config.
        self.input_dims = None

    def _push_allocation_config(self):
        # Every child consumes the same single input dimension.
        self.input_dims = [self.input_dim] * len(self.output_names)
        super(Fork, self)._push_allocation_config()

    @application(inputs=['input_'])
    def apply(self, input_):
        # Feed the one input to every named child transformation.
        duplicated = dict.fromkeys(self.input_names, input_)
        return super(Fork, self).apply(**duplicated)

    @apply.property('outputs')
    def apply_outputs(self):
        return super(Fork, self).apply.outputs
172 | |||
173 | |||
class Distribute(Fork):
    """Transform an input and add it to other inputs.

    This brick is designed for the following scenario: one has a group of
    variables and another separate variable, and one needs to somehow
    distribute information from the latter across the former. We call that
    "to distribute a variable across other variables", and refer to the
    separate variable as "the source" and to the variables from the group
    as "the targets".

    Given a prototype brick, a :class:`Parallel` brick makes several copies
    of it (each with its own parameters). At the application time the
    copies are applied to the source and the transformation results
    are added to the targets (in the literal sense).

    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> x = tensor.matrix('x')
    >>> y = tensor.matrix('y')
    >>> z = tensor.matrix('z')
    >>> distribute = Distribute(target_names=['x', 'y'], source_name='z',
    ...                         target_dims=[2, 3], source_dim=3,
    ...                         weights_init=Constant(2))
    >>> distribute.initialize()
    >>> new_x, new_y = distribute.apply(x=x, y=y, z=z)
    >>> new_x.eval({x: [[2, 2]], z: [[1, 1, 1]]})  # doctest: +ELLIPSIS
    array([[ 8.,  8.]]...
    >>> new_y.eval({y: [[1, 1, 1]], z: [[1, 1, 1]]})  # doctest: +ELLIPSIS
    array([[ 7.,  7.,  7.]]...

    Parameters
    ----------
    target_names : list
        The names of the targets.
    source_name : str
        The name of the source.
    target_dims : list
        A list of target dimensions, corresponding to `target_names`.
    source_dim : int
        The dimension of the source input.
    prototype : :class:`~blocks.bricks.Feedforward`, optional
        The transformation prototype. A copy will be created for every
        input. By default a linear transformation is used.

    Attributes
    ----------
    target_dims : list
    source_dim : int

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['source_name', 'target_dims', 'source_dim'])
    def __init__(self, target_names, source_name, target_dims, source_dim,
                 prototype=None, **kwargs):
        if not prototype:
            prototype = Linear(use_bias=False)

        self.target_names = target_names
        self.source_name = source_name
        self.target_dims = target_dims
        self.source_dim = source_dim

        # One fork child per target, each transforming the source to the
        # corresponding target's dimension.
        super(Distribute, self).__init__(
            output_names=target_names, output_dims=target_dims,
            input_dim=source_dim, prototype=prototype, **kwargs)

    def _push_allocation_config(self):
        # Translate the source/target naming into Fork's input/output
        # naming before the parent pushes the configuration to children.
        self.input_dim = self.source_dim
        self.output_dims = self.target_dims
        super(Distribute, self)._push_allocation_config()

    @application
    def apply(self, **kwargs):
        r"""Distribute the source across the targets.

        Parameters
        ----------
        \*\*kwargs : dict
            The source and the target variables.

        Returns
        -------
        output : list
            The new target variables.

        Raises
        ------
        ValueError
            If any keyword argument is neither the source nor a target.

        """
        result = super(Distribute, self).apply(kwargs.pop(self.source_name),
                                               as_list=True)
        for i, name in enumerate(self.target_names):
            result[i] += kwargs.pop(name)
        if len(kwargs):
            # Previously a bare ValueError; name the offending inputs so
            # the caller can tell what was misspelled or superfluous.
            raise ValueError("unexpected inputs: {}".format(
                ", ".join(sorted(kwargs))))
        return result

    @apply.property('inputs')
    def apply_inputs(self):
        return [self.source_name] + self.target_names

    @apply.property('outputs')
    def apply_outputs(self):
        return self.target_names
278 | |||
279 | |||
class Merge(Parallel):
    """Merges several variables by applying a transformation and summing.

    Parameters
    ----------
    input_names : list
        The input names.
    input_dims : list
        List of input dimensions, given in the same order as
        `input_names`.
    output_dim : int
        The output dimension of the merged variables.
    prototype : :class:`~blocks.bricks.Feedforward`, optional
        A transformation prototype. A copy will be created for every
        input. If ``None``, a linear transformation is used.
    child_prefix : str, optional
        A prefix for children names. By default "transform" is used.

    .. warning::

       Note that if you want to have a bias you can pass a :class:`.Linear`
       brick as a `prototype`, but this will result in several redundant
       biases. It is a better idea to use ``merge.children[0].use_bias =
       True``.

    Attributes
    ----------
    input_names : list
        The input names.
    input_dims : list
        List of input dimensions corresponding to `input_names`.
    output_dim : int
        The output dimension.

    Examples
    --------
    >>> from theano import tensor
    >>> from blocks.initialization import Constant
    >>> a = tensor.matrix('a')
    >>> b = tensor.matrix('b')
    >>> merge = Merge(input_names=['a', 'b'], input_dims=[3, 4],
    ...               output_dim=2, weights_init=Constant(1.))
    >>> merge.initialize()
    >>> c = merge.apply(a=a, b=b)
    >>> c.eval({a: [[1, 1, 1]], b: [[2, 2, 2, 2]]})  # doctest: +ELLIPSIS
    array([[ 11.,  11.]]...

    """
    @lazy(allocation=['input_dims', 'output_dim'])
    def __init__(self, input_names, input_dims, output_dim, prototype=None,
                 **kwargs):
        if not prototype:
            prototype = Linear(use_bias=False)
        self.output_dim = output_dim
        # Every child maps its input to the shared output dimension so
        # the results can be summed.
        super(Merge, self).__init__(
            input_names, input_dims,
            [output_dim for _ in input_names], prototype, **kwargs
        )

    @application(outputs=['output'])
    def apply(self, *args, **kwargs):
        outputs = super(Merge, self).apply(*args, **kwargs)
        outputs = pack(outputs)
        # Sum is often faster than tensor.sum(outputs, axis=0) for a
        # small number of outputs
        return sum(outputs)

    @apply.property('inputs')
    def apply_inputs(self):
        return self.input_names

    def _push_allocation_config(self):
        # Refresh the per-child output dims in case output_dim was set
        # lazily after construction.
        self.output_dims = [self.output_dim for _ in self.input_names]
        super(Merge, self)._push_allocation_config()