Completed
Pull Request — master (#941)
by David
03:42 queued 01:47
created

blocks.graph.ComputationGraph.replace()   B

Complexity
    Conditions: 6

Size
    Total Lines: 86

Duplication
    Lines: 0
    Ratio: 0 %

Metric                        Value
cc (cyclomatic complexity)    6
dl (duplicated lines)         0
loc (lines of code)           86
rs                            7.2894

How to fix: Long Method

Small methods make your code easier to understand, particularly when combined with a good name. And if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a coherent part of the long method into a new, well-named method and call it from the original, as sketched below.
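As an illustration, here is a minimal, self-contained sketch of Extract Method in Python. The function names (report_totals, format_summary) are hypothetical and not taken from the code under review.

# Before: one function doing two jobs, with a comment marking the second job.
def report_totals(amounts):
    total = sum(amounts)
    # Format the summary line.
    return "{} orders, {} total".format(len(amounts), total)


# After: the commented part is extracted into a small, well-named helper,
# and the comment is no longer needed because the name says it all.
def format_summary(count, total):
    return "{} orders, {} total".format(count, total)


def report_totals_refactored(amounts):
    return format_summary(len(amounts), sum(amounts))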

"""Annotated computation graph management."""
import logging
from collections import OrderedDict
from itertools import chain
import warnings

import numpy
import theano
from picklable_itertools.extras import equizip
from theano import Variable
from theano.gof import graph
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.scan_module.scan_op import Scan
from toolz import unique

from ..config import config
from ..roles import (add_role, has_roles, AUXILIARY, PARAMETER, DROPOUT,
                     COLLECTED, COLLECTOR)
from ..utils import (is_graph_input, is_shared_variable, dict_union,
                     shared_floatx_zeros, shared_like)
from .annotations import add_annotation, Annotation  # noqa
from .bn import batch_normalization, apply_batch_normalization  # noqa
from .bn import batch_normalization_updates  # noqa

logger = logging.getLogger(__name__)


class ComputationGraph(object):
    r"""Encapsulates a managed Theano computation graph.

    This implies that it not only contains the variables required to
    compute the given outputs, but also all the auxiliary variables and
    updates that were attached to these variables through the annotation
    system.

    All variables are presented in topologically sorted order according to
    the apply nodes that they are an input to.

    Parameters
    ----------
    outputs : (list of) :class:`~tensor.TensorVariable`
        The output(s) of the computation graph.

    Attributes
    ----------
    inputs : list of :class:`~tensor.TensorVariable`
        The inputs of the computation graph. This does not include shared
        variables and constants.
    shared_variables : list of :class:`~tensor.TensorSharedVariable`
        All the shared variables in the graph.
    parameters : list of :class:`~tensor.TensorSharedVariable`
        All the shared variables which have the :const:`.PARAMETER` role.
    outputs : list of :class:`~tensor.TensorVariable`
        The outputs of the computation graph (as passed to the
        constructor).
    auxiliary_variables : list of :class:`~tensor.TensorVariable`
        All variables which have the :const:`.AUXILIARY` role.
    intermediary_variables : list of :class:`~tensor.TensorVariable`
        Any variable that is not part of :attr:`inputs` or :attr:`outputs`.
    variables : list of :class:`~tensor.TensorVariable`
        All variables (including auxiliary) in the managed graph.
    scans : list of :class:`~theano.scan_module.scan_op.Scan`
        All Scan ops used in this computation graph.
    scan_variables : list of :class:`~tensor.TensorVariable`
        All variables of the inner graphs of Scan ops.
    updates : :class:`~tensor.TensorSharedVariable` updates
        All the updates found attached to the annotations.

    """
    def __init__(self, outputs):
        if isinstance(outputs, Variable):
            outputs = [outputs]
        self.outputs = outputs
        self._get_variables()
        self._has_inputs = {}

    def __iter__(self):
        return iter(self.variables)

    @property
    def inputs(self):
        """Inputs to the graph, excluding constants and shared variables."""
        return [var for var in self.variables if is_graph_input(var)]

    @property
    def intermediary_variables(self):
        return [var for var in self.variables if
                var not in self.inputs and
                var not in self.outputs]

    @property
    def shared_variables(self):
        return [var for var in self.variables if is_shared_variable(var)]

    @property
    def parameters(self):
        return [var for var in self.shared_variables
                if has_roles(var, [PARAMETER])]

    @property
    def auxiliary_variables(self):
        return [var for var in self.variables if has_roles(var, [AUXILIARY])]

    @property
    def scan_variables(self):
        """Variables of Scan ops."""
        return list(chain(*[g.variables for g in self._scan_graphs]))

    def _get_variables(self):
        """Collect variables, updates and auxiliary variables.

        In addition, collects all :class:`.Scan` ops and recurses into the
        respective inner Theano graphs.

        """
        updates = OrderedDict()

        shared_outputs = [o for o in self.outputs if is_shared_variable(o)]
        usual_outputs = [o for o in self.outputs if not is_shared_variable(o)]
        variables = shared_outputs

        if usual_outputs:
            # Sort apply nodes topologically, get variables and remove
            # duplicates
            inputs = graph.inputs(self.outputs)
            sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)
            self.scans = list(unique([node.op for node in sorted_apply_nodes
                                     if isinstance(node.op, Scan)]))
            self._scan_graphs = [ComputationGraph(scan.outputs)
                                 for scan in self.scans]

            seen = set()
            main_vars = (
                [var for var in list(chain(
                    *[apply_node.inputs for apply_node in sorted_apply_nodes]))
                 if not (var in seen or seen.add(var))] +
                [var for var in self.outputs if var not in seen])

            # While preserving order add auxiliary variables, and collect
            # updates
            seen = set()
            # Intermediate variables could be auxiliary
            seen_avs = set(main_vars)
            variables = []
            for var in main_vars:
                variables.append(var)
                for annotation in getattr(var.tag, 'annotations', []):
                    if annotation not in seen:
                        seen.add(annotation)
                        new_avs = [
                            av for av in annotation.auxiliary_variables
                            if not (av in seen_avs or seen_avs.add(av))]
                        variables.extend(new_avs)
                        updates = dict_union(updates, annotation.updates)

        self.variables = variables
        self.updates = updates

    def dict_of_inputs(self):
        """Return a mapping from an input name to the input."""
        return {var.name: var for var in self.inputs}

    def replace(self, replacements):
        """Replace certain variables in the computation graph.

        Parameters
        ----------
        replacements : dict
            The mapping from variables to be replaced to the corresponding
            substitutes.

        Examples
        --------
        >>> import theano
        >>> from theano import tensor, function
        >>> x = tensor.scalar('x')
        >>> y = x + 2
        >>> z = y + 3
        >>> a = z + 5

        Let's suppose we have dependent replacements like

        >>> replacements = {y: x * 2, z: y * 3}
        >>> cg = ComputationGraph([a])
        >>> theano.pprint(a)  # doctest: +NORMALIZE_WHITESPACE
        '(((x + TensorConstant{2}) + TensorConstant{3}) +
        TensorConstant{5})'
        >>> cg_new = cg.replace(replacements)
        >>> theano.pprint(
        ...     cg_new.outputs[0])  # doctest: +NORMALIZE_WHITESPACE
        '(((x * TensorConstant{2}) * TensorConstant{3}) +
        TensorConstant{5})'

        The first two sums have turned into multiplications

        >>> float(function(cg_new.inputs, cg_new.outputs)(3.)[0])
        23.0

        """
        # Due to Theano specifics we have to make one replacement at a time
        replacements = OrderedDict(replacements)

        outputs_cur = self.outputs

        # `replacements` with previous replacements applied. We have to track
        # the variables in the new graph that correspond to the original
        # replacements.
        replacement_keys_cur = []
        replacement_vals_cur = []
        # Sort `replacements` in topological order
        # (the variables in self.variables are in topological order)
        remaining_replacements = replacements.copy()
        for variable in self.variables:
            if variable in replacements:
                if has_roles(variable, [AUXILIARY]):
                    warnings.warn(
                        "replace method was asked to replace a variable ({}) "
                        "that is an auxiliary variable.".format(variable))
                replacement_keys_cur.append(variable)
                # self.variables should not contain duplicates,
                # otherwise pop() may fail.
                replacement_vals_cur.append(
                    remaining_replacements.pop(variable))

        # Warn about requested replacements that are not part of the graph
        if remaining_replacements:
            warnings.warn(
                "replace method was asked to replace a variable(s) ({}) "
                "that is not a part of the computational "
                "graph.".format(str(remaining_replacements.keys())))

        # Replace step-by-step in topological order
        while replacement_keys_cur:
            replace_what = replacement_keys_cur[0]
            replace_by = replacement_vals_cur[0]
            # We also want to make changes in future replacements
            outputs_new = theano.clone(
                outputs_cur + replacement_keys_cur[1:] +
                replacement_vals_cur[1:],
                replace={replace_what: replace_by})
            # Reconstruct outputs, keys, and values
            outputs_cur = outputs_new[:len(outputs_cur)]
            replacement_keys_cur = outputs_new[len(outputs_cur):
                                               len(outputs_cur) +
                                               len(replacement_keys_cur) - 1]
            replacement_vals_cur = outputs_new[len(outputs_cur) +
                                               len(replacement_keys_cur):]

        return ComputationGraph(outputs_cur)

    def get_theano_function(self, additional_updates=None, **kwargs):
        r"""Create a Theano function from the contained graph.

        Parameters
        ----------
        additional_updates : dict or iterable of pairs, optional
            Updates to apply in addition to those attached to the graph.
        \*\*kwargs : dict
            Keyword arguments to theano.function.
            Useful for specifying compilation modes or profiling.

        """
        updates = self.updates
        if additional_updates:
            updates = dict_union(updates, OrderedDict(additional_updates))
        return theano.function(self.inputs, self.outputs, updates=updates,
                               **kwargs)

    def get_snapshot(self, data):
        """Evaluate all role-carrying Theano variables on given data.

        Parameters
        ----------
        data : dict of (data source, data) pairs
            Data for input variables. The sources should match the
            names of the input variables.

        Returns
        -------
        Dictionary of (variable, variable value on given data) pairs.

        """
        role_variables = [var for var in self.variables
                          if hasattr(var.tag, "roles") and
                          not is_shared_variable(var)]
        value_holders = [shared_like(var) for var in role_variables]
        function = self.get_theano_function(equizip(value_holders,
                                                    role_variables))
        function(*(data[input_.name] for input_ in self.inputs))
        return OrderedDict([(var, value_holder.get_value(borrow=True))
                            for var, value_holder in equizip(role_variables,
                                                             value_holders)])

    def has_inputs(self, variable):
        """Check if a variable depends on input variables.

        Returns
        -------
        bool
            ``True`` if the given variable depends on input variables,
            ``False`` otherwise.

        """
        if variable not in self._has_inputs:
            self._has_inputs[variable] = False
            if is_graph_input(variable):
                self._has_inputs[variable] = True
            elif getattr(variable, 'owner', None):
                for dependency in variable.owner.inputs:
                    if self.has_inputs(dependency):
                        self._has_inputs[variable] = True
        return self._has_inputs[variable]


def apply_noise(computation_graph, variables, level, seed=None):
    """Add Gaussian noise to certain variables of a computation graph.

    Parameters
    ----------
    computation_graph : instance of :class:`ComputationGraph`
        The computation graph.
    variables : list of :class:`~tensor.TensorVariable`
        Variables to add noise to.
    level : float
        Noise level (standard deviation of the added Gaussian noise).
    seed : int, optional
        The seed with which
        :class:`~theano.sandbox.rng_mrg.MRG_RandomStreams` is initialized;
        set to 1 by default.

    """
    if not seed:
        seed = config.default_seed
    rng = MRG_RandomStreams(seed)
    replace = {}
    for variable in variables:
        replace[variable] = (variable +
                             rng.normal(variable.shape, std=level))
    return computation_graph.replace(replace)


def collect_parameters(computation_graph, parameters):
    """Replace parameters with a single shared variable.

    This can be useful if you need to calculate the full Hessian of a
    computational graph. It replaces parameters with slices of a single
    large vector, like

    >>> from blocks.utils import shared_floatx
    >>> W1 = shared_floatx(numpy.random.rand(10, 10))
    >>> W2 = shared_floatx(numpy.random.rand(10, 10))
    >>> all_parameters = shared_floatx(numpy.concatenate(
    ...     [W1.get_value().flatten(), W2.get_value().flatten()]))
    >>> W1 = all_parameters[:W1.size]
    >>> W2 = all_parameters[W1.size:]

    Parameters
    ----------
    computation_graph : :class:`ComputationGraph` instance
        The managed Theano graph in which to collect parameters.
    parameters : list of Theano shared variables
        The parameters whose values should be collected.

    Returns
    -------
    ComputationGraph instance
        A new Theano graph which has all the given parameters collected
        into a single large shared variable.

    Notes
    -----
    Note that this replacement makes the training of the model
    significantly slower because of the large number of Theano
    ``set_subtensor`` calls needed to train the model.

    Examples
    --------
    >>> from blocks.bricks import MLP, Logistic
    >>> from blocks.bricks.cost import SquaredError
    >>> from theano import tensor
    >>> x = tensor.matrix()
    >>> mlp = MLP(activations=[Logistic(), Logistic()],
    ...           dims=[784, 100, 784])
    >>> cost = SquaredError().apply(x, mlp.apply(x))
    >>> cg = ComputationGraph(cost)
    >>> new_cg = collect_parameters(cg, cg.shared_variables)

    The new graph has only a single shared variable. This variable receives
    the :const:`COLLECTOR` role.

    >>> new_cg.shared_variables
    [collected_parameters]

    The bricks' variables have been replaced with reshaped segments of this
    single shared variable. These replacements are given the
    :const:`.COLLECTED` role.

    >>> from blocks.filter import VariableFilter
    >>> from blocks.roles import COLLECTED
    >>> var_filter = VariableFilter(roles=[COLLECTED])
    >>> var_filter(new_cg.variables)  # doctest: +SKIP
    [Reshape{1}.0, Reshape{1}.0, Reshape{2}.0, Reshape{2}.0]

    """
    parameter_values, parameter_sizes, parameter_shapes = [], [], []
    for parameter in parameters:
        parameter_values.append(parameter.get_value(borrow=True))
        parameter_sizes.append(parameter_values[-1].size)
        parameter_shapes.append(parameter_values[-1].shape)

    new_parameters = shared_floatx_zeros(sum(parameter_sizes))
    new_parameters.set_value(numpy.concatenate([value.flatten()
                             for value in parameter_values]))
    new_parameters.name = 'collected_parameters'
    add_role(new_parameters, COLLECTOR)

    replacements = {}
    for parameter, shape, i, j in zip(parameters, parameter_shapes,
                                      numpy.cumsum([0] + parameter_sizes[:-1]),
                                      numpy.cumsum(parameter_sizes)):
        new_parameter = new_parameters[i:j].reshape(shape)
        new_parameter.replacement_of = parameter
        add_role(new_parameter, COLLECTED)
        replacements[parameter] = new_parameter
    return computation_graph.replace(replacements)


def apply_dropout(computation_graph, variables, drop_prob, rng=None,
                  seed=None, custom_divisor=None):
    """Apply dropout to specified variables in a graph.

    Parameters
    ----------
    computation_graph : instance of :class:`ComputationGraph`
        The computation graph.
    variables : list of :class:`~tensor.TensorVariable`
        Variables to be dropped out.
    drop_prob : float
        Probability of dropping out. If you want to apply dropout
        with different probabilities for different layers, call it
        several times.
    rng : :class:`~theano.sandbox.rng_mrg.MRG_RandomStreams`
        Random number generator.
    seed : int
        Random seed to be used if `rng` was not specified.
    custom_divisor : float or None, optional
        Divide dropped variables by a given scalar value. If `None`
        (default), dropped variables will be divided by `(1 - drop_prob)`,
        which is equivalent to scaling by `(1 - drop_prob)` at test
        time as recommended in [DROPOUT]_.

    Returns
    -------
    dropped_computation_graph : instance of :class:`ComputationGraph`
        A new computation graph with dropout applied to the specified
        variables. In order to train with, or monitor, the outputs
        of the original computation graph with dropout applied, use
        the variables contained in `dropped_computation_graph.outputs`.

    Notes
    -----
    For more information, see [DROPOUT]_.

    .. [DROPOUT] Hinton et al. *Improving neural networks by preventing
       co-adaptation of feature detectors*, arXiv:1207.0580.

    Examples
    --------
    >>> import numpy
    >>> from theano import tensor, function
    >>> from blocks.bricks import MLP, Identity
    >>> from blocks.filter import VariableFilter
    >>> from blocks.initialization import Constant
    >>> from blocks.roles import INPUT
    >>> linear = MLP([Identity(), Identity()], [2, 10, 2],
    ...              weights_init=Constant(1), biases_init=Constant(2))
    >>> x = tensor.matrix('x')
    >>> y = linear.apply(x)
    >>> cg = ComputationGraph(y)

    We are going to drop out all the input variables

    >>> inputs = VariableFilter(roles=[INPUT])(cg.variables)

    Here we apply dropout with the default settings to our computation graph

    >>> cg_dropout = apply_dropout(cg, inputs, 0.5)

    Dropped-out variables have the `DROPOUT` role and carry a
    `replacement_of` tag. Let's filter these variables and check that they
    link back to the original ones.

    >>> dropped_out = VariableFilter(roles=[DROPOUT])(cg_dropout.variables)
    >>> inputs_referenced = [var.tag.replacement_of for var in dropped_out]
    >>> set(inputs) == set(inputs_referenced)
    True

    Compiling Theano functions to forward propagate in the original and
    dropped-out graphs

    >>> fprop = function(cg.inputs, cg.outputs[0])
    >>> fprop_dropout = function(cg_dropout.inputs, cg_dropout.outputs[0])

    Initialize the MLP and apply these functions

    >>> linear.initialize()
    >>> fprop(numpy.ones((3, 2),
    ...       dtype=theano.config.floatX))  # doctest:+ELLIPSIS
    array([[ 42.,  42.],
           [ 42.,  42.],
           [ 42.,  42.]]...
    >>> fprop_dropout(numpy.ones((3, 2),
    ...               dtype=theano.config.floatX))  # doctest:+ELLIPSIS
    array([[ 0.,  0.],
           [ 0.,  0.],
           [ 0.,  0.]]...

    And after the second run the answer is different

    >>> fprop_dropout(numpy.ones((3, 2),
    ...               dtype=theano.config.floatX))  # doctest:+ELLIPSIS
    array([[   0.,   52.],
           [ 100.,    0.],
           [   0.,    0.]]...

    """
    if not rng and not seed:
        seed = config.default_seed
    if not rng:
        rng = MRG_RandomStreams(seed)
    if custom_divisor is None:
        divisor = (1 - drop_prob)
    else:
        divisor = custom_divisor
    replacements = [(var, var *
                     rng.binomial(var.shape, p=1 - drop_prob,
                                  dtype=theano.config.floatX) /
                     divisor)
                    for var in variables]
    for variable, replacement in replacements:
        add_role(replacement, DROPOUT)
        replacement.tag.replacement_of = variable

    return computation_graph.replace(replacements)
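
Applied to the method this report flags, the same Extract Method advice could look roughly as follows. This is a hedged sketch, not the library's actual code: it assumes the module's existing imports (OrderedDict, warnings, theano, has_roles, AUXILIARY) and is meant to sit inside the ComputationGraph class shown above; the helper names _sorted_replacements and _clone_step_by_step are hypothetical.

    # Sketch only: the two concerns of replace() (ordering the requested
    # replacements, then cloning the graph one replacement at a time) each
    # move into their own small, nameable method.

    def _sorted_replacements(self, replacements):
        """Order replacement pairs topologically, warning on odd inputs."""
        keys, values = [], []
        remaining = OrderedDict(replacements)
        for variable in self.variables:  # already topologically sorted
            if variable in remaining:
                if has_roles(variable, [AUXILIARY]):
                    warnings.warn("replace was asked to replace an auxiliary "
                                  "variable ({}).".format(variable))
                keys.append(variable)
                values.append(remaining.pop(variable))
        if remaining:
            warnings.warn("replace was asked to replace variable(s) ({}) "
                          "that are not part of the graph."
                          .format(list(remaining.keys())))
        return keys, values

    def _clone_step_by_step(self, outputs, keys, values):
        """Apply one replacement at a time, updating later pairs as well."""
        while keys:
            cloned = theano.clone(outputs + keys[1:] + values[1:],
                                  replace={keys[0]: values[0]})
            outputs = cloned[:len(outputs)]
            keys = cloned[len(outputs):len(outputs) + len(keys) - 1]
            values = cloned[len(outputs) + len(keys):]
        return outputs

    def replace(self, replacements):
        """Replace certain variables in the computation graph."""
        keys, values = self._sorted_replacements(replacements)
        return ComputationGraph(self._clone_step_by_step(
            self.outputs, keys, values))

Each helper stays far below the flagged 86-line size, carries fewer conditions, and can be named after the single job it does, which is exactly the Long Method guidance above.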