1 | """Bricks that compose together other bricks in linear sequences.""" |
||
2 | import copy |
||
3 | from toolz import interleave, unique |
||
4 | from picklable_itertools.extras import equizip |
||
5 | |||
6 | from ..utils import pack |
||
7 | from .base import Brick, application, lazy |
||
8 | from .interfaces import Feedforward, Initializable |
||
9 | from .simple import Linear |
||
10 | |||
11 | |||
12 | class Sequence(Brick): |
||
13 | """A sequence of bricks. |
||
14 | |||
15 | This brick applies a sequence of bricks, assuming that their in- and |
||
16 | outputs are compatible. |
||
17 | |||
18 | Parameters |
||
19 | ---------- |
||
20 | application_methods : list |
||
21 | List of :class:`.BoundApplication` or :class:`.Brick` to apply. |
||
22 | For :class:`.Brick`s, the ``.apply`` method is used. |
||
23 | |||
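    Examples
    --------
    A minimal sketch of composing two application methods (using
    :class:`~blocks.bricks.Tanh`, as in the :class:`MLP` example
    below); the methods are applied in the order given:

    >>> from blocks.bricks import Tanh
    >>> seq = Sequence([Tanh(name='first').apply,
    ...                 Tanh(name='second').apply])
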
24 | """ |
||
    def __init__(self, application_methods, **kwargs):
        # Accept both bricks and bound applications; normalize each
        # entry into an (application, brick) pair.
        pairs = ((a.apply, a) if isinstance(a, Brick) else (a, a.brick)
                 for a in application_methods)
        self.application_methods, bricks = zip(*pairs)
        # A brick may contribute several application methods; register
        # it as a child only once.
        kwargs.setdefault('children', []).extend(unique(bricks))
        super(Sequence, self).__init__(**kwargs)

    @application
    def apply(self, *args):
        child_input = args
        for application_method in self.application_methods:
            # Feed the output of each application method as the input
            # of the next; ``pack`` wraps a single output in a list so
            # that it can be unpacked as arguments.
            output = application_method(*pack(child_input))
            child_input = output
        return output

    @apply.property('inputs')
    def apply_inputs(self):
        return self.application_methods[0].inputs

    @apply.property('outputs')
    def apply_outputs(self):
        return self.application_methods[-1].outputs


class FeedforwardSequence(Sequence, Feedforward):
50 | """A sequence where the first and last bricks are feedforward. |
||
51 | |||
52 | Parameters |
||
53 | ---------- |
||
54 | application_methods : list |
||
55 | List of :class:`.BoundApplication` to apply. The first and last |
||
56 | application method should belong to a :class:`Feedforward` brick. |
||
57 | |||
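    Examples
    --------
    A minimal sketch (using :class:`~blocks.bricks.Linear`): the
    sequence exposes the input dimension of its first child and the
    output dimension of its last child:

    >>> from blocks.bricks import Linear
    >>> seq = FeedforwardSequence([
    ...     Linear(input_dim=2, output_dim=3, name='first').apply,
    ...     Linear(input_dim=3, output_dim=4, name='second').apply])
    >>> seq.input_dim, seq.output_dim
    (2, 4)
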
58 | """ |
||
    @property
    def input_dim(self):
        return self.children[0].input_dim

    @input_dim.setter
    def input_dim(self, value):
        self.children[0].input_dim = value

    @property
    def output_dim(self):
        return self.children[-1].output_dim

    @output_dim.setter
    def output_dim(self, value):
        self.children[-1].output_dim = value


class MLP(FeedforwardSequence, Initializable):
77 | """A simple multi-layer perceptron. |
||
78 | |||
79 | Parameters |
||
80 | ---------- |
||
81 | activations : list of :class:`.Brick`, :class:`.BoundApplication`, |
||
82 | or ``None`` |
||
83 | A list of activations to apply after each linear transformation. |
||
84 | Give ``None`` to not apply any activation. It is assumed that the |
||
85 | application method to use is ``apply``. Required for |
||
86 | :meth:`__init__`. |
||
87 | dims : list of ints |
||
88 | A list of input dimensions, as well as the output dimension of the |
||
89 | last layer. Required for :meth:`~.Brick.allocate`. |
||
90 | prototype : :class:`.Brick`, optional |
||
91 | The transformation prototype. A copy will be created for every |
||
92 | activation. If not provided, an instance of :class:`~simple.Linear` |
||
93 | will be used. |
||
94 | |||
95 | Notes |
||
96 | ----- |
||
97 | See :class:`Initializable` for initialization parameters. |
||
98 | |||
99 | Note that the ``weights_init``, ``biases_init`` (as well as |
||
100 | ``use_bias`` if set to a value other than the default of ``None``) |
||
101 | configurations will overwrite those of the layers each time the |
||
102 | :class:`MLP` is re-initialized. For more fine-grained control, push the |
||
103 | configuration to the child layers manually before initialization. |
||
104 | |||
105 | >>> from blocks.bricks import Tanh |
||
106 | >>> from blocks.initialization import IsotropicGaussian, Constant |
||
107 | >>> mlp = MLP(activations=[Tanh(), None], dims=[30, 20, 10], |
||
108 | ... weights_init=IsotropicGaussian(), |
||
109 | ... biases_init=Constant(1)) |
||
110 | >>> mlp.push_initialization_config() # Configure children |
||
111 | >>> mlp.children[0].weights_init = IsotropicGaussian(0.1) |
||
112 | >>> mlp.initialize() |
||
113 | |||
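    Consecutive entries of ``dims`` give the input and output dimensions
    of each layer, which can be checked on the children once the brick
    has been initialized:

    >>> mlp.children[0].input_dim, mlp.children[0].output_dim
    (30, 20)
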
114 | """ |
||
    @lazy(allocation=['dims'])
    def __init__(self, activations, dims, prototype=None, **kwargs):
        self.activations = activations
        self.prototype = Linear() if prototype is None else prototype
        self.linear_transformations = []
        for i in range(len(activations)):
            # One copy of the prototype per layer, named e.g.
            # linear_0, linear_1, ...
            linear = copy.deepcopy(self.prototype)
            name = self.prototype.__class__.__name__.lower()
            linear.name = '{}_{}'.format(name, i)
            self.linear_transformations.append(linear)
        if not dims:
            dims = [None] * (len(activations) + 1)
        self.dims = dims
        # Interleave the transformations and activations; ``None``
        # activations are dropped, leaving the bare linear layer.
        applications = [a for a in interleave([self.linear_transformations,
                                               activations])
                        if a is not None]
        super(MLP, self).__init__(applications, **kwargs)

    @property
    def input_dim(self):
        return self.dims[0]

    @input_dim.setter
    def input_dim(self, value):
        self.dims[0] = value

    @property
    def output_dim(self):
        return self.dims[-1]

    @output_dim.setter
    def output_dim(self, value):
        self.dims[-1] = value

    def _push_allocation_config(self):
        if len(self.dims) - 1 != len(self.linear_transformations):
            raise ValueError('dims should contain one more element than '
                             'there are linear transformations')
        for input_dim, output_dim, layer in \
                equizip(self.dims[:-1], self.dims[1:],
                        self.linear_transformations):
            layer.input_dim = input_dim
            layer.output_dim = output_dim
            if getattr(self, 'use_bias', None) is not None:
                layer.use_bias = self.use_bias