Issues (119)

blocks/bricks/recurrent/architectures.py (4 issues)

# -*- coding: utf-8 -*-
import numpy
from theano import tensor

from ..base import application, lazy
from ..simple import Initializable, Logistic, Tanh
from ...roles import add_role, WEIGHT, INITIAL_STATE
from ...utils import shared_floatx_nans, shared_floatx_zeros
from .base import BaseRecurrent, recurrent


class SimpleRecurrent(BaseRecurrent, Initializable):
    """The traditional recurrent transition.

    The most well-known recurrent transition: a matrix multiplication,
    optionally followed by a non-linearity.

    Parameters
    ----------
    dim : int
        The dimension of the hidden state
    activation : :class:`~.bricks.Brick`
        The brick to apply as activation.

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['dim'])
    def __init__(self, dim, activation, **kwargs):
        self.dim = dim
        children = [activation]
        kwargs.setdefault('children', []).extend(children)
        super(SimpleRecurrent, self).__init__(**kwargs)

    @property
    def W(self):
        return self.parameters[0]

    def get_dim(self, name):
        if name == 'mask':
            return 0
        if name in (SimpleRecurrent.apply.sequences +
                    SimpleRecurrent.apply.states):
            return self.dim
        return super(SimpleRecurrent, self).get_dim(name)

    def _allocate(self):
        self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                                  name="W"))
        add_role(self.parameters[0], WEIGHT)
        self.parameters.append(shared_floatx_zeros((self.dim,),
                                                   name="initial_state"))
        add_role(self.parameters[1], INITIAL_STATE)

    def _initialize(self):
        self.weights_init.initialize(self.W, self.rng)

    @recurrent(sequences=['inputs', 'mask'], states=['states'],
               outputs=['states'], contexts=[])
    def apply(self, inputs, states, mask=None):
        """Apply the simple transition.

        Parameters
        ----------
        inputs : :class:`~tensor.TensorVariable`
            The 2D inputs, in the shape (batch, features).
        states : :class:`~tensor.TensorVariable`
            The 2D states, in the shape (batch, features).
        mask : :class:`~tensor.TensorVariable`
            A 1D binary array in the shape (batch,) which is 1 if
            there is data available, 0 if not. Assumed to be 1-s
            only if not given.

        """
        next_states = inputs + tensor.dot(states, self.W)
        next_states = self.children[0].apply(next_states)
        if mask:
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
        return next_states

    @application(outputs=apply.states)
    def initial_states(self, batch_size, *args, **kwargs):
        return tensor.repeat(self.parameters[1][None, :], batch_size, 0)

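Because `apply` is wrapped by `@recurrent`, calling it on a 3D sequence of shape (time, batch, dim) scans the transition over the time axis. The following is a minimal usage sketch added for illustration; it is not part of the file under review and assumes a standard Blocks install where the brick is importable as `blocks.bricks.recurrent.SimpleRecurrent` and `blocks.initialization.IsotropicGaussian` is available.

import numpy
from theano import function, tensor

from blocks.bricks import Tanh
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.initialization import IsotropicGaussian

# weights_init is required because the brick is Initializable.
rnn = SimpleRecurrent(dim=3, activation=Tanh(),
                      weights_init=IsotropicGaussian(0.01))
rnn.initialize()

x = tensor.tensor3('x')          # (time, batch, 3)
h = rnn.apply(x)                 # scanned over time: (time, batch, 3)

f = function([x], h)
print(f(numpy.ones((5, 2, 3), dtype=x.dtype)).shape)  # (5, 2, 3)
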
class LSTM(BaseRecurrent, Initializable):
    u"""Long Short Term Memory.

    Every unit of an LSTM is equipped with input, forget and output gates.
    This implementation is based on code by Mohammad Pezeshki that
    implements the architecture used in [GSS03]_ and [Grav13]_. It aims to
    do as many computations in parallel as possible and expects the last
    dimension of the input to be four times the output dimension.

    Unlike a vanilla LSTM as described in [HS97]_, this model has peephole
    connections from the cells to the gates. The output gates receive
    information about the cells at the current time step, while the other
    gates only receive information about the cells at the previous time
    step. All 'peephole' weight matrices are diagonal.

    .. [GSS03] Gers, Felix A., Nicol N. Schraudolph, and Jürgen
        Schmidhuber, *Learning precise timing with LSTM recurrent
        networks*, Journal of Machine Learning Research 3 (2003),
        pp. 115-143.
    .. [Grav13] Graves, Alex, *Generating sequences with recurrent neural
        networks*, arXiv preprint arXiv:1308.0850 (2013).
    .. [HS97] Sepp Hochreiter, and Jürgen Schmidhuber, *Long Short-Term
        Memory*, Neural Computation 9(8) (1997), pp. 1735-1780.

    Parameters
    ----------
    dim : int
        The dimension of the hidden state.
    activation : :class:`~.bricks.Brick`, optional
        The activation function. The default and by far the most popular
        is :class:`.Tanh`.
    gate_activation : :class:`~.bricks.Brick` or None
        The brick to apply as activation for gates (input/output/forget).
        If ``None`` a :class:`.Logistic` brick is used.

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    """
    @lazy(allocation=['dim'])
    def __init__(self, dim, activation=None, gate_activation=None, **kwargs):
        self.dim = dim

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = [self.activation, self.gate_activation]
        kwargs.setdefault('children', []).extend(children)
        super(LSTM, self).__init__(**kwargs)

    def get_dim(self, name):
        if name == 'inputs':
            return self.dim * 4
        if name in ['states', 'cells']:
            return self.dim
        if name == 'mask':
            return 0
        return super(LSTM, self).get_dim(name)

Coding Style Naming (the file's 4 flagged issues): the attribute names W_state, W_cell_to_in, W_cell_to_forget and W_cell_to_out introduced in `_allocate` below do not conform to the attribute naming conventions ((([a-z_][a-z0-9_]{0,30})|(_?[A-Z]))$).

    def _allocate(self):
        self.W_state = shared_floatx_nans((self.dim, 4*self.dim),
                                          name='W_state')
        self.W_cell_to_in = shared_floatx_nans((self.dim,),
                                               name='W_cell_to_in')
        self.W_cell_to_forget = shared_floatx_nans((self.dim,),
                                                   name='W_cell_to_forget')
        self.W_cell_to_out = shared_floatx_nans((self.dim,),
                                                name='W_cell_to_out')
        # The underscore is required to prevent collision with
        # the `initial_state` application method
        self.initial_state_ = shared_floatx_zeros((self.dim,),
                                                  name="initial_state")
        self.initial_cells = shared_floatx_zeros((self.dim,),
                                                 name="initial_cells")
        add_role(self.W_state, WEIGHT)
        add_role(self.W_cell_to_in, WEIGHT)
        add_role(self.W_cell_to_forget, WEIGHT)
        add_role(self.W_cell_to_out, WEIGHT)
        add_role(self.initial_state_, INITIAL_STATE)
        add_role(self.initial_cells, INITIAL_STATE)

        self.parameters = [
            self.W_state, self.W_cell_to_in, self.W_cell_to_forget,
            self.W_cell_to_out, self.initial_state_, self.initial_cells]

    def _initialize(self):
        for weights in self.parameters[:4]:
            self.weights_init.initialize(weights, self.rng)

    @recurrent(sequences=['inputs', 'mask'], states=['states', 'cells'],
               contexts=[], outputs=['states', 'cells'])
    def apply(self, inputs, states, cells, mask=None):
        """Apply the Long Short Term Memory transition.

        Parameters
        ----------
        states : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of current states in the shape
            (batch_size, features). Required for `one_step` usage.
        cells : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of current cells in the shape
            (batch_size, features). Required for `one_step` usage.
        inputs : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of inputs in the shape (batch_size,
            features * 4). The `inputs` needs to be four times the
            dimension of the LSTM brick to ensure that each of the four
            gates receives a different transformation of the input. See
            [Grav13]_ equations 7 to 10 for more details. The `inputs`
            are then split in this order: input gates, forget gates,
            cells and output gates.
        mask : :class:`~tensor.TensorVariable`
            A 1D binary array in the shape (batch,) which is 1 if there is
            data available, 0 if not. Assumed to be 1-s only if not given.

        .. [Grav13] Graves, Alex, *Generating sequences with recurrent*
            *neural networks*, arXiv preprint arXiv:1308.0850 (2013).

        Returns
        -------
        states : :class:`~tensor.TensorVariable`
            Next states of the network.
        cells : :class:`~tensor.TensorVariable`
            Next cell activations of the network.

        """
        def slice_last(x, no):
            return x[:, no*self.dim: (no+1)*self.dim]

        activation = tensor.dot(states, self.W_state) + inputs
        in_gate = self.gate_activation.apply(
            slice_last(activation, 0) + cells * self.W_cell_to_in)
        forget_gate = self.gate_activation.apply(
            slice_last(activation, 1) + cells * self.W_cell_to_forget)
        next_cells = (
            forget_gate * cells +
            in_gate * self.activation.apply(slice_last(activation, 2)))
        out_gate = self.gate_activation.apply(
            slice_last(activation, 3) + next_cells * self.W_cell_to_out)
        next_states = out_gate * self.activation.apply(next_cells)

        if mask:
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
            next_cells = (mask[:, None] * next_cells +
                          (1 - mask[:, None]) * cells)

        return next_states, next_cells

    @application(outputs=apply.states)
    def initial_states(self, batch_size, *args, **kwargs):
        return [tensor.repeat(self.initial_state_[None, :], batch_size, 0),
                tensor.repeat(self.initial_cells[None, :], batch_size, 0)]

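As the `apply` docstring above notes, the LSTM expects its `inputs` to already be the concatenation of four transforms of width `dim` (input gate, forget gate, cell candidate and output gate), and it returns both states and cells. The sketch below is a reviewer illustration, not part of the file; it assumes a standard Blocks install where `Linear` lives in `blocks.bricks` and the usual initializers in `blocks.initialization`.

from theano import tensor

from blocks.bricks import Linear, Tanh
from blocks.bricks.recurrent import LSTM
from blocks.initialization import Constant, IsotropicGaussian

dim = 3
# A Linear brick produces the 4 * dim inputs the LSTM transition expects.
x_to_h = Linear(input_dim=5, output_dim=4 * dim,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))
lstm = LSTM(dim=dim, activation=Tanh(),
            weights_init=IsotropicGaussian(0.01))
x_to_h.initialize()
lstm.initialize()

x = tensor.tensor3('x')                 # (time, batch, 5)
h, c = lstm.apply(x_to_h.apply(x))      # states and cells: (time, batch, dim)
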
class GatedRecurrent(BaseRecurrent, Initializable):
    u"""Gated recurrent neural network.

    Gated recurrent neural network (GRNN) as introduced in [CvMG14]_. Every
    unit of a GRNN is equipped with update and reset gates that facilitate
    better gradient propagation.

    Parameters
    ----------
    dim : int
        The dimension of the hidden state.
    activation : :class:`~.bricks.Brick` or None
        The brick to apply as activation. If ``None`` a
        :class:`.Tanh` brick is used.
    gate_activation : :class:`~.bricks.Brick` or None
        The brick to apply as activation for gates. If ``None`` a
        :class:`.Logistic` brick is used.

    Notes
    -----
    See :class:`.Initializable` for initialization parameters.

    .. [CvMG14] Kyunghyun Cho, Bart van Merriënboer, Çağlar Gülçehre,
       Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, and Yoshua
       Bengio, *Learning Phrase Representations using RNN Encoder-Decoder
       for Statistical Machine Translation*, EMNLP (2014), pp. 1724-1734.

    """
    @lazy(allocation=['dim'])
    def __init__(self, dim, activation=None, gate_activation=None,
                 **kwargs):
        self.dim = dim

        if not activation:
            activation = Tanh()
        if not gate_activation:
            gate_activation = Logistic()
        self.activation = activation
        self.gate_activation = gate_activation

        children = [activation, gate_activation]
        kwargs.setdefault('children', []).extend(children)
        super(GatedRecurrent, self).__init__(**kwargs)

    @property
    def state_to_state(self):
        return self.parameters[0]

    @property
    def state_to_gates(self):
        return self.parameters[1]

    def get_dim(self, name):
        if name == 'mask':
            return 0
        if name in ['inputs', 'states']:
            return self.dim
        if name == 'gate_inputs':
            return 2 * self.dim
        return super(GatedRecurrent, self).get_dim(name)

    def _allocate(self):
        self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                                  name='state_to_state'))
        self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                                                  name='state_to_gates'))
        self.parameters.append(shared_floatx_zeros((self.dim,),
                                                   name="initial_state"))
        for i in range(2):
            if self.parameters[i]:
                add_role(self.parameters[i], WEIGHT)
        add_role(self.parameters[2], INITIAL_STATE)

    def _initialize(self):
        self.weights_init.initialize(self.state_to_state, self.rng)
        state_to_update = self.weights_init.generate(
            self.rng, (self.dim, self.dim))
        state_to_reset = self.weights_init.generate(
            self.rng, (self.dim, self.dim))
        self.state_to_gates.set_value(
            numpy.hstack([state_to_update, state_to_reset]))

    @recurrent(sequences=['mask', 'inputs', 'gate_inputs'],
               states=['states'], outputs=['states'], contexts=[])
    def apply(self, inputs, gate_inputs, states, mask=None):
        """Apply the gated recurrent transition.

        Parameters
        ----------
        states : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of current states in the shape
            (batch_size, dim). Required for `one_step` usage.
        inputs : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of inputs in the shape (batch_size,
            dim)
        gate_inputs : :class:`~tensor.TensorVariable`
            The 2 dimensional matrix of inputs to the gates in the
            shape (batch_size, 2 * dim).
        mask : :class:`~tensor.TensorVariable`
            A 1D binary array in the shape (batch,) which is 1 if there is
            data available, 0 if not. Assumed to be 1-s only if not given.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            Next states of the network.

        """
        gate_values = self.gate_activation.apply(
            states.dot(self.state_to_gates) + gate_inputs)
        update_values = gate_values[:, :self.dim]
        reset_values = gate_values[:, self.dim:]
        states_reset = states * reset_values
        next_states = self.activation.apply(
            states_reset.dot(self.state_to_state) + inputs)
        next_states = (next_states * update_values +
                       states * (1 - update_values))
        if mask:
            next_states = (mask[:, None] * next_states +
                           (1 - mask[:, None]) * states)
        return next_states

    @application(outputs=apply.states)
    def initial_states(self, batch_size, *args, **kwargs):
        return [tensor.repeat(self.parameters[2][None, :], batch_size, 0)]
373