Issues (119)

blocks/initialization.py (8 issues)

1
"""Objects for encapsulating parameter initialization strategies."""
2
from abc import ABCMeta, abstractmethod
3
import numbers
4
5
import numpy
6
import theano
7
from six import add_metaclass
8
9
from blocks.utils import repr_attrs, pack
10
11
12
@add_metaclass(ABCMeta)
class NdarrayInitialization(object):
    """Abstract interface that every ndarray initialization scheme follows."""

    @abstractmethod
    def generate(self, rng, shape):
        """Produce a freshly initialized parameter array.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
            Random number generator the scheme should draw from.
        shape : tuple
            Shape of the parameter array to produce.

        Returns
        -------
        output : :class:`~numpy.ndarray`
            An array of shape `shape` whose values follow the distribution
            this object describes, with dtype :attr:`config.floatX`.

        """

    def initialize(self, var, rng, shape=None):
        """Overwrite the value of a shared variable with generated values.

        Parameters
        ----------
        var : object
            A Theano shared variable; its value is replaced by the array
            returned from :meth:`generate`.
        rng : :class:`numpy.random.RandomState`
            Random number generator forwarded to :meth:`generate`.
        shape : tuple, optional
            Shape of the array to generate. When omitted (or empty), the
            shape of the current value of `var` is used instead.

        """
        target_shape = shape
        if not target_shape:
            # Borrow the internal value: only its shape is needed here.
            current = var.get_value(borrow=True, return_internal_type=True)
            target_shape = current.shape
        var.set_value(self.generate(rng, target_shape))
50
51
52
class Constant(NdarrayInitialization):
    """Fill parameter arrays with a fixed value.

    The value can be a scalar or a :class:`~numpy.ndarray` of any shape
    that broadcasts against the requested parameter arrays.

    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The value to initialize with. Either a scalar or an ndarray (or
        anything :func:`numpy.asarray` accepts, such as a nested list)
        whose shape broadcasts against every shape requested through
        `initialize`.

    """
    def __init__(self, constant):
        self.constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        # `rng` is unused: the result is deterministic. Allocate in the
        # configured float type and let NumPy broadcast the stored constant
        # over the whole array.
        filled = numpy.empty(shape, dtype=theano.config.floatX)
        filled[...] = self.constant
        return filled

    def __repr__(self):
        return repr_attrs(self, 'constant')
76
77
78
class IsotropicGaussian(NdarrayInitialization):
    """Draw parameters independently from a single Gaussian distribution.

    Parameters
    ----------
    std : float, optional
        Standard deviation of the Gaussian. Defaults to 1.
    mean : float, optional
        Mean of the Gaussian. Defaults to 0.

    Notes
    -----
    Mind the argument order: the standard deviation comes first and the
    mean comes second!

    """
    def __init__(self, std=1, mean=0):
        self.mean = mean
        self.std = std

    def generate(self, rng, shape):
        # Sample with the generator's own precision, then cast to floatX.
        samples = rng.normal(loc=self.mean, scale=self.std, size=shape)
        return samples.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'std')
104
105
106
class Uniform(NdarrayInitialization):
    """Initialize parameters from a uniform distribution.

    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function). Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.

    Raises
    ------
    ValueError
        If both or neither of `width` and `std` are given.

    """
    def __init__(self, mean=0., width=None, std=None):
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2, hence
            # width = sqrt(12) * std.
            self.width = numpy.sqrt(12) * std
        else:
            self.width = width
        self.mean = mean

    def generate(self, rng, shape):
        # Divide by a float: under Python 2 an integer `width` would
        # otherwise be floor-divided (e.g. width=1 gives a half-width of 0),
        # silently collapsing the support to a single point.
        half_width = self.width / 2.
        samples = rng.uniform(self.mean - half_width, self.mean + half_width,
                              size=shape)
        return samples.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'width')
143
144
145
class Identity(NdarrayInitialization):
    """Initialize to the identity matrix.

    Only works for 2D arrays. If the number of columns is not equal to the
    number of rows, the array will be truncated or padded with zeros.

    Parameters
    ----------
    mult : float, optional
        Multiply the identity matrix with a scalar. Defaults to 1.

    """
    def __init__(self, mult=1):
        self.mult = mult

    def generate(self, rng, shape):
        """Return `mult` times a (possibly rectangular) identity matrix.

        `rng` is unused; the result is deterministic.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError("Identity initialization only works for 2D "
                             "arrays, got shape {}".format(shape))
        rows, cols = shape
        return self.mult * numpy.eye(rows, cols, dtype=theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mult')
168
169
170
class Orthogonal(NdarrayInitialization):
    """Initialize a random orthogonal matrix.

    Only works for 2D arrays.

    Parameters
    ----------
    scale : float, optional
        Multiply the resulting matrix with a scalar. Defaults to 1.
        For a discussion of the importance of scale for training time
        and generalization refer to [Saxe2013]_.

        .. [Saxe2013] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.,
           *Exact solutions to the nonlinear dynamics of learning in deep
           linear neural networks*,
           arXiv:1312.6120 [cond-mat, q-bio, stat].

    """
    def __init__(self, scale=1):
        self.scale = scale

    @staticmethod
    def _random_orthogonal(rng, size):
        """Draw a random `size` x `size` orthogonal matrix via QR."""
        # QR decomposition of matrix with entries in N(0, 1) is random
        gaussian = rng.randn(size, size).astype(theano.config.floatX)
        q_factor, r_factor = numpy.linalg.qr(gaussian)
        # Correct that NumPy doesn't force diagonal of R to be non-negative
        return q_factor * numpy.sign(numpy.diag(r_factor))

    def generate(self, rng, shape):
        """Return a scaled random matrix built from orthogonal factors.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError("Orthogonal initialization only works for 2D "
                             "arrays, got shape {}".format(shape))
        rows, cols = shape

        if rows == cols:
            # For square weight matrices we can simplify the logic
            # and be more exact: a single QR factor is the result.
            return self._random_orthogonal(rng, rows) * self.scale

        # Rectangular case: draw one square factor per dimension (rows
        # first, then columns, so the RNG is consumed in a fixed order) and
        # multiply their leading `n_min` columns/rows together.
        left = self._random_orthogonal(rng, rows)
        right = self._random_orthogonal(rng, cols)
        n_min = min(rows, cols)
        return numpy.dot(left[:, :n_min], right[:n_min, :]) * self.scale

    def __repr__(self):
        return repr_attrs(self, 'scale')
218
219
220
class Sparse(NdarrayInitialization):
    """Initialize only a fraction of the weights, row-wise.

    Parameters
    ----------
    num_init : int or float
        If int, this is the number of weights to initialize per row. If
        float, it's the fraction of the weights per row to initialize.
    weights_init : :class:`NdarrayInitialization` instance
        The initialization scheme to initialize the weights with.
    sparse_init : :class:`NdarrayInitialization` instance, optional
        What to set the non-initialized weights to (0. by default)

    """
    def __init__(self, num_init, weights_init, sparse_init=None):
        self.num_init = num_init
        self.weights_init = weights_init

        if sparse_init is None:
            sparse_init = Constant(0.)
        self.sparse_init = sparse_init

    def generate(self, rng, shape):
        """Generate a 2D array with `num_init` initialized entries per row.

        Raises
        ------
        ValueError
            If an integer `num_init` is not positive, or a fractional
            `num_init` does not lie in the interval (0, 1].

        """
        # Start from the "background" values, then overwrite a random
        # subset of columns in every row.
        weights = self.sparse_init.generate(rng, shape)
        if isinstance(self.num_init, numbers.Integral):
            if not self.num_init > 0:
                raise ValueError("num_init must be positive, "
                                 "got {}".format(self.num_init))
            num_init = self.num_init
        else:
            if not 1 >= self.num_init > 0:
                raise ValueError("fractional num_init must lie in (0, 1], "
                                 "got {}".format(self.num_init))
            num_init = int(self.num_init * shape[1])
        values = self.weights_init.generate(rng, (shape[0], num_init))
        for i in range(shape[0]):
            # Draw the column indices from the supplied `rng` (not the
            # global NumPy generator) so that results are reproducible
            # for a given seed.
            random_indices = rng.choice(shape[1], num_init, replace=False)
            weights[i, random_indices] = values[i]
        return weights
258
259
260
class SparseND(Sparse):
    """Sparse initialization for tensors, with configurable "unit" axes.

    Parameters
    ----------
    axis : int or sequence
        The axis or axes that together index one "unit" when counting how
        many elements get initialized. For instance, with an axis of
        (0, 1) and a 4D tensor `W`, the first two axes are treated as a
        grid: `num_init` elements of `W[0, 0, :, :]` are initialized,
        another `num_init` elements of `W[0, 1, :, :]`, and so on.

    Notes
    -----
    See :class:`Sparse` for documentation of other arguments.

    """
    def __init__(self, axis, **kwargs):
        self.axis = axis
        super(SparseND, self).__init__(**kwargs)

    def generate(self, rng, shape):
        # NOTE(review): `pack` presumably turns a scalar or sequence into a
        # list -- the list concatenation below relies on that; confirm
        # against blocks.utils.
        unit_axes = pack(self.axis)
        n_axes = len(shape)
        rest_axes = [ax for ax in range(n_axes) if ax not in unit_axes]
        unit_dims = [shape[ax] for ax in unit_axes]
        rest_dims = [shape[ax] for ax in rest_axes]

        # Flatten to a matrix (units x everything else) so that the
        # row-wise logic of the parent class can be reused.
        flat_shape = (numpy.prod(unit_dims), numpy.prod(rest_dims))
        flat = super(SparseND, self).generate(rng, flat_shape)

        # Unflatten: the axes now appear as unit axes followed by the rest.
        grouped = flat.reshape(tuple(unit_dims) + tuple(rest_dims))
        grouped_order = unit_axes + rest_axes

        # Move every axis back to its position in the requested shape.
        restore = [grouped_order.index(ax) for ax in range(n_axes)]
        return grouped.transpose(restore)
294