Issues in initialization.py (master) - Issues in master - mila-udem/blocks - Measure and Improve Code Quality continuously with Scrutinizer

Issues (119)

blocks/initialization.py (8 issues)

Labels

Severity

Informational 8

"""Objects for encapsulating parameter initialization strategies."""
from abc import ABCMeta, abstractmethod
import numbers

import numpy
import theano
from six import add_metaclass

from blocks.utils import repr_attrs, pack


@add_metaclass(ABCMeta)
class NdarrayInitialization(object):
    """Abstract interface shared by all ndarray initialization schemes."""
    @abstractmethod
    def generate(self, rng, shape):
        """Produce a freshly initialized parameter array.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
            Source of randomness to draw from.
        shape : tuple
            The shape of the parameter array to create.

        Returns
        -------
        output : :class:`~numpy.ndarray`
            An array of shape `shape` and dtype :attr:`config.floatX`
            whose values follow the scheme implemented by the subclass.

        """

    def initialize(self, var, rng, shape=None):
        """Overwrite the value of a shared variable with generated values.

        Parameters
        ----------
        var : object
            A Theano shared variable; its value is replaced by the array
            returned from :meth:`generate`.
        rng : :class:`numpy.random.RandomState`
            Source of randomness, forwarded to :meth:`generate`.
        shape : tuple, optional
            The shape of the array to generate. When omitted (or empty),
            the shape of the current value of `var` is used.

        """
        target_shape = shape
        if not target_shape:
            # No shape requested: reuse the shape of the existing value.
            current = var.get_value(borrow=True, return_internal_type=True)
            target_shape = current.shape
        generated = self.generate(rng, target_shape)
        var.set_value(generated)


class Constant(NdarrayInitialization):
    """Fill parameter arrays with a fixed value.

    The value can be a scalar or an array; an array is broadcast against
    the shape requested at generation time.

    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The value to fill with. Either a scalar or an ndarray (or
        anything convertible to one, e.g. a nested list) whose shape
        broadcasts to every shape requested through `initialize`.

    """
    def __init__(self, constant):
        # Converted once so nested lists and scalars are handled uniformly.
        self.constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        # `rng` is unused: the output is deterministic.
        filled = numpy.empty(shape, dtype=theano.config.floatX)
        # Ellipsis assignment broadcasts the constant over the whole array.
        filled[...] = self.constant
        return filled

    def __repr__(self):
        return repr_attrs(self, 'constant')


class IsotropicGaussian(NdarrayInitialization):
    """Draw parameters independently from a single Gaussian distribution.

    Parameters
    ----------
    std : float, optional
        Standard deviation of the Gaussian. Defaults to 1.
    mean : float, optional
        Mean of the Gaussian. Defaults to 0.

    Notes
    -----
    Mind the argument order: the standard deviation comes first and the
    mean comes second!

    """
    def __init__(self, std=1, mean=0):
        self.std = std
        self.mean = mean

    def generate(self, rng, shape):
        samples = rng.normal(loc=self.mean, scale=self.std, size=shape)
        # The sampler returns float64; cast to the configured float type.
        return samples.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'std')


class Uniform(NdarrayInitialization):
    """Initialize parameters from a uniform distribution.

    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function); Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.

    Raises
    ------
    ValueError
        If both or neither of `width` and `std` are given.

    """
    def __init__(self, mean=0., width=None, std=None):
        # Both tests agree exactly when zero or two of the options are set.
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2
            self.width = numpy.sqrt(12) * std
        else:
            self.width = width
        self.mean = mean

    def generate(self, rng, shape):
        # Divide by a float literal: under Python 2 an integer `width`
        # would otherwise be floor-divided (width=1 -> half-width 0).
        half_width = self.width / 2.
        low = self.mean - half_width
        high = self.mean + half_width
        samples = rng.uniform(low, high, size=shape)
        return samples.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'width')


class Identity(NdarrayInitialization):
    """Initialize to the identity matrix.

    Only works for 2D arrays. If the number of columns is not equal to the
    number of rows, the array will be truncated or padded with zeros.

    Parameters
    ----------
    mult : float, optional
        Multiply the identity matrix with a scalar. Defaults to 1.

    """
    def __init__(self, mult=1):
        self.mult = mult

    def generate(self, rng, shape):
        """Return `mult` times a (possibly rectangular) identity matrix.

        `rng` is unused: the output is deterministic.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError("Identity initialization only works for 2D "
                             "arrays, got shape {}".format(shape))
        rows, cols = shape
        return self.mult * numpy.eye(rows, cols, dtype=theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mult')


class Orthogonal(NdarrayInitialization):
    """Initialize a random orthogonal matrix.

    Only works for 2D arrays.

    Parameters
    ----------
    scale : float, optional
        Multiply the resulting matrix with a scalar. Defaults to 1.
        For a discussion of the importance of scale for training time
        and generalization refer to [Saxe2013]_.

        .. [Saxe2013] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.,
           *Exact solutions to the nonlinear dynamics of learning in deep
           linear neural networks*,
           arXiv:1312.6120 [cond-mat, q-bio, stat].

    """
    def __init__(self, scale=1):
        self.scale = scale

    @staticmethod
    def _random_orthogonal(rng, size):
        """Draw a random `size` x `size` orthogonal matrix from `rng`."""
        # QR decomposition of matrix with entries in N(0, 1) is random
        gaussian = rng.randn(size, size).astype(theano.config.floatX)
        q_factor, r_factor = numpy.linalg.qr(gaussian)
        # Correct that NumPy doesn't force diagonal of R to be non-negative
        return q_factor * numpy.sign(numpy.diag(r_factor))

    def generate(self, rng, shape):
        """Return a scaled random matrix built from orthogonal factors.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError("Orthogonal initialization only works for 2D "
                             "arrays, got shape {}".format(shape))
        n_rows, n_cols = shape[0], shape[1]

        if n_rows == n_cols:
            # For square weight matrices we can simplify the logic
            # and be more exact:
            return self._random_orthogonal(rng, n_rows) * self.scale

        # Rectangular case: combine the leading columns of a row-sized
        # orthogonal matrix with the leading rows of a column-sized one.
        # The row-sized factor is drawn first, then the column-sized one.
        row_basis = self._random_orthogonal(rng, n_rows)
        col_basis = self._random_orthogonal(rng, n_cols)
        n_min = min(n_rows, n_cols)
        product = numpy.dot(row_basis[:, :n_min], col_basis[:n_min, :])
        return product * self.scale

    def __repr__(self):
        return repr_attrs(self, 'scale')


class Sparse(NdarrayInitialization):
    """Initialize only a fraction of the weights, row-wise.

    Parameters
    ----------
    num_init : int or float
        If int, this is the number of weights to initialize per row. If
        float, it's the fraction of the weights per row to initialize.
    weights_init : :class:`NdarrayInitialization` instance
        The initialization scheme to initialize the weights with.
    sparse_init : :class:`NdarrayInitialization` instance, optional
        What to set the non-initialized weights to (0. by default)

    """
    def __init__(self, num_init, weights_init, sparse_init=None):
        self.num_init = num_init
        self.weights_init = weights_init

        if sparse_init is None:
            sparse_init = Constant(0.)
        self.sparse_init = sparse_init

    def generate(self, rng, shape):
        """Generate a 2D array with `num_init` initialized entries per row.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
            Used for the background values, the initialized values and
            the choice of which columns to initialize in each row.
        shape : tuple
            A shape tuple; `shape[0]` is the number of rows and `shape[1]`
            the number of columns.

        Raises
        ------
        ValueError
            If `num_init` is a non-positive integer, or a non-integer
            outside of the interval (0, 1].

        """
        weights = self.sparse_init.generate(rng, shape)
        if isinstance(self.num_init, numbers.Integral):
            if not self.num_init > 0:
                raise ValueError("an integer num_init must be positive, "
                                 "got {}".format(self.num_init))
            num_init = self.num_init
        else:
            if not 1 >= self.num_init > 0:
                raise ValueError("a fractional num_init must lie in "
                                 "(0, 1], got {}".format(self.num_init))
            num_init = int(self.num_init * shape[1])
        values = self.weights_init.generate(rng, (shape[0], num_init))
        for i, row_values in enumerate(values):
            # Draw the column indices from the supplied `rng`, not from
            # the global NumPy state, so results follow the caller's seed.
            random_indices = rng.choice(shape[1], num_init, replace=False)
            weights[i, random_indices] = row_values
        return weights


class SparseND(Sparse):
    """Initialize only a fraction of the weights with configurable axes.

    Parameters
    ----------
    axis : int or sequence
        Which axis or axes are to be treated as a "unit" for the purpose
        of the number of elements initialized. For example, an axis of
        (0, 1) when initializing a 4D tensor `W` will treat the first two
        axes of the weight tensor as a grid and initialize `num_init`
        elements of `W[0, 0, :, :]`, another `num_init` elements of
        `W[0, 1, :, :]`, and so on.

    Notes
    -----
    See :class:`Sparse` for documentation of other arguments.

    """
    def __init__(self, axis, **kwargs):
        self.axis = axis
        super(SparseND, self).__init__(**kwargs)

    def generate(self, rng, shape):
        """Generate a sparse N-dimensional array of the given shape.

        The "unit" axes are flattened into rows and all remaining axes
        into columns, the resulting matrix is generated by
        :meth:`Sparse.generate`, and the result is reshaped and transposed
        back to the requested axis order.

        """
        axis_ind = pack(self.axis)
        other_ind = [i for i in range(len(shape)) if i not in axis_ind]
        axis_shapes = [shape[i] for i in axis_ind]
        other_shapes = [shape[i] for i in other_ind]
        # numpy.prod of an empty list is the float 1.0, which is not a
        # valid array dimension; cast so that one empty group still works.
        n_rows = int(numpy.prod(axis_shapes))
        n_cols = int(numpy.prod(other_shapes))
        matrix = super(SparseND, self).generate(rng, (n_rows, n_cols))
        unflattened = matrix.reshape(tuple(axis_shapes) + tuple(other_shapes))
        # `unflattened` has its axes ordered as axis_ind + other_ind; the
        # inverse of that permutation restores the requested order.
        wrong_ind = axis_ind + other_ind
        transp_ind = [wrong_ind.index(i) for i in range(len(shape))]
        return unflattened.transpose(transp_ind)


1			"""Objects for encapsulating parameter initialization strategies."""
2			from abc import ABCMeta, abstractmethod
3			import numbers
4
5			import numpy
6			import theano
7			from six import add_metaclass
8
9			from blocks.utils import repr_attrs, pack
10
11
12			@add_metaclass(ABCMeta)
13			class NdarrayInitialization(object):
14			"""Base class specifying the interface for ndarray initialization."""
15			@abstractmethod
16			def generate(self, rng, shape):
17			"""Generate an initial set of parameters from a given distribution.
18
19			Parameters
20			----------
21			rng : :class:`numpy.random.RandomState`
22			shape : tuple
23			A shape tuple for the requested parameter array shape.
24
25			Returns
26			-------
27			output : :class:`~numpy.ndarray`
28			An ndarray with values drawn from the distribution specified by
29			this object, of shape `shape`, with dtype
30			:attr:`config.floatX`.
31
32			"""
33
34			def initialize(self, var, rng, shape=None):
35			"""Initialize a shared variable with generated parameters.
36
37			Parameters
38			----------
39			var : object
40			A Theano shared variable whose value will be set with values
41			drawn from this :class:`NdarrayInitialization` instance.
42			rng : :class:`numpy.random.RandomState`
43			shape : tuple
44			A shape tuple for the requested parameter array shape.
45
46			"""
47			if not shape:
48			shape = var.get_value(borrow=True, return_internal_type=True).shape
49			var.set_value(self.generate(rng, shape))
50
51
52			class Constant(NdarrayInitialization):
53			"""Initialize parameters to a constant.
54
55			The constant may be a scalar or a :class:`~numpy.ndarray` of any shape
56			that is broadcastable with the requested parameter arrays.
57
58			Parameters
59			----------
60			constant : :class:`~numpy.ndarray`
61			The initialization value to use. Must be a scalar or an ndarray (or
62			compatible object, such as a nested list) that has a shape that is
63			broadcastable with any shape requested by `initialize`.
64
65			"""
66			def __init__(self, constant):
67			self.constant = numpy.asarray(constant)
68
69			def generate(self, rng, shape):
70			dest = numpy.empty(shape, dtype=theano.config.floatX)
71			dest[...] = self.constant
72			return dest
73
74			def __repr__(self):
75			return repr_attrs(self, 'constant')
76
77
78			class IsotropicGaussian(NdarrayInitialization):
79			"""Initialize parameters from an isotropic Gaussian distribution.
80
81			Parameters
82			----------
83			std : float, optional
84			The standard deviation of the Gaussian distribution. Defaults to 1.
85			mean : float, optional
86			The mean of the Gaussian distribution. Defaults to 0
87
88			Notes
89			-----
90			Be careful: the standard deviation goes first and the mean goes
91			second!
92
93			"""
94			def __init__(self, std=1, mean=0):
95			self.mean = mean
96			self.std = std
97
98			def generate(self, rng, shape):
99			m = rng.normal(self.mean, self.std, size=shape)
100			return m.astype(theano.config.floatX)
101
102			def __repr__(self):
103			return repr_attrs(self, 'mean', 'std')
104
105
106			class Uniform(NdarrayInitialization):
107			"""Initialize parameters from a uniform distribution.
108
109			Parameters
110			----------
111			mean : float, optional
112			The mean of the uniform distribution (i.e. the center of mass for
113			the density function); Defaults to 0.
114			width : float, optional
115			One way of specifying the range of the uniform distribution. The
116			support will be [mean - width/2, mean + width/2]. Exactly one
117			of `width` or `std` must be specified.
118			std : float, optional
119			An alternative method of specifying the range of the uniform
120			distribution. Chooses the width of the uniform such that random
121			variates will have a desired standard deviation. Exactly one of
122			`width` or `std` must be specified.
123
124			"""
125			def __init__(self, mean=0., width=None, std=None):
126			if (width is not None) == (std is not None):
127			raise ValueError("must specify width or std, "
128			"but not both")
129			if std is not None:
130			# Variance of a uniform is 1/12 * width^2
131			self.width = numpy.sqrt(12) * std
132			else:
133			self.width = width
134			self.mean = mean
135
136			def generate(self, rng, shape):
137			w = self.width / 2
138			m = rng.uniform(self.mean - w, self.mean + w, size=shape)
139			return m.astype(theano.config.floatX)
140
141			def __repr__(self):
142			return repr_attrs(self, 'mean', 'width')
143
144
145			class Identity(NdarrayInitialization):
146			"""Initialize to the identity matrix.
147
148			Only works for 2D arrays. If the number of columns is not equal to the
149			number of rows, the array will be truncated or padded with zeros.
150
151			Parameters
152			----------
153			mult : float, optional
154			Multiply the identity matrix with a scalar. Defaults to 1.
155
156			"""
157			def __init__(self, mult=1):
158			self.mult = mult
159
160			def generate(self, rng, shape):
161			if len(shape) != 2:
162			raise ValueError
163			rows, cols = shape
164			return self.mult * numpy.eye(rows, cols, dtype=theano.config.floatX)
165
166			def __repr__(self):
167			return repr_attrs(self, 'mult')
168
169
170			class Orthogonal(NdarrayInitialization):
171			"""Initialize a random orthogonal matrix.
172
173			Only works for 2D arrays.
174
175			Parameters
176			----------
177			scale : float, optional
178			Multiply the resulting matrix with a scalar. Defaults to 1.
179			For a discussion of the importance of scale for training time
180			and generalization refer to [Saxe2013]_.
181
182			.. [Saxe2013] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.,
183			*Exact solutions to the nonlinear dynamics of learning in deep
184			linear neural networks*,
185			arXiv:1312.6120 [cond-mat, q-bio, stat].
186
187			"""
188			def __init__(self, scale=1):
189			self.scale = scale
190
191			def generate(self, rng, shape):
192			if len(shape) != 2:
193			raise ValueError
194
195			if shape[0] == shape[1]:
196			# For square weight matrices we can simplify the logic
197			# and be more exact:
198			M = rng.randn(*shape).astype(theano.config.floatX)
199			Q, R = numpy.linalg.qr(M)
200			Q = Q * numpy.sign(numpy.diag(R))
201			return Q * self.scale
202
203			M1 = rng.randn(shape[0], shape[0]).astype(theano.config.floatX)
			0 ignored issues – show Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `M1` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
204			M2 = rng.randn(shape[1], shape[1]).astype(theano.config.floatX)
			0 ignored issues – show Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `M2` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
205
206			# QR decomposition of matrix with entries in N(0, 1) is random
207			Q1, R1 = numpy.linalg.qr(M1)
			0 ignored issues – show Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `Q1` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `R1` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
208			Q2, R2 = numpy.linalg.qr(M2)
			0 ignored issues – show Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `Q2` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `R2` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
209			# Correct that NumPy doesn't force diagonal of R to be non-negative
210			Q1 = Q1 * numpy.sign(numpy.diag(R1))
			0 ignored issues – show Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `Q1` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
211			Q2 = Q2 * numpy.sign(numpy.diag(R2))
			0 ignored issues – show Coding Style Naming introduced 2015-11-24 17:27 UTC by Report Bug Copy Issue Report Show Similar Issues like this The name `Q2` does not conform to the variable naming conventions (`(([a-z_][a-z0-9_]{0,30})\|(_?[A-Z]))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
212
213			n_min = min(shape[0], shape[1])
214			return numpy.dot(Q1[:, :n_min], Q2[:n_min, :]) * self.scale
215
216			def __repr__(self):
217			return repr_attrs(self, 'scale')
218
219
220			class Sparse(NdarrayInitialization):
221			"""Initialize only a fraction of the weights, row-wise.
222
223			Parameters
224			----------
225			num_init : int or float
226			If int, this is the number of weights to initialize per row. If
227			float, it's the fraction of the weights per row to initialize.
228			weights_init : :class:`NdarrayInitialization` instance
229			The initialization scheme to initialize the weights with.
230			sparse_init : :class:`NdarrayInitialization` instance, optional
231			What to set the non-initialized weights to (0. by default)
232
233			"""
234			def __init__(self, num_init, weights_init, sparse_init=None):
235			self.num_init = num_init
236			self.weights_init = weights_init
237
238			if sparse_init is None:
239			sparse_init = Constant(0.)
240			self.sparse_init = sparse_init
241
242			def generate(self, rng, shape):
243			weights = self.sparse_init.generate(rng, shape)
244			if isinstance(self.num_init, numbers.Integral):
245			if not self.num_init > 0:
246			raise ValueError
247			num_init = self.num_init
248			else:
249			if not 1 >= self.num_init > 0:
250			raise ValueError
251			num_init = int(self.num_init * shape[1])
252			values = self.weights_init.generate(rng, (shape[0], num_init))
253			for i in range(shape[0]):
254			random_indices = numpy.random.choice(shape[1], num_init,
255			replace=False)
256			weights[i, random_indices] = values[i]
257			return weights
258
259
260			class SparseND(Sparse):
261			"""Initialize only a fraction of the weights with configurable axes.
262
263			Parameters
264			----------
265			axis : int or sequence
266			Which axis or axes are to be treated as a "unit" for the purpose
267			of the number of elements initialized. For example, an axis of
268			(0, 1) when initializing a 4D tensor `W` will treat the first two
269			axes of the weight tensor as a grid and initialize `num_init`
270			elements of `W[0, 0, :, :]`, another `num_init` elements of
271			`W[0, 1, :, :]`, and so on.
272
273			Notes
274			-----
275			See :class:`Sparse` for documentation of other arguments.
276
277			"""
278			def __init__(self, axis, **kwargs):
279			self.axis = axis
280			super(SparseND, self).__init__(**kwargs)
281
282			def generate(self, rng, shape):
283			axis_ind = pack(self.axis)
284			other_ind = [i for i in range(len(shape)) if i not in axis_ind]
285			axis_shapes = [shape[i] for i in axis_ind]
286			other_shapes = [shape[i] for i in other_ind]
287			matrix = super(SparseND, self).generate(rng,
288			(numpy.prod(axis_shapes),
289			numpy.prod(other_shapes)))
290			unflattened = matrix.reshape(tuple(axis_shapes) + tuple(other_shapes))
291			wrong_ind = axis_ind + other_ind
292			transp_ind = [wrong_ind.index(i) for i in range(len(shape))]
293			return unflattened.transpose(transp_ind)
294

mila-udem / blocks

Issues (119)

blocks/initialization.py (8 issues)

Labels

Severity

Introduced By

Duplication Side-by-Side

Filter issues like