1 | """Objects for encapsulating parameter initialization strategies.""" |
||
2 | from abc import ABCMeta, abstractmethod |
||
3 | import numbers |
||
4 | |||
5 | import numpy |
||
6 | import theano |
||
7 | from six import add_metaclass |
||
8 | |||
9 | from blocks.utils import repr_attrs, pack |
||
10 | |||
11 | |||
@add_metaclass(ABCMeta)
class NdarrayInitialization(object):
    """Abstract interface that every ndarray initialization scheme follows.

    Subclasses provide :meth:`generate`; :meth:`initialize` uses it to
    overwrite the value of a shared variable.

    """
    @abstractmethod
    def generate(self, rng, shape):
        """Produce an array of initial parameter values.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
        shape : tuple
            The shape of the parameter array that is being requested.

        Returns
        -------
        output : :class:`~numpy.ndarray`
            An ndarray of shape `shape` and dtype :attr:`config.floatX`
            whose values come from the scheme this object represents.

        """

    def initialize(self, var, rng, shape=None):
        """Overwrite a shared variable with freshly generated parameters.

        Parameters
        ----------
        var : object
            A Theano shared variable; its value is replaced by the array
            returned from :meth:`generate`.
        rng : :class:`numpy.random.RandomState`
        shape : tuple, optional
            The shape of the array to generate. When it is falsy (``None``
            or empty), the shape of the value currently stored in `var`
            is used instead.

        """
        target_shape = shape
        if not target_shape:
            # Borrow the internal value: only its shape is needed here.
            current = var.get_value(borrow=True, return_internal_type=True)
            target_shape = current.shape
        var.set_value(self.generate(rng, target_shape))
||
50 | |||
51 | |||
class Constant(NdarrayInitialization):
    """Fill parameters with a fixed value.

    The value can be a scalar or a :class:`~numpy.ndarray` of any shape
    that broadcasts against the requested parameter arrays.

    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The value to initialize with. A scalar, an ndarray, or anything
        :func:`numpy.asarray` accepts (e.g. a nested list), as long as its
        shape broadcasts to every shape requested through `initialize`.

    """
    def __init__(self, constant):
        # Converted once here so that `generate` always assigns an ndarray.
        self.constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        """Return a `shape`-shaped array filled with the stored constant.

        `rng` is accepted for interface compatibility and is not used.

        """
        float_type = theano.config.floatX
        filled = numpy.empty(shape, dtype=float_type)
        # Ellipsis assignment broadcasts the constant over the whole array.
        filled[...] = self.constant
        return filled

    def __repr__(self):
        return repr_attrs(self, 'constant')
||
76 | |||
77 | |||
class IsotropicGaussian(NdarrayInitialization):
    """Draw parameters from an isotropic Gaussian distribution.

    Parameters
    ----------
    std : float, optional
        Standard deviation of the Gaussian. Defaults to 1.
    mean : float, optional
        Mean of the Gaussian. Defaults to 0.

    Notes
    -----
    Mind the argument order: the standard deviation comes first and the
    mean comes second!

    """
    def __init__(self, std=1, mean=0):
        self.std = std
        self.mean = mean

    def generate(self, rng, shape):
        """Sample a `shape`-shaped array and cast it to ``floatX``."""
        samples = rng.normal(loc=self.mean, scale=self.std, size=shape)
        return samples.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'std')
||
104 | |||
105 | |||
class Uniform(NdarrayInitialization):
    """Initialize parameters from a uniform distribution.

    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function); Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.

    Raises
    ------
    ValueError
        If both or neither of `width` and `std` are given.

    """
    def __init__(self, mean=0., width=None, std=None):
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2
            self.width = numpy.sqrt(12) * std
        else:
            self.width = width
        self.mean = mean

    def generate(self, rng, shape):
        """Sample a `shape`-shaped array and cast it to ``floatX``."""
        # Divide by a float: on Python 2 an integer `width` would otherwise
        # be floor-divided (e.g. width=1 would collapse the support to a
        # single point at `mean`).
        half_width = self.width / 2.
        m = rng.uniform(self.mean - half_width, self.mean + half_width,
                        size=shape)
        return m.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'width')
||
143 | |||
144 | |||
class Identity(NdarrayInitialization):
    """Initialize to a (scaled) identity matrix.

    Only 2D shapes are supported. For a non-square shape the result is
    whatever :func:`numpy.eye` yields for that shape: ones on the main
    diagonal and zeros everywhere else.

    Parameters
    ----------
    mult : float, optional
        Scalar that the identity matrix is multiplied by. Defaults to 1.

    """
    def __init__(self, mult=1):
        self.mult = mult

    def generate(self, rng, shape):
        """Return ``mult`` times a `shape`-shaped identity matrix.

        `rng` is accepted for interface compatibility and is not used.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError
        n_rows, n_cols = shape
        identity = numpy.eye(n_rows, n_cols, dtype=theano.config.floatX)
        return self.mult * identity

    def __repr__(self):
        return repr_attrs(self, 'mult')
||
168 | |||
169 | |||
class Orthogonal(NdarrayInitialization):
    """Initialize a random orthogonal matrix.

    Only works for 2D arrays.

    Parameters
    ----------
    scale : float, optional
        Scalar that the resulting matrix is multiplied by. Defaults to 1.
        For a discussion of the importance of scale for training time
        and generalization refer to [Saxe2013]_.

    .. [Saxe2013] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.,
       *Exact solutions to the nonlinear dynamics of learning in deep
       linear neural networks*,
       arXiv:1312.6120 [cond-mat, q-bio, stat].

    """
    def __init__(self, scale=1):
        self.scale = scale

    @staticmethod
    def _signed_q_factor(rng, size):
        """Return the sign-corrected Q factor of a random square matrix.

        A `size` x `size` matrix with standard normal entries is drawn
        from `rng`, cast to ``floatX`` and QR-decomposed.

        """
        gaussian = rng.randn(size, size).astype(theano.config.floatX)
        q_factor, r_factor = numpy.linalg.qr(gaussian)
        # NumPy doesn't force the diagonal of R to be non-negative, so each
        # column of Q is multiplied by the sign of the matching R entry.
        return q_factor * numpy.sign(numpy.diag(r_factor))

    def generate(self, rng, shape):
        """Return a `shape`-shaped matrix built from random Q factors.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError
        n_rows, n_cols = shape

        if n_rows == n_cols:
            # Square case: a single Q factor already has the right shape.
            return self._signed_q_factor(rng, n_rows) * self.scale

        # Rectangular case: combine one Q factor per dimension. The draws
        # happen in this order (rows first, then columns).
        left = self._signed_q_factor(rng, n_rows)
        right = self._signed_q_factor(rng, n_cols)
        inner = min(n_rows, n_cols)
        return numpy.dot(left[:, :inner], right[:inner, :]) * self.scale

    def __repr__(self):
        return repr_attrs(self, 'scale')
||
218 | |||
219 | |||
class Sparse(NdarrayInitialization):
    """Initialize only a fraction of the weights, row-wise.

    Parameters
    ----------
    num_init : int or float
        If int, this is the number of weights to initialize per row. If
        float, it's the fraction of the weights per row to initialize.
    weights_init : :class:`NdarrayInitialization` instance
        The initialization scheme to initialize the weights with.
    sparse_init : :class:`NdarrayInitialization` instance, optional
        What to set the non-initialized weights to (0. by default)

    """
    def __init__(self, num_init, weights_init, sparse_init=None):
        self.num_init = num_init
        self.weights_init = weights_init

        if sparse_init is None:
            sparse_init = Constant(0.)
        self.sparse_init = sparse_init

    def generate(self, rng, shape):
        """Return a 2D array in which each row has `num_init` random entries.

        The array is first filled by `sparse_init`; then, for every row,
        `num_init` distinct column positions are drawn from `rng` and
        overwritten with values produced by `weights_init`.

        Raises
        ------
        ValueError
            If `num_init` is an integer that is not positive, or a
            non-integer outside the interval (0, 1].

        """
        weights = self.sparse_init.generate(rng, shape)
        if isinstance(self.num_init, numbers.Integral):
            if not self.num_init > 0:
                raise ValueError("an integer num_init must be positive")
            num_init = self.num_init
        else:
            if not 1 >= self.num_init > 0:
                raise ValueError("a fractional num_init must lie in (0, 1]")
            num_init = int(self.num_init * shape[1])
        values = self.weights_init.generate(rng, (shape[0], num_init))
        for i in range(shape[0]):
            # Draw the positions from the supplied generator (not the global
            # numpy.random state) so a seeded `rng` gives reproducible
            # sparsity patterns.
            random_indices = rng.choice(shape[1], num_init, replace=False)
            weights[i, random_indices] = values[i]
        return weights
||
258 | |||
259 | |||
class SparseND(Sparse):
    """Sparse initialization for tensors, with configurable "unit" axes.

    Parameters
    ----------
    axis : int or sequence
        The axis or axes that together index one "unit" for the purpose
        of counting initialized elements. For instance, with an axis of
        (0, 1) and a 4D tensor `W`, the first two axes are treated as a
        grid: `num_init` elements of `W[0, 0, :, :]` are initialized,
        another `num_init` elements of `W[0, 1, :, :]`, and so on.

    Notes
    -----
    See :class:`Sparse` for documentation of other arguments.

    """
    def __init__(self, axis, **kwargs):
        self.axis = axis
        super(SparseND, self).__init__(**kwargs)

    def generate(self, rng, shape):
        """Generate a sparse matrix and fold it back into `shape`.

        The unit axes are flattened into rows and all remaining axes into
        columns, the 2D row-wise generator of the parent class is run on
        that matrix, and the result is reshaped and transposed so the
        axes end up in their original order.

        """
        unit_axes = pack(self.axis)
        rest_axes = [ax for ax in range(len(shape)) if ax not in unit_axes]
        unit_dims = [shape[ax] for ax in unit_axes]
        rest_dims = [shape[ax] for ax in rest_axes]

        flat_shape = (numpy.prod(unit_dims), numpy.prod(rest_dims))
        flat = super(SparseND, self).generate(rng, flat_shape)

        # After the reshape the axes are ordered as unit axes followed by
        # the remaining ones; look up where each original axis ended up to
        # undo that grouping.
        grouped_order = unit_axes + rest_axes
        folded = flat.reshape(tuple(unit_dims) + tuple(rest_dims))
        restore = list(map(grouped_order.index, range(len(shape))))
        return folded.transpose(restore)
||
294 |
This check looks for invalid names for a range of different identifiers.
You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.
If your project includes a Pylint configuration file, the settings contained in that file take precedence.
To find out more about Pylint, please refer to their site.