1 | """Objects for encapsulating parameter initialization strategies.""" |
||
2 | from abc import ABCMeta, abstractmethod |
||
3 | import numbers |
||
4 | |||
5 | import numpy |
||
6 | import theano |
||
7 | from six import add_metaclass |
||
8 | |||
9 | from blocks.utils import repr_attrs, pack |
||
10 | |||
11 | |||
@add_metaclass(ABCMeta)
class NdarrayInitialization(object):
    """Abstract interface shared by all ndarray initialization schemes.

    Subclasses implement :meth:`generate`; :meth:`initialize` relies on it
    to fill a shared variable.

    """
    @abstractmethod
    def generate(self, rng, shape):
        """Produce a freshly initialized parameter array.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
            Source of randomness for the draw.
        shape : tuple
            Shape of the parameter array to produce.

        Returns
        -------
        output : :class:`~numpy.ndarray`
            Array of shape `shape` and dtype :attr:`config.floatX` whose
            values follow the scheme implemented by this object.

        """

    def initialize(self, var, rng, shape=None):
        """Overwrite a shared variable with newly generated parameters.

        Parameters
        ----------
        var : object
            A Theano shared variable; its value is replaced by the output
            of :meth:`generate`.
        rng : :class:`numpy.random.RandomState`
            Source of randomness forwarded to :meth:`generate`.
        shape : tuple, optional
            Shape of the array to generate. When omitted (or otherwise
            falsy), the shape of the current value of `var` is used.

        """
        target_shape = shape
        if not target_shape:
            # No usable shape was given: reuse the shape the variable
            # already has.
            current = var.get_value(borrow=True, return_internal_type=True)
            target_shape = current.shape
        var.set_value(self.generate(rng, target_shape))
||
50 | |||
51 | |||
class Constant(NdarrayInitialization):
    """Fill parameters with a fixed value.

    The value can be a scalar or any :class:`~numpy.ndarray` whose shape
    broadcasts against the requested parameter shape.

    Parameters
    ----------
    constant : :class:`~numpy.ndarray`
        The value to initialize with. Either a scalar or an ndarray (or
        something convertible to one, e.g. a nested list) whose shape is
        broadcastable with every shape requested through `initialize`.

    """
    def __init__(self, constant):
        # Stored as an ndarray so that nested lists broadcast like arrays.
        self.constant = numpy.asarray(constant)

    def generate(self, rng, shape):
        """Return a `shape`-shaped floatX array filled with the constant.

        `rng` is accepted for interface compatibility and is not used.

        """
        filled = numpy.empty(shape, dtype=theano.config.floatX)
        # Ellipsis assignment broadcasts the constant over the whole array
        # (and also works for zero-dimensional targets).
        filled[...] = self.constant
        return filled

    def __repr__(self):
        return repr_attrs(self, 'constant')
||
76 | |||
77 | |||
class IsotropicGaussian(NdarrayInitialization):
    """Draw parameters from an isotropic Gaussian distribution.

    Parameters
    ----------
    std : float, optional
        Standard deviation of the Gaussian. Defaults to 1.
    mean : float, optional
        Mean of the Gaussian. Defaults to 0.

    Notes
    -----
    Mind the argument order: the standard deviation comes first and the
    mean comes second!

    """
    def __init__(self, std=1, mean=0):
        self.std = std
        self.mean = mean

    def generate(self, rng, shape):
        """Return a floatX array of the given shape sampled from `rng`."""
        return rng.normal(
            self.mean, self.std, size=shape
        ).astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'std')
||
104 | |||
105 | |||
class Uniform(NdarrayInitialization):
    """Initialize parameters from a uniform distribution.

    Parameters
    ----------
    mean : float, optional
        The mean of the uniform distribution (i.e. the center of mass for
        the density function). Defaults to 0.
    width : float, optional
        One way of specifying the range of the uniform distribution. The
        support will be [mean - width/2, mean + width/2]. **Exactly one**
        of `width` or `std` must be specified.
    std : float, optional
        An alternative method of specifying the range of the uniform
        distribution. Chooses the width of the uniform such that random
        variates will have a desired standard deviation. **Exactly one** of
        `width` or `std` must be specified.

    Raises
    ------
    ValueError
        If both or neither of `width` and `std` are given.

    """
    def __init__(self, mean=0., width=None, std=None):
        # Both given and neither given are equally invalid.
        if (width is not None) == (std is not None):
            raise ValueError("must specify width or std, "
                             "but not both")
        if std is not None:
            # Variance of a uniform is 1/12 * width^2
            self.width = numpy.sqrt(12) * std
        else:
            self.width = width
        self.mean = mean

    def generate(self, rng, shape):
        """Return a floatX array of the given shape sampled from `rng`."""
        # Divide by a float: with an integer `width`, `width / 2` would be
        # floor division on Python 2 and silently shrink the support.
        half_width = self.width / 2.
        m = rng.uniform(self.mean - half_width, self.mean + half_width,
                        size=shape)
        return m.astype(theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mean', 'width')
||
143 | |||
144 | |||
class Identity(NdarrayInitialization):
    """Initialize to the identity matrix.

    Only works for 2D arrays. If the number of columns is not equal to the
    number of rows, the array will be truncated or padded with zeros.

    Parameters
    ----------
    mult : float, optional
        Multiply the identity matrix with a scalar. Defaults to 1.

    """
    def __init__(self, mult=1):
        self.mult = mult

    def generate(self, rng, shape):
        """Return `mult` times a (possibly rectangular) identity matrix.

        `rng` is accepted for interface compatibility and is not used.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError("Identity initialization requires a 2D shape, "
                             "got {!r}".format(shape))
        rows, cols = shape
        return self.mult * numpy.eye(rows, cols, dtype=theano.config.floatX)

    def __repr__(self):
        return repr_attrs(self, 'mult')
||
168 | |||
169 | |||
class Orthogonal(NdarrayInitialization):
    """Initialize a random orthogonal matrix.

    Only works for 2D arrays.

    Parameters
    ----------
    scale : float, optional
        Multiply the resulting matrix with a scalar. Defaults to 1.
        For a discussion of the importance of scale for training time
        and generalization refer to [Saxe2013]_.

        .. [Saxe2013] Saxe, A.M., McClelland, J.L., Ganguli, S., 2013.,
           *Exact solutions to the nonlinear dynamics of learning in deep
           linear neural networks*,
           arXiv:1312.6120 [cond-mat, q-bio, stat].

    """
    def __init__(self, scale=1):
        self.scale = scale

    @staticmethod
    def _sign_corrected_q(matrix):
        """Return the Q factor of `matrix` with column signs normalized.

        NumPy doesn't force the diagonal of R to be non-negative, so each
        column of Q is multiplied by the sign of the matching diagonal
        entry of R.

        """
        q, r = numpy.linalg.qr(matrix)
        return q * numpy.sign(numpy.diag(r))

    def generate(self, rng, shape):
        """Return a scaled random matrix built from orthogonal factors.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
            Source of the Gaussian matrices that are QR-decomposed.
        shape : tuple
            Two-element shape of the requested matrix.

        Raises
        ------
        ValueError
            If `shape` does not have exactly two dimensions.

        """
        if len(shape) != 2:
            raise ValueError("Orthogonal initialization requires a 2D "
                             "shape, got {!r}".format(shape))

        if shape[0] == shape[1]:
            # For square weight matrices we can simplify the logic
            # and be more exact:
            square = rng.randn(*shape).astype(theano.config.floatX)
            return self._sign_corrected_q(square) * self.scale

        # Draw both Gaussian matrices before decomposing either one, so the
        # order in which `rng` is consumed stays the same as before.
        left = rng.randn(shape[0], shape[0]).astype(theano.config.floatX)
        right = rng.randn(shape[1], shape[1]).astype(theano.config.floatX)

        # QR decomposition of matrix with entries in N(0, 1) is random
        q_left = self._sign_corrected_q(left)
        q_right = self._sign_corrected_q(right)

        # Combine the leading columns of one factor with the leading rows
        # of the other to obtain a (shape[0], shape[1]) result.
        n_min = min(shape[0], shape[1])
        return numpy.dot(q_left[:, :n_min], q_right[:n_min, :]) * self.scale

    def __repr__(self):
        return repr_attrs(self, 'scale')
||
218 | |||
219 | |||
class Sparse(NdarrayInitialization):
    """Initialize only a fraction of the weights, row-wise.

    Parameters
    ----------
    num_init : int or float
        If int, this is the number of weights to initialize per row. If
        float, it's the fraction of the weights per row to initialize.
    weights_init : :class:`NdarrayInitialization` instance
        The initialization scheme to initialize the weights with.
    sparse_init : :class:`NdarrayInitialization` instance, optional
        What to set the non-initialized weights to (0. by default)

    """
    def __init__(self, num_init, weights_init, sparse_init=None):
        self.num_init = num_init
        self.weights_init = weights_init

        if sparse_init is None:
            sparse_init = Constant(0.)
        self.sparse_init = sparse_init

    def generate(self, rng, shape):
        """Return a 2D array with `num_init` entries per row initialized.

        Parameters
        ----------
        rng : :class:`numpy.random.RandomState`
            Used for the background values, the initialized values and
            the choice of which columns to initialize in each row.
        shape : tuple
            Two-element shape (rows, columns) of the requested array.

        Raises
        ------
        ValueError
            If an integer `num_init` is not positive, or a fractional
            `num_init` is not in the interval (0, 1].

        """
        # Validate first so an invalid setting fails before any array is
        # generated.
        if isinstance(self.num_init, numbers.Integral):
            if not self.num_init > 0:
                raise ValueError("num_init must be a positive integer, "
                                 "got {!r}".format(self.num_init))
            num_init = self.num_init
        else:
            if not 1 >= self.num_init > 0:
                raise ValueError("fractional num_init must be in (0, 1], "
                                 "got {!r}".format(self.num_init))
            num_init = int(self.num_init * shape[1])

        weights = self.sparse_init.generate(rng, shape)
        values = self.weights_init.generate(rng, (shape[0], num_init))
        for i in range(shape[0]):
            # Sample the columns from the supplied `rng` (not the global
            # NumPy state) so results are reproducible from the seed.
            random_indices = rng.choice(shape[1], num_init, replace=False)
            weights[i, random_indices] = values[i]
        return weights
||
258 | |||
259 | |||
class SparseND(Sparse):
    """Initialize only a fraction of the weights with configurable axes.

    Parameters
    ----------
    axis : int or sequence
        Which axis or axes are to be treated as a "unit" for the purpose
        of the number of elements initialized. For example, an axis of
        (0, 1) when initializing a 4D tensor `W` will treat the first two
        axes of the weight tensor as a grid and initialize `num_init`
        elements of `W[0, 0, :, :]`, another `num_init` elements of
        `W[0, 1, :, :]`, and so on.

    Notes
    -----
    See :class:`Sparse` for documentation of other arguments.

    """
    def __init__(self, axis, **kwargs):
        self.axis = axis
        super(SparseND, self).__init__(**kwargs)

    def generate(self, rng, shape):
        """Return a sparsely initialized array of arbitrary rank.

        The "unit" axes are flattened into rows and all remaining axes into
        columns, the resulting matrix is filled by :meth:`Sparse.generate`,
        and the result is reshaped and transposed back to `shape`.

        """
        # NOTE(review): `pack` is defined outside this file; the list
        # concatenation below assumes it returns a list -- confirm.
        axis_ind = pack(self.axis)
        other_ind = [i for i in range(len(shape)) if i not in axis_ind]
        axis_shapes = [shape[i] for i in axis_ind]
        other_shapes = [shape[i] for i in other_ind]
        # numpy.prod of an empty list is the float 1.0 (e.g. when `axis`
        # covers every dimension); cast so the shape holds only integers.
        flat_shape = (int(numpy.prod(axis_shapes)),
                      int(numpy.prod(other_shapes)))
        matrix = super(SparseND, self).generate(rng, flat_shape)
        unflattened = matrix.reshape(tuple(axis_shapes) + tuple(other_shapes))
        # `unflattened` has its axes ordered as axis_ind + other_ind; build
        # the permutation that restores the original axis order.
        wrong_ind = axis_ind + other_ind
        transp_ind = [wrong_ind.index(i) for i in range(len(shape))]
        return unflattened.transpose(transp_ind)
||
294 |
This check looks for invalid names for a range of different identifiers.
You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.
If your project includes a Pylint configuration file, the settings contained in that file take precedence.
To find out more about Pylint, please refer to their site.