#!/usr/bin/env python
# -*- coding: utf-8 -*-


import logging

from deepy.trainers import THEANO_LINKER
from deepy.trainers.base import NeuralTrainer
from deepy.core.env import FLOATX
import theano
import numpy as np
import theano.tensor as T
# Import the submodule explicitly; `import scipy` alone does not guarantee
# that `scipy.optimize` is available.
import scipy.optimize

class ScipyTrainer(NeuralTrainer):
    """
    Optimizer that delegates parameter updates to scipy.optimize.minimize.

    This class was adapted from the corresponding one in theanets.
    See the usage sketch at the bottom of this module.
    """

    METHODS = ('bfgs', 'cg', 'dogleg', 'newton-cg', 'trust-ncg', 'l-bfgs-b')

    def __init__(self, network, method, config=None):
        super(ScipyTrainer, self).__init__(network, config)

        self.method = method
        # Number of optimizer iterations performed in each call to train_step
        self.scipy_updates = config.get("scipy_updates", 5) if config else 5

        logging.info('compiling gradient function')
        # Record the shape, element count, and flat offset of every parameter,
        # so the parameter list can be packed into (and unpacked from) the
        # single flat vector that scipy.optimize works on.
        self._shapes = [p.get_value(borrow=True).shape for p in self.network.parameters]
        self._counts = [np.prod(s) for s in self._shapes]
        self._starts = np.cumsum([0] + self._counts)[:-1]
        self._dtype = FLOATX
        self._gradient_func = None
        # Declares that the learning function is implemented
        self.learning_func = True

    def train_step(self, train_set, train_size=None):
        # Run a few iterations of the chosen scipy optimizer, starting from
        # the best parameters recorded so far, then install the result.
        res = scipy.optimize.minimize(
            fun=self._function_at,
            jac=self._gradient_at,
            x0=self._arrays_to_flat(self.best_params[0]),
            args=(train_set,),
            method=self.method,
            options=dict(maxiter=self.scipy_updates),
        )

        self.set_params(self._flat_to_arrays(res.x))

        return [('J', res.fun)]

    def _gradient_function(self):
        # Compile the Theano gradient function lazily, on first use.
        if not self._gradient_func:
            params = self.network.parameters
            inputs = self.network.input_variables + self.network.target_variables
            self._gradient_func = theano.function(
                inputs, T.grad(self.cost, params),
                allow_input_downcast=True, mode=theano.Mode(linker=THEANO_LINKER))
        return self._gradient_func

    def _function_at(self, x, train_set):
        # Objective for scipy: the mean cost over all minibatches at the
        # parameter vector x.
        self.set_params(self._flat_to_arrays(x))
        return np.mean([self.evaluation_func(*batch)[0] for batch in train_set])

    def _gradient_at(self, x, train_set):
        # Gradient for scipy: per-parameter gradients averaged over all
        # minibatches, flattened into a single vector.
        self.set_params(self._flat_to_arrays(x))
        grads = [[] for _ in range(len(self.network.parameters))]
        grad_func = self._gradient_function()
        for batch in train_set:
            for i, g in enumerate(grad_func(*batch)):
                grads[i].append(np.asarray(g))
        return self._arrays_to_flat([np.mean(g, axis=0) for g in grads])

    def _flat_to_arrays(self, x):
        # Unpack a flat parameter vector into a list of parameter-shaped arrays.
        x = x.astype(self._dtype)
        return [x[o:o+n].reshape(s) for s, o, n in
                zip(self._shapes, self._starts, self._counts)]

    def _arrays_to_flat(self, arrays):
        # Pack a list of parameter arrays into a single flat vector.
        x = np.zeros((sum(self._counts),), self._dtype)
        for arr, o, n in zip(arrays, self._starts, self._counts):
            x[o:o+n] = arr.ravel()
        return x
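

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). It assumes a
# deepy network has already been constructed elsewhere; `build_network()` is a
# hypothetical placeholder, not a deepy API. `train_set` is expected to be an
# iterable of minibatch tuples matching the network's input and target
# variables, e.g. [(x_batch, y_batch), ...].
#
#     network = build_network()                      # hypothetical helper
#     trainer = ScipyTrainer(network, "l-bfgs-b")
#     for epoch in range(10):
#         costs = trainer.train_step(train_set)      # -> [('J', mean_cost)]
#         logging.info("epoch %d: %s", epoch, costs)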