#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging as loggers
import gzip
import cPickle as pickle
import os
from threading import Thread

import numpy as np
import theano.tensor as T
import theano

import deepy
from deepy.layers.layer import NeuralLayer
from deepy.layers.block import Block
from deepy.utils import dim_to_var, TrainLogger

logging = loggers.getLogger(__name__)

DEEPY_MESSAGE = "deepy version = %s" % deepy.__version__

def save_network_params(params, path):
    """
    Save a list of parameter arrays to a '.gz' (pickled), '.npz' (compressed)
    or 'uncompressed.npz' file, chosen by the path suffix.
    """
    if path.endswith('.gz'):
        opener = gzip.open if path.lower().endswith('.gz') else open
        handle = opener(path, 'wb')
        pickle.dump(params, handle)
        handle.close()
    elif path.endswith('uncompressed.npz'):
        np.savez(path, *params)
    elif path.endswith('.npz'):
        np.savez_compressed(path, *params)
    else:
        raise Exception("File format of %s is not supported, use '.gz', '.npz' or 'uncompressed.npz'" % path)

class NeuralNetwork(object):
    """
    The base class of neural networks.
    """

    def __init__(self, input_dim, input_tensor=None):
        logging.info(DEEPY_MESSAGE)
        self.input_dim = input_dim
        self.input_tensor = input_tensor
        self.parameter_count = 0

        self.parameters = []
        self.free_parameters = []

        self.training_updates = []
        self.updates = []

        self.input_variables = []
        self.target_variables = []

        self.training_callbacks = []
        self.testing_callbacks = []
        self.epoch_callbacks = []

        self.layers = []

        self._hidden_outputs = []
        self.training_monitors = []
        self.testing_monitors = []

        self.setup_variables()
        self.train_logger = TrainLogger()

    def stack_layer(self, layer, no_setup=False):
        """
        Stack a neural layer on top of the network.
        :type layer: NeuralLayer
        :param no_setup: whether the layer is already initialized
        """
        if layer.name:
            # Make the layer name unique by appending its position
            layer.name += "%d" % (len(self.layers) + 1)
        if not self.layers:
            layer.initialize(self.input_dim, no_prepare=no_setup)
        else:
            layer.initialize(self.layers[-1].output_dim, no_prepare=no_setup)
        self._output = layer.compute_tensor(self._output)
        # Note: "tesnor" (sic) matches the spelling used by the layer API
        self._test_output = layer.compute_test_tesnor(self._test_output)
        self._hidden_outputs.append(self._output)
        self.register_layer(layer)
        self.layers.append(layer)

    def register(self, *layers):
        """
        Register multiple layers as components of the network.
        Their parameters will be trained, but their outputs will not be stacked.
        """
        for layer in layers:
            self.register_layer(layer)

    def register_layer(self, layer):
        """
        Register a layer so that its parameters will be trained,
        but its output will not be stacked.
        """
        if type(layer) == Block:
            layer.fix()
        self.parameter_count += layer.parameter_count
        self.parameters.extend(layer.parameters)
        self.free_parameters.extend(layer.free_parameters)
        self.training_monitors.extend(layer.training_monitors)
        self.testing_monitors.extend(layer.testing_monitors)
        self.updates.extend(layer.updates)
        self.training_updates.extend(layer.training_updates)
        self.input_variables.extend(layer.external_inputs)
        self.target_variables.extend(layer.external_targets)

        self.training_callbacks.extend(layer.training_callbacks)
        self.testing_callbacks.extend(layer.testing_callbacks)
        self.epoch_callbacks.extend(layer.epoch_callbacks)

    def first_layer(self):
        """
        Return the first layer, or None if no layer is stacked.
        """
        return self.layers[0] if self.layers else None

    def stack(self, *layers):
        """
        Stack layers in order and return the network itself,
        so that calls can be chained.
        """
        for layer in layers:
            self.stack_layer(layer)
        return self
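
    # A minimal usage sketch (hypothetical layer classes): stack() returns the
    # network, so construction can be written as a chain, e.g.
    #
    #     network = NeuralNetwork(input_dim=784)
    #     network.stack(Dense(256, 'relu'), Dense(10, 'softmax'))
    #
    # Here `Dense` stands in for any NeuralLayer subclass with an output_dim.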

    def prepare_training(self):
        """
        This function will be called before training.
        """
        self.report()

    def monitor_layer_outputs(self):
        """
        Monitor the mean absolute output of each layer.
        Useful for troubleshooting convergence problems.
        """
        for layer, hidden in zip(self.layers, self._hidden_outputs):
            self.training_monitors.append(('mean(%s)' % (layer.name), abs(hidden).mean()))
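
    # A minimal sketch of intended use (assuming a network built with stack()):
    #
    #     network.monitor_layer_outputs()
    #     # training_monitors now holds ('mean(<layer>)', <mean |output|>) pairs;
    #     # a mean activation collapsing to zero or blowing up during training
    #     # is an early sign of convergence trouble.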

    @property
    def all_parameters(self):
        """
        Return all parameters, including free parameters.
        """
        params = []
        params.extend(self.parameters)
        params.extend(self.free_parameters)

        return params

    def setup_variables(self):
        """
        Set up the input variables.
        """
        if self.input_tensor:
            if type(self.input_tensor) == int:
                # An integer is interpreted as the number of tensor dimensions
                x = dim_to_var(self.input_tensor, name="x")
            else:
                x = self.input_tensor
        else:
            x = T.matrix('x')
        self.input_variables.append(x)
        self._output = x
        self._test_output = x

    def _compile(self):
        if not hasattr(self, '_compute'):
            # Compile a Theano function over the non-target input variables
            self._compute = theano.function(
                filter(lambda x: x not in self.target_variables, self.input_variables),
                self.test_output, updates=self.updates, allow_input_downcast=True)

    def compute(self, *x):
        """
        Return the network output for the given inputs,
        compiling the computation function on first use.
        """
        self._compile()
        return self._compute(*x)
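
    # A minimal sketch (hypothetical shapes): for a network whose input is a
    # T.matrix, compute() takes one numpy batch per input variable.
    #
    #     batch = np.random.rand(32, network.input_dim)
    #     y = network.compute(batch)
    #
    # The first call triggers Theano compilation; later calls reuse the function.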

    @property
    def output(self):
        """
        Return the output variable.
        """
        return self._output

    @property
    def test_output(self):
        """
        Return the output variable at test time.
        """
        return self._test_output

    @property
    def cost(self):
        """
        Return the cost variable.
        The base class has no cost, so it returns a constant zero.
        """
        return T.constant(0)

    @property
    def test_cost(self):
        """
        Return the cost variable at test time.
        """
        return self.cost

    def save_params(self, path, new_thread=False):
        """
        Save parameters to file, optionally in a background thread.
        """
        logging.info("saving parameters to %s" % path)
        param_variables = self.all_parameters
        params = [p.get_value().copy() for p in param_variables]
        if new_thread:
            thread = Thread(target=save_network_params, args=(params, path))
            thread.start()
        else:
            save_network_params(params, path)
        self.train_logger.save(path)
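
    # A minimal sketch: values are copied on the calling thread before the
    # optional writer thread starts, so training can keep updating the shared
    # variables without corrupting the snapshot being written.
    #
    #     network.save_params("model.npz", new_thread=True)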

    def load_params(self, path, exclude_free_params=False):
        """
        Load parameters from file.
        """
        if not os.path.exists(path):
            return
        logging.info("loading parameters from %s" % path)
        # Decide which parameters to load
        if exclude_free_params:
            params_to_load = self.parameters
        else:
            params_to_load = self.all_parameters
        # Load parameters
        if path.endswith(".gz"):
            opener = gzip.open if path.lower().endswith('.gz') else open
            handle = opener(path, 'rb')
            saved_params = pickle.load(handle)
            handle.close()
            # Write parameters
            for target, source in zip(params_to_load, saved_params):
                logging.info('%s: setting value %s', target.name, source.shape)
                target.set_value(source)
        elif path.endswith(".npz"):
            arrs = np.load(path)
            # Write parameters
            for target, idx in zip(params_to_load, range(len(arrs.keys()))):
                source = arrs['arr_%d' % idx]
                logging.info('%s: setting value %s', target.name, source.shape)
                target.set_value(source)
        else:
            raise Exception("File format of %s is not supported, use '.gz', '.npz' or 'uncompressed.npz'" % path)

        self.train_logger.load(path)
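
    # A minimal round-trip sketch: parameters are matched to saved arrays by
    # position, so load into a network built with the same layer layout.
    #
    #     network.save_params("model.npz")
    #     network.load_params("model.npz")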

    def report(self):
        """
        Print network statistics.
        """
        logging.info("network inputs: %s", " ".join(map(str, self.input_variables)))
        logging.info("network targets: %s", " ".join(map(str, self.target_variables)))
        logging.info("network parameters: %s", " ".join(map(str, self.all_parameters)))
        logging.info("parameter count: %d", self.parameter_count)

    def epoch_callback(self):
        """
        Callback for each epoch.
        """
        for cb in self.epoch_callbacks:
            cb()

    def training_callback(self):
        """
        Callback for each training iteration.
        """
        for cb in self.training_callbacks:
            cb()

    def testing_callback(self):
        """
        Callback for each testing iteration.
        """
        for cb in self.testing_callbacks:
            cb()