1
|
|
|
import os |
2
|
|
|
import math |
3
|
|
|
import logging |
4
|
|
|
import numpy as np |
5
|
|
|
import tensorflow as tf |
6
|
|
|
import tensorflow.contrib as tfcontrib |
7
|
|
|
|
8
|
|
|
from pyActLearn.learning.nn import variable_summary |
9
|
|
|
from .layers import HiddenLayer, SoftmaxLayer |
10
|
|
|
from .injectors import BatchSequenceInjector |
11
|
|
|
from .criterion import MonitorBased, ConstIterations |
12
|
|
|
|
13
|
|
|
logger = logging.getLogger(__name__) |
14
|
|
|
|
15
|
|
|
|
16
|
|
|
class LSTM_Legacy:
    """Basic single-layer Long Short-Term Memory classifier (static unrolling).

    The graph applies a ``HiddenLayer`` to every time step, feeds the result to a
    ``BasicLSTMCell`` unrolled ``num_steps`` times with ``static_rnn``, and puts a
    ``SoftmaxLayer`` on the output of the last time step. The graph is built in
    whatever graph is the TensorFlow default when the constructor runs.

    Args:
        num_features (:obj:`int`): Number of input features per time step.
        num_classes (:obj:`int`): Number of target classes.
        num_units (:obj:`int`): Number of units of the hidden layer and of the LSTM cell.
        num_steps (:obj:`int`): Number of time steps the RNN is unrolled for.
        optimizer: Optimizer whose ``minimize`` builds the training op. ``tf.train.AdamOptimizer()``
            is used when omitted.

    Attributes:
        x (:obj:`tf.Tensor`): Input placeholder, shape [batch, num_steps, num_features].
        init_state (:obj:`tf.Tensor`): Initial LSTM state placeholder, shape [batch, 2 * num_units].
        y_ (:obj:`tf.Tensor`): Target placeholder, shape [batch, num_classes].
        y (:obj:`tf.Tensor`): Output of the softmax layer.
        y_class (:obj:`tf.Tensor`): ``argmax`` of ``y`` along axis 1.
        loss (:obj:`tf.Tensor`): Mean softmax cross-entropy.
        accuracy (:obj:`tf.Tensor`): Fraction of samples where ``y_class`` matches ``argmax(y_)``.
        fit_step: Training op returned by ``optimizer.minimize(loss)``.
        merged: Merged summary op.
        sess: Session created lazily by the methods when none is supplied.
    """
    def __init__(self, num_features, num_classes, num_units, num_steps, optimizer=None):
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_steps = num_steps
        self.num_units = num_units
        self.summaries = []
        with tf.name_scope('input'):
            self.x = tf.placeholder(tf.float32, shape=[None, num_steps, num_features], name='input_x')
            self.init_state = tf.placeholder(tf.float32, shape=[None, 2 * num_units], name='init_state')
            self.y_ = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y')
        # Input hidden layer: move the step axis first and flatten so that one
        # weight matrix is applied to every (step, sample) row.
        hidden_x = tf.reshape(tf.transpose(self.x, [1, 0, 2]), [-1, num_features])
        self.hidden_layer = HiddenLayer(num_features, num_units, 'Hidden', x=hidden_x)
        # static_rnn expects a list with one tensor per time step.
        hidden_y = tf.split(axis=0, num_or_size_splits=int(num_steps), value=self.hidden_layer.y)
        # Apply RNN
        self.cell = tfcontrib.rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=False)
        outputs, states = tfcontrib.rnn.static_rnn(self.cell, hidden_y, initial_state=self.init_state)
        # NOTE(review): static_rnn presumably returns the final state tensor here, in
        # which case [-1] selects the last row of the batch rather than "the last
        # state" - confirm the intent before relying on this attribute.
        self.last_state = states[-1]
        # Output softmax layer on the last time step only.
        self.output_layer = SoftmaxLayer(num_units, num_classes, 'SoftmaxLayer', x=outputs[-1])
        # Predicted probability and class
        self.y = self.output_layer.y
        self.y_class = tf.argmax(self.y, 1)
        # Softmax cross-entropy loss
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.output_layer.logits, labels=self.y_,
                                                    name='SoftmaxCrossEntropy')
        )
        # Setup optimizer
        self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
        # Evaluation
        self.correct_prediction = tf.equal(self.y_class, tf.argmax(self.y_, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
        # Fit step
        with tf.name_scope('train'):
            self.fit_step = self.optimizer.minimize(self.loss)
        # Setup summaries
        self.summaries += self.hidden_layer.summaries
        self.summaries += self.output_layer.summaries
        self.summaries.append(tf.summary.scalar('cross_entropy', self.loss))
        self.summaries.append(tf.summary.scalar('accuracy', self.accuracy))
        self.merged = tf.summary.merge(self.summaries)
        self.sess = None

    def _get_session(self, session):
        """Return ``session`` if given, else the cached session (created on first use)."""
        if session is not None:
            return session
        if self.sess is None:
            self.sess = tf.Session()
        return self.sess

    def _feed(self, x, y=None):
        """Build a feed dict for sequences ``x`` (and labels ``y``) with an all-zero initial state."""
        feed = {self.x: x, self.init_state: np.zeros((x.shape[0], 2 * self.num_units))}
        if y is not None:
            feed[self.y_] = y
        return feed

    def fit(self, x, y, batch_size=100, iter_num=100, summaries_dir=None, summary_interval=10,
            test_x=None, test_y=None, session=None, criterion='const_iteration'):
        """Fit the model to the dataset

        All variables are (re-)initialized at the start of every call.

        Args:
            x (:obj:`numpy.ndarray`): Input features of shape (num_samples, num_features).
            y (:obj:`numpy.ndarray`): Corresponding labels of shape (num_samples, num_classes).
            batch_size (:obj:`int`): Batch size used in gradient descent.
            iter_num (:obj:`int`): Number of training iterations for const iterations, step depth for monitor based
                stopping criterion.
            summaries_dir (:obj:`str`): Path of the directory to store summaries and saved values. Required
                when ``criterion`` is ``'monitor_based'`` because the best checkpoint is written there.
            summary_interval (:obj:`int`): The step interval to export variable summaries.
            test_x (:obj:`numpy.ndarray`): Test feature array used for monitoring training progress.
            test_y (:obj:`numpy.ndarray`): Test label array used for monitoring training progress.
            session (:obj:`tensorflow.Session`): Session to run training functions.
            criterion (:obj:`str`): Stopping criteria. 'const_iteration' or 'monitor_based'.
                With 'monitor_based' the last fifth of ``x``/``y`` is held out for validation.

        Raises:
            ValueError: If ``criterion`` is ``'monitor_based'`` and ``summaries_dir`` is None.
        """
        # Validate arguments before any session, writer or variable is touched.
        if criterion == 'monitor_based' and summaries_dir is None:
            raise ValueError("summaries_dir is required when criterion is 'monitor_based': "
                             "the best checkpoint is saved there.")
        if criterion not in ('const_iteration', 'monitor_based'):
            logger.error('Wrong criterion %s specified.', criterion)
            return
        session = self._get_session(session)
        train_writer = test_writer = None
        if summaries_dir is not None:
            train_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train'))
            test_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test'))
        session.run(tf.global_variables_initializer())
        # Get stopping criterion
        saver = None
        best_checkpoint = None
        if criterion == 'const_iteration':
            _criterion = ConstIterations(num_iters=iter_num)
        else:
            num_samples = x.shape[0]
            # Float literal keeps the 1/5 split correct under Python 2 integer division too.
            valid_set_len = int(num_samples / 5.0)
            split = num_samples - valid_set_len
            valid_x = x[split:num_samples, :]
            valid_y = y[split:num_samples, :]
            x = x[0:split, :]
            y = y[0:split, :]
            saver = tf.train.Saver()
            best_checkpoint = os.path.join(summaries_dir, 'best.ckpt')

            def monitor_accuracy(monitor_x, monitor_y):
                # Evaluate in the training session; without this an explicitly passed
                # session would be ignored and a fresh, uninitialized one created.
                return self.predict_accuracy(monitor_x, monitor_y, session=session)

            _criterion = MonitorBased(n_steps=iter_num,
                                      monitor_fn=monitor_accuracy,
                                      monitor_fn_args=(valid_x, valid_y[self.num_steps:, :]),
                                      save_fn=saver.save,
                                      save_fn_args=(session, best_checkpoint))
        # Setup batch injector
        injector = BatchSequenceInjector(data_x=x, data_y=y, batch_size=batch_size, seq_len=self.num_steps)
        # Train/Test sequence for brief reporting of accuracy and loss
        train_seq_x, train_seq_y = BatchSequenceInjector.to_sequence(
            self.num_steps, x, y, start=0, end=2000
        )
        has_test = (test_x is not None) and (test_y is not None)
        if has_test:
            test_seq_x, test_seq_y = BatchSequenceInjector.to_sequence(
                self.num_steps, test_x, test_y, start=0, end=2000
            )
        # Iteration starts
        i = 0
        while _criterion.continue_learning():
            batch_x, batch_y = injector.next_batch()
            if train_writer is not None and (i % summary_interval == 0):
                summary, loss, accuracy = session.run(
                    [self.merged, self.loss, self.accuracy],
                    feed_dict=self._feed(train_seq_x, train_seq_y)
                )
                train_writer.add_summary(summary, i)
                logger.info('Step %d, train_set accuracy %g, loss %g', i, accuracy, loss)
                if has_test:
                    merged, accuracy = session.run(
                        [self.merged, self.accuracy],
                        feed_dict=self._feed(test_seq_x, test_seq_y)
                    )
                    test_writer.add_summary(merged, i)
                    logger.info('test_set accuracy %g', accuracy)
            loss, accuracy, _ = session.run(
                [self.loss, self.accuracy, self.fit_step],
                feed_dict=self._feed(batch_x, batch_y)
            )
            i += 1
        # Finish iteration: roll back to the best checkpoint seen by the monitor.
        if criterion == 'monitor_based':
            saver.restore(session, best_checkpoint)
        logger.debug('Total Epoch: %d, current batch %d', injector.num_epochs, injector.cur_batch)

    def _predict_batched(self, target, x, session, batch_size):
        """Evaluate ``target`` over one epoch of sequences built from ``x``.

        Returns the per-batch results concatenated along axis 0, or None when the
        injector yields no batch.
        """
        session = self._get_session(session)
        injector = BatchSequenceInjector(batch_size=batch_size, data_x=x, seq_len=self.num_steps)
        injector.reset()
        chunks = []
        while injector.num_epochs == 0:
            batch_x = injector.next_batch()
            chunks.append(session.run(target, feed_dict=self._feed(batch_x)))
        if not chunks:
            return None
        # Single concatenation instead of re-copying the growing result every batch.
        return np.concatenate(chunks, axis=0)

    def predict_proba(self, x, session=None, batch_size=500):
        """Predict probability (Softmax)

        Args:
            x (:obj:`numpy.ndarray`): Input features handed to ``BatchSequenceInjector``.
            session (:obj:`tensorflow.Session`): Session to run the graph in.
            batch_size (:obj:`int`): Number of sequences evaluated per ``session.run``.

        Returns:
            :obj:`numpy.ndarray`: Values of ``self.y`` for every batch, concatenated along axis 0.
        """
        return self._predict_batched(self.y, x, session, batch_size)

    def predict(self, x, session=None, batch_size=500):
        """Predict class labels

        Args:
            x (:obj:`numpy.ndarray`): Input features handed to ``BatchSequenceInjector``.
            session (:obj:`tensorflow.Session`): Session to run the graph in.
            batch_size (:obj:`int`): Number of sequences evaluated per ``session.run``.

        Returns:
            :obj:`numpy.ndarray`: Values of ``self.y_class`` for every batch, concatenated along axis 0.
        """
        return self._predict_batched(self.y_class, x, session, batch_size)

    def predict_accuracy(self, x, y, session=None):
        """Get Accuracy given feature array and corresponding labels

        Args:
            x (:obj:`numpy.ndarray`): Input features, forwarded to :meth:`predict`.
            y (:obj:`numpy.ndarray`): One-hot labels; the class is taken as ``argmax`` over the last axis.
            session (:obj:`tensorflow.Session`): Session to run the graph in.

        Returns:
            :obj:`float`: Number of matching predictions divided by ``y.shape[0]``.
        """
        session = self._get_session(session)
        predicted = self.predict(x, session=session)
        accuracy = np.sum(predicted == y.argmax(y.ndim - 1)) / float(y.shape[0])
        return accuracy
208
|
|
|
|
209
|
|
|
|
210
|
|
|
class LSTM:
    """Single Layer LSTM Implementation

    A sigmoid input layer is applied to every time step, followed by a
    ``BasicLSTMCell`` (``state_is_tuple=True``) unrolled dynamically with
    ``tf.nn.dynamic_rnn`` and a softmax output layer applied to every time step.
    The first ``num_skip`` time steps are excluded from the predictions, the
    accuracy and the loss. Because the unrolling is dynamic, prediction runs a
    whole sequence in one call and needs no batch injector.

    Args:
        num_features (:obj:`int`): Number of input features.
        num_classes (:obj:`int`): Number of target classes.
        num_hidden (:obj:`int`): Number of units in the input hidden layer.
        num_units (:obj:`int`): Number of units in the RNN layer.
        num_skip (:obj:`int`): Number of leading time steps excluded from outputs, accuracy and loss.
        graph (:obj:`tf.Graph`): Graph to build the model in. A new graph is created when omitted.
        optimizer: Optimizer whose ``minimize`` builds the training op. ``tf.train.AdamOptimizer()``
            is used when omitted.

    Attributes:
        num_features (:obj:`int`): Number of input features.
        num_classes (:obj:`int`): Number of target classes.
        num_hidden (:obj:`int`): Number of units in the input hidden layer.
        num_units (:obj:`int`): Number of units in the RNN layer.
        num_skip (:obj:`int`): Number of leading time steps that are skipped.
        graph (:obj:`tf.Graph`): Graph that holds the model.
        summaries (:obj:`list`): List of tensorflow summaries to be displayed on tensorboard.
        x (:obj:`tf.Tensor`): Input tensor of size [num_batches, length, num_features]
        length (:obj:`tf.Tensor`): 1D placeholder of size [num_batches] with the length of each sequence.
        initial_state_c (:obj:`tf.Tensor`): Initial cell state, 2D tensor (float) of size [num_batches, num_units].
        initial_state_h (:obj:`tf.Tensor`): Initial hidden state, 2D tensor (float) of size [num_batches, num_units].
        y_ (:obj:`tf.Tensor`): Ground Truth of size [num_batches, length, num_classes].
    """
    def __init__(self, num_features, num_classes, num_hidden, num_units, num_skip=0, graph=None, optimizer=None):
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_hidden = num_hidden
        self.num_units = num_units
        self.num_skip = num_skip
        self.summaries = []
        # Always bind self.graph; a caller-supplied graph used to leave it unset.
        self.graph = tf.Graph() if graph is None else graph
        with self.graph.as_default():
            # Inputs
            with tf.name_scope('input'):
                # Input tensor X, shape: [batch, length, features]
                self.x = tf.placeholder(tf.float32, shape=[None, None, num_features], name='input_x')
                # Length of every sequence, shape: [batch]
                self.length = tf.placeholder(tf.float32, shape=[None, ], name='input_x_length')
                # Initial states (as tuples), shape: [batch, units]
                self.initial_state_c = tf.placeholder(tf.float32, shape=[None, num_units], name='initial_state_c')
                self.initial_state_h = tf.placeholder(tf.float32, shape=[None, num_units], name='initial_state_h')
                # Targets, shape: [batch, length, classes]
                self.y_ = tf.placeholder(tf.float32, shape=[None, None, num_classes], name='targets')
            # Input hidden layer with num_hidden units
            with tf.name_scope('input_layer'):
                self.input_W = tf.Variable(
                    tf.truncated_normal(
                        shape=[num_features, num_hidden], stddev=1.0 / math.sqrt(float(num_hidden))),
                    name='weights')
                self.input_b = tf.Variable(tf.zeros(shape=[num_hidden]), name='bias')

                def hidden_fn(sequence):
                    return tf.nn.sigmoid(tf.matmul(sequence, self.input_W) + self.input_b)
                # Activation of hidden layer, shape: [batch, length, num_hidden]
                self.hidden_y = tf.map_fn(hidden_fn, self.x)
            # Recursive Layer (RNN)
            with tf.name_scope('rnn'):
                self.cell = tfcontrib.rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=True)
                # rnn outputs, shape: [batch, length, num_units]
                rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
                    self.cell, self.hidden_y, sequence_length=self.length,
                    initial_state=tfcontrib.rnn.LSTMStateTuple(self.initial_state_c, self.initial_state_h))
            # Apply Softmax Layer to all outputs in all batches
            with tf.name_scope('output_layer'):
                self.output_W = tf.Variable(
                    tf.truncated_normal(shape=[num_units, num_classes], stddev=1.0/math.sqrt(float(num_units))),
                    name='weights'
                )
                self.output_b = tf.Variable(tf.zeros(shape=[num_classes]), name='biases')

                def out_mult_fn(sequence):
                    return tf.matmul(sequence, self.output_W) + self.output_b

                def out_softmax_fn(sequence):
                    return tf.nn.softmax(sequence)

                def out_class_fn(sequence):
                    return tf.argmax(sequence, axis=1)

                def out_softmax_entropy(params):
                    logits, labels = params
                    return tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)

                # self.logit_outputs is a tensor of shape [batch, length, num_classes]
                self.logit_outputs = tf.map_fn(out_mult_fn, rnn_outputs)
                # self.softmax_outputs applies softmax to logit_outputs as a tensor of shape
                # [batch, length, num_classes]
                self.softmax_outputs = tf.map_fn(out_softmax_fn, self.logit_outputs)
                # Probability output y, shape: [batch, length-num_skip, num_classes]
                self.y = self.softmax_outputs[:, num_skip:, :]
                # Class label of every item in the sequence, shape: [batch, length-num_skip]
                self.y_class = tf.map_fn(out_class_fn, self.y, dtype=tf.int64)

                # Accuracy
                def accuracy_fn(params):
                    prediction, truth = params
                    return tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(truth, 1)), tf.float32))

                self.accuracy_outputs = tf.map_fn(accuracy_fn, (self.y_class, self.y_[:, num_skip:, :]),
                                                  dtype=tf.float32)
                self.accuracy = tf.reduce_mean(self.accuracy_outputs)
                # Per-item cross entropy on the non-skipped part of every sequence
                self.entropy_outputs = tf.map_fn(out_softmax_entropy,
                                                 (self.logit_outputs[:, num_skip:, :], self.y_[:, num_skip:, :]),
                                                 dtype=tf.float32)
            # Softmax Cross-Entropy Loss
            self.loss = tf.reduce_mean(self.entropy_outputs)
            # Setup Optimizer
            self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
            # Fit Step
            with tf.name_scope('train'):
                self.fit_step = self.optimizer.minimize(self.loss)
            # Setup Summaries
            self.summaries.append(variable_summary(self.input_W, tag='input_layer/weights'))
            self.summaries.append(variable_summary(self.input_b, tag='input_layer/biases'))
            self.summaries.append(variable_summary(self.output_W, tag='output_layer/weights'))
            self.summaries.append(variable_summary(self.output_b, tag='output_layer/biases'))
            self.summaries.append(tf.summary.scalar('cross_entropy', self.loss))
            self.summaries.append(tf.summary.scalar('accuracy', self.accuracy))
            self.merged = tf.summary.merge(self.summaries)
            self.init_op = tf.global_variables_initializer()
        self.sess = None

    def _get_session(self, session):
        """Return ``session`` if given, else the cached session (created on ``self.graph`` on first use)."""
        if session is not None:
            return session
        if self.sess is None:
            # The model lives in self.graph, not in the default graph.
            self.sess = tf.Session(graph=self.graph)
        return self.sess

    def _run_on_sequence(self, targets, x, session, y=None, writer=None, writer_id=None):
        """Run ``targets`` on one whole sequence ``x`` fed as a batch of size one.

        The merged summary is appended to the fetches and written to ``writer``
        when a writer is given. Returns the list of fetched values.
        """
        session = self._get_session(session)
        fetches = list(targets)
        if writer is not None:
            fetches.append(self.merged)
        feed = {
            self.x: x.reshape((1,) + x.shape),
            # Builtin int replaces the removed np.int alias (same dtype).
            self.length: np.array([x.shape[0]], dtype=int),
            self.initial_state_c: np.zeros((1, self.num_units)),
            self.initial_state_h: np.zeros((1, self.num_units)),
        }
        if y is not None:
            feed[self.y_] = y.reshape((1,) + y.shape)
        results = session.run(fetches, feed_dict=feed)
        if writer is not None:
            writer.add_summary(results[-1], writer_id)
        return results

    def fit(self, x, y, length, batch_size=100, iter_num=100, summaries_dir=None, summary_interval=100,
            test_x=None, test_y=None, session=None, criterion='const_iteration', reintialize=True):
        """Fit the model to the dataset

        Args:
            x (:obj:`numpy.ndarray`): Input features x, shape: [num_samples, num_features].
            y (:obj:`numpy.ndarray`): Corresponding labels, shape: [num_samples, num_classes].
            length (:obj:`int`): Length of each training sequence; the injector is asked for sequences of
                ``self.num_skip + length`` samples.
            batch_size (:obj:`int`): Batch size used in gradient descent.
            iter_num (:obj:`int`): Number of training iterations for const iterations, step depth for monitor based
                stopping criterion.
            summaries_dir (:obj:`str`): Path of the directory to store summaries and saved values. Required
                when ``criterion`` is ``'monitor_based'`` because the best checkpoint is written there.
            summary_interval (:obj:`int`): The step interval to export variable summaries.
            test_x (:obj:`numpy.ndarray`): Test feature array used for monitoring training progress.
            test_y (:obj:`numpy.ndarray`): Test label array used for monitoring training progress.
            session (:obj:`tensorflow.Session`): Session to run training functions.
            criterion (:obj:`str`): Stopping criteria. 'const_iteration' or 'monitor_based'.
            reintialize (:obj:`bool`): Run the variable initializer before training when True.

        Raises:
            ValueError: If ``criterion`` is ``'monitor_based'`` and ``summaries_dir`` is None.
        """
        # Validate arguments before any session, writer or variable is touched.
        if criterion == 'monitor_based' and summaries_dir is None:
            raise ValueError("summaries_dir is required when criterion is 'monitor_based': "
                             "the best checkpoint is saved there.")
        if criterion not in ('const_iteration', 'monitor_based'):
            logger.error('Wrong criterion %s specified.', criterion)
            return
        session = self._get_session(session)
        train_writer = test_writer = valid_writer = None
        if summaries_dir is not None:
            train_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train'))
            test_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test'))
            valid_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'valid'))
        if reintialize:
            session.run(self.init_op)
        with self.graph.as_default():
            saver = tf.train.Saver()
        num_samples = x.shape[0]
        valid_x = valid_y = None
        best_checkpoint = None
        # Get Stopping Criterion
        if criterion == 'const_iteration':
            _criterion = ConstIterations(num_iters=iter_num)
        else:
            # Hold out the last fifth of the data. Float literal keeps the split
            # correct under Python 2 integer division too.
            valid_set_start = int(4 * (num_samples - self.num_skip) / 5.0)
            valid_x = x[valid_set_start:num_samples, :]
            valid_y = y[valid_set_start:num_samples, :]
            x = x[0:valid_set_start + self.num_skip, :]
            y = y[0:valid_set_start + self.num_skip, :]
            best_checkpoint = os.path.join(summaries_dir, 'best.ckpt')

            def monitor_accuracy(monitor_x, monitor_y):
                # Evaluate in the training session; without this an explicitly passed
                # session would be ignored and a fresh, uninitialized one created.
                return self.predict_accuracy(monitor_x, monitor_y, session=session)

            _criterion = MonitorBased(n_steps=iter_num,
                                      monitor_fn=monitor_accuracy,
                                      monitor_fn_args=(valid_x, valid_y),
                                      save_fn=saver.save,
                                      save_fn_args=(session, best_checkpoint))
        # Setup batch injector
        injector = BatchSequenceInjector(data_x=x, data_y=y, batch_size=batch_size, length=self.num_skip + length,
                                         with_seq=True)
        has_test = (test_x is not None) and (test_y is not None)
        # Iteration Starts
        i = 0
        while _criterion.continue_learning():
            # Learning
            batch_x, batch_y, batch_length = injector.next_batch(skip=50)
            loss, accuracy, _ = session.run(
                [self.loss, self.accuracy, self.fit_step],
                feed_dict={self.x: batch_x, self.y_: batch_y, self.length: batch_length,
                           self.initial_state_c: np.zeros((batch_x.shape[0], self.num_units)),
                           self.initial_state_h: np.zeros((batch_x.shape[0], self.num_units))})
            # Take summaries
            if train_writer is not None and (i % summary_interval == 0):
                accuracy, loss = self.predict_accuracy(x, y, session=session, writer=train_writer,
                                                       writer_id=i, with_loss=True)
                logger.info('Step %d, train_set accuracy %g, loss %g', i, accuracy, loss)
                # Test data is optional; skip the report instead of crashing on None.
                if has_test:
                    accuracy, loss = self.predict_accuracy(test_x, test_y, session=session, writer=test_writer,
                                                           writer_id=i, with_loss=True)
                    logger.info('Step %d, test_set accuracy %g, loss %g', i, accuracy, loss)
                if criterion == 'monitor_based':
                    accuracy, loss = self.predict_accuracy(valid_x, valid_y, session=session, writer=valid_writer,
                                                           writer_id=i, with_loss=True)
                    logger.info('Step %d, valid_set accuracy %g, loss %g', i, accuracy, loss)
            i += 1
        # Finish Iteration: roll back to the best checkpoint seen by the monitor.
        if criterion == 'monitor_based':
            saver.restore(session, best_checkpoint)
        logger.debug('Total Epoch: %d, current batch %d', injector.num_epochs, injector.cur_batch)

    def predict_proba(self, x, session=None, writer=None, writer_id=None):
        """Predict probability (Softmax)

        Args:
            x (:obj:`numpy.ndarray`): One sequence of features, shape [length, num_features].
            session (:obj:`tensorflow.Session`): Session to run the graph in.
            writer: Summary writer; when given, the merged summary is fetched and written under ``writer_id``.
                NOTE(review): the merged summary contains loss and accuracy, which read the target
                placeholder that is not fed here - passing a writer presumably fails; confirm.
            writer_id (:obj:`int`): Global step recorded with the summary.

        Returns:
            :obj:`numpy.ndarray`: ``self.y`` of the single fed sequence (first batch entry).
        """
        results = self._run_on_sequence([self.y], x, session, writer=writer, writer_id=writer_id)
        return results[0][0, :, :]

    def predict(self, x, session=None, writer=None, writer_id=None):
        """Predict class labels

        Args:
            x (:obj:`numpy.ndarray`): One sequence of features, shape [length, num_features].
            session (:obj:`tensorflow.Session`): Session to run the graph in.
            writer: Summary writer; when given, the merged summary is fetched and written under ``writer_id``.
                NOTE(review): the merged summary contains loss and accuracy, which read the target
                placeholder that is not fed here - passing a writer presumably fails; confirm.
            writer_id (:obj:`int`): Global step recorded with the summary.

        Returns:
            :obj:`numpy.ndarray`: ``self.y_class`` of the single fed sequence (first batch entry).
        """
        results = self._run_on_sequence([self.y_class], x, session, writer=writer, writer_id=writer_id)
        return results[0][0, :]

    def predict_accuracy(self, x, y, session=None, writer=None, writer_id=None, with_loss=False):
        """Get Accuracy given feature array and corresponding labels

        Args:
            x (:obj:`numpy.ndarray`): One sequence of features, shape [length, num_features].
            y (:obj:`numpy.ndarray`): Labels of the sequence, shape [length, num_classes].
            session (:obj:`tensorflow.Session`): Session to run the graph in.
            writer: Summary writer; when given, the merged summary is fetched and written under ``writer_id``.
            writer_id (:obj:`int`): Global step recorded with the summary.
            with_loss (:obj:`bool`): Also return the loss.

        Returns:
            The accuracy, or the tuple ``(accuracy, loss)`` when ``with_loss`` is True.
        """
        targets = [self.accuracy]
        if with_loss:
            targets.append(self.loss)
        results = self._run_on_sequence(targets, x, session, y=y, writer=writer, writer_id=writer_id)
        if with_loss:
            return results[0], results[1]
        return results[0]
493
|
|
|
|
494
|
|
|
|
495
|
|
|
|
496
|
|
|
class SimpleLSTM:
    """Single Layer LSTM Implementation operating on one sequence at a time

    The model takes one 2D sequence of samples, applies a ``HiddenLayer``, runs a
    ``BasicLSTMCell`` (``state_is_tuple=False``) over it with ``tf.nn.dynamic_rnn``
    as a batch of size one, and applies a ``SoftmaxLayer`` to every output after
    the first ``num_skip`` items.

    Args:
        num_features (:obj:`int`): Number of input features.
        num_classes (:obj:`int`): Number of target classes.
        num_hidden (:obj:`int`): Accepted for signature compatibility; not used by the graph
            (the hidden layer is built with ``num_units`` outputs).
        num_units (:obj:`int`): Number of units of the hidden layer and of the LSTM cell.
        num_skip (:obj:`int`): Number of leading items of a sequence excluded from outputs and loss.
        graph (:obj:`tf.Graph`): Graph to build the model in. A new graph is created when omitted.
        optimizer: Optimizer whose ``minimize`` builds the training op. ``tf.train.AdamOptimizer()``
            is used when omitted.

    Attributes:
        graph (:obj:`tf.Graph`): Graph that holds the model.
        x (:obj:`tf.Tensor`): Input placeholder, shape [seq_length + num_skip, num_features].
        length (:obj:`tf.Tensor`): Placeholder of shape [1] with the sequence length excluding ``num_skip``.
        init_state (:obj:`tf.Tensor`): Initial LSTM state placeholder, shape [2 * num_units].
        y_ (:obj:`tf.Tensor`): Target placeholder, shape [seq_length + num_skip, num_classes].
        y_skiped (:obj:`tf.Tensor`): ``y_`` without its first ``num_skip`` rows.
    """

    def __init__(self, num_features, num_classes, num_hidden, num_units, num_skip, graph=None, optimizer=None):
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_units = num_units
        self.num_skip = num_skip
        self.summaries = []
        # Keep the graph so that sessions, initializers and savers can target it.
        self.graph = tf.Graph() if graph is None else graph
        with self.graph.as_default():
            # Inputs
            with tf.name_scope('input'):
                # X in the shape of (seq_length + num_skip, features)
                self.x = tf.placeholder(tf.float32, shape=[None, num_features], name='input_x')
                # length is the actual length of the sequence (without the skipped items)
                self.length = tf.placeholder(tf.int64, shape=[1], name='input_x_length')
                self.init_state = tf.placeholder(tf.float32, shape=[2 * num_units], name='init_state')
                self.y_ = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y')
                self.y_skiped = self.y_[num_skip:, :]
            # Input Hidden layers
            self.hidden_layer = HiddenLayer(num_features, num_units, 'Hidden', x=self.x)
            # Recursive Layer
            with tf.name_scope('rnn'):
                # `rnn` was never imported in this module; the cell lives in tf.contrib.rnn.
                self.cell = tfcontrib.rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=False)
                # Outputs is a tensor with shape [1, seq_length + num_skip, num_units]
                outputs, states = tf.nn.dynamic_rnn(
                    self.cell, tf.reshape(self.hidden_layer.y, [1, -1, num_units]),
                    sequence_length=(self.length + num_skip),
                    # The cell needs a [batch, 2 * num_units] state; the batch size is one.
                    initial_state=tf.reshape(self.init_state, [1, 2 * num_units]), time_major=False)
            # Apply Softmax Layer to all outputs in the valid items in the sequence.
            # The only batch entry has index 0.
            self.output_layer = SoftmaxLayer(num_units, num_classes, 'SoftmaxLayer',
                                             x=outputs[0, num_skip:, :])
            # Softmax Cross-Entropy Loss (TF 1.x accepts keyword arguments only)
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.output_layer.logits, labels=self.y_skiped,
                                                        name='SoftmaxCrossEntropy')
            )
            # Setup Optimizer
            self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
            # Predicted Probability
            self.y = self.output_layer.y
            self.y_class = tf.argmax(self.y, 1)
            # Evaluation
            self.correct_prediction = tf.equal(self.y_class, tf.argmax(self.y_skiped, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
            # Fit Step
            with tf.name_scope('train'):
                self.fit_step = self.optimizer.minimize(self.loss)
            # Setup Summaries
            self.summaries += self.hidden_layer.summaries
            self.summaries += self.output_layer.summaries
            self.summaries.append(tf.summary.scalar('cross_entropy', self.loss))
            self.summaries.append(tf.summary.scalar('accuracy', self.accuracy))
            self.merged = tf.summary.merge(self.summaries)
        self.sess = None

    def _get_session(self, session):
        """Return ``session`` if given, else the cached session (created on ``self.graph`` on first use)."""
        if session is not None:
            return session
        if self.sess is None:
            self.sess = tf.Session(graph=self.graph)
        return self.sess

    def _feed(self, x, num_skip, y=None):
        """Build a feed dict for sequence ``x`` (and labels ``y``) with an all-zero initial state."""
        feed = {
            self.x: x,
            # The placeholder has shape [1], so the length is fed as a one-element list.
            self.length: [x.shape[0] - num_skip],
            self.init_state: np.zeros(2 * self.num_units),
        }
        if y is not None:
            feed[self.y_] = y
        return feed

    def fit(self, x, y, num_skip=100, batch_size=100, iter_num=100, summaries_dir=None, summary_interval=10,
            test_x=None, test_y=None, session=None, criterion='const_iteration'):
        """Fit the model to the dataset

        Training slides a window of ``num_skip + batch_size`` samples over ``x`` one
        sample at a time, wrapping to the start when the window reaches the end.
        All variables are (re-)initialized at the start of every call.

        Args:
            x (:obj:`numpy.ndarray`): Input features of shape (num_samples, num_features).
            y (:obj:`numpy.ndarray`): Corresponding labels of shape (num_samples, num_classes).
            num_skip (:obj:`int`): Number of leading items of every window that do not count towards the
                fed sequence length. NOTE(review): the graph itself skips ``self.num_skip`` items
                (constructor argument); this value presumably has to match it - confirm.
            batch_size (:obj:`int`): Number of non-skipped samples in each training window.
            iter_num (:obj:`int`): Number of training iterations for const iterations, step depth for monitor based
                stopping criterion.
            summaries_dir (:obj:`str`): Path of the directory to store summaries and saved values. Required
                when ``criterion`` is ``'monitor_based'`` because the best checkpoint is written there.
            summary_interval (:obj:`int`): The step interval to export variable summaries.
            test_x (:obj:`numpy.ndarray`): Test feature array used for monitoring training progress.
            test_y (:obj:`numpy.ndarray`): Test label array used for monitoring training progress.
            session (:obj:`tensorflow.Session`): Session to run training functions.
            criterion (:obj:`str`): Stopping criteria. 'const_iteration' or 'monitor_based'.

        Raises:
            ValueError: If ``criterion`` is ``'monitor_based'`` and ``summaries_dir`` is None.
        """
        # Validate arguments before any session, writer or variable is touched.
        if criterion == 'monitor_based' and summaries_dir is None:
            raise ValueError("summaries_dir is required when criterion is 'monitor_based': "
                             "the best checkpoint is saved there.")
        if criterion not in ('const_iteration', 'monitor_based'):
            logger.error('Wrong criterion %s specified.', criterion)
            return
        session = self._get_session(session)
        train_writer = test_writer = None
        if summaries_dir is not None:
            train_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train'))
            test_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test'))
        with self.graph.as_default():
            init_op = tf.global_variables_initializer()
            saver = tf.train.Saver()
        session.run(init_op)
        best_checkpoint = None
        # Get Stopping Criterion
        if criterion == 'const_iteration':
            _criterion = ConstIterations(num_iters=iter_num)
        else:
            num_samples = x.shape[0]
            # Hold out the last fifth (plus num_skip warm-up samples) for validation.
            # Float literal keeps the split correct under Python 2 integer division too.
            valid_set_len = int((num_samples - num_skip) / 5.0)
            valid_x = x[num_samples - valid_set_len - num_skip:num_samples, :]
            valid_y = y[num_samples - valid_set_len - num_skip:num_samples, :]
            x = x[0:num_samples - valid_set_len, :]
            y = y[0:num_samples - valid_set_len, :]
            best_checkpoint = os.path.join(summaries_dir, 'best.ckpt')

            def monitor_accuracy(monitor_x, monitor_y):
                # Evaluate in the training session; without this an explicitly passed
                # session would be ignored and a fresh, uninitialized one created.
                return self.predict_accuracy(monitor_x, monitor_y, session=session)

            _criterion = MonitorBased(n_steps=iter_num,
                                      monitor_fn=monitor_accuracy,
                                      monitor_fn_args=(valid_x, valid_y),
                                      save_fn=saver.save,
                                      save_fn_args=(session, best_checkpoint))
        has_test = (test_x is not None) and (test_y is not None)
        window = num_skip + batch_size
        # `step` counts iterations for the summaries; `offset` is the window start
        # and wraps around, so the two are tracked separately.
        step = 0
        offset = 0
        while _criterion.continue_learning():
            # Learning
            batch_x = x[offset:offset + window, :]
            batch_y = y[offset:offset + window, :]
            loss, accuracy, _ = session.run(
                [self.loss, self.accuracy, self.fit_step],
                feed_dict=self._feed(batch_x, num_skip, batch_y))
            # Summary on the whole training set (x already excludes the validation part)
            if train_writer is not None and (step % summary_interval == 0):
                summary, loss, accuracy = session.run(
                    [self.merged, self.loss, self.accuracy],
                    feed_dict=self._feed(x, num_skip, y)
                )
                train_writer.add_summary(summary, step)
                logger.info('Step %d, train_set accuracy %g, loss %g', step, accuracy, loss)
                if has_test:
                    merged, accuracy = session.run(
                        [self.merged, self.accuracy],
                        feed_dict=self._feed(test_x, num_skip, test_y))
                    test_writer.add_summary(merged, step)
                    logger.info('test_set accuracy %g', accuracy)
            step += 1
            # Advance the window; restart once a full window no longer fits.
            offset += 1
            if offset + window > x.shape[0]:
                offset = 0
        # Finish Iteration: roll back to the best checkpoint seen by the monitor.
        if criterion == 'monitor_based':
            saver.restore(session, best_checkpoint)

    def predict_proba(self, x, session=None, batch_size=500):
        """Predict probability (Softmax)

        Args:
            x (:obj:`numpy.ndarray`): One sequence of features, shape (num_samples, num_features).
            session (:obj:`tensorflow.Session`): Session to run the graph in.
            batch_size (:obj:`int`): Unused; kept for signature compatibility.

        Returns:
            Value of ``self.y`` for the sequence.
        """
        session = self._get_session(session)
        return session.run(self.y, feed_dict=self._feed(x, self.num_skip))

    def predict(self, x, session=None):
        """Predict class labels

        Args:
            x (:obj:`numpy.ndarray`): One sequence of features, shape (num_samples, num_features).
            session (:obj:`tensorflow.Session`): Session to run the graph in.

        Returns:
            Value of ``self.y_class`` for the sequence.
        """
        session = self._get_session(session)
        return session.run(self.y_class, feed_dict=self._feed(x, self.num_skip))

    def predict_accuracy(self, x, y, session=None):
        """Get Accuracy given feature array and corresponding labels

        Args:
            x (:obj:`numpy.ndarray`): One sequence of features, shape (num_samples, num_features).
            y (:obj:`numpy.ndarray`): Labels of the sequence, shape (num_samples, num_classes).
            session (:obj:`tensorflow.Session`): Session to run the graph in.

        Returns:
            Value of ``self.accuracy`` for the sequence.
        """
        session = self._get_session(session)
        return session.run(self.accuracy, feed_dict=self._feed(x, self.num_skip, y))