|
1
|
|
|
import os |
|
2
|
|
|
import math |
|
3
|
|
|
import logging |
|
4
|
|
|
import numpy as np |
|
5
|
|
|
import tensorflow as tf |
|
6
|
|
|
import tensorflow.contrib as tfcontrib |
|
7
|
|
|
|
|
8
|
|
|
from pyActLearn.learning.nn import variable_summary |
|
9
|
|
|
from .layers import HiddenLayer, SoftmaxLayer |
|
10
|
|
|
from .injectors import BatchSequenceInjector |
|
11
|
|
|
from .criterion import MonitorBased, ConstIterations |
|
12
|
|
|
|
|
13
|
|
|
# Module-level logger shared by every model class in this file; named after
# the module so applications can configure its verbosity independently.
logger = logging.getLogger(__name__)
|
14
|
|
|
|
|
15
|
|
|
|
|
16
|
|
|
class LSTM_Legacy:
    """Basic single-layer Long Short-Term Memory classifier (static unrolling).

    The graph is built in the current default TensorFlow graph: a hidden layer
    is applied to every time step, the result is fed to a ``BasicLSTMCell``
    unrolled for ``num_steps`` steps with ``static_rnn``, and a softmax layer
    is applied to the output of the last step.

    Args:
        num_features (:obj:`int`): Number of input features per time step.
        num_classes (:obj:`int`): Number of target classes.
        num_units (:obj:`int`): Number of units of the hidden layer and the LSTM cell.
        num_steps (:obj:`int`): Number of time steps the RNN is unrolled for.
        optimizer: Optimizer providing ``minimize``; ``tf.train.AdamOptimizer()``
            is used when omitted.

    Attributes:
        x (:obj:`tf.Tensor`): Input placeholder, shape [batch, num_steps, num_features].
        init_state (:obj:`tf.Tensor`): Initial state placeholder, shape [batch, 2 * num_units].
        y_ (:obj:`tf.Tensor`): Ground-truth placeholder, shape [batch, num_classes].
        y (:obj:`tf.Tensor`): Output of the softmax layer.
        y_class (:obj:`tf.Tensor`): ``argmax`` of ``y`` along axis 1.
        loss (:obj:`tf.Tensor`): Mean softmax cross-entropy.
        accuracy (:obj:`tf.Tensor`): Fraction of correct predictions.
        merged (:obj:`tf.Tensor`): Merged summary op.
        sess: Session cached by the first method call that had to create one.
    """

    def __init__(self, num_features, num_classes, num_units, num_steps, optimizer=None):
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_steps = num_steps
        self.num_units = num_units
        self.summaries = []
        with tf.name_scope('input'):
            self.x = tf.placeholder(tf.float32, shape=[None, num_steps, num_features], name='input_x')
            self.init_state = tf.placeholder(tf.float32, shape=[None, 2 * num_units], name='init_state')
            self.y_ = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y')
        # Input hidden layer: switch to time-major order and flatten so that
        # the same W/b is applied to every time step of every sample.
        hidden_x = tf.reshape(tf.transpose(self.x, [1, 0, 2]), [-1, num_features])
        self.hidden_layer = HiddenLayer(num_features, num_units, 'Hidden', x=hidden_x)
        # static_rnn consumes a list with one tensor per time step.
        hidden_y = tf.split(axis=0, num_or_size_splits=int(num_steps), value=self.hidden_layer.y)
        # Apply RNN
        self.cell = tfcontrib.rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=False)
        outputs, states = tfcontrib.rnn.static_rnn(self.cell, hidden_y, initial_state=self.init_state)
        # NOTE(review): with state_is_tuple=False ``states`` is presumably the
        # single final-state tensor, so ``[-1]`` would select the last batch
        # row rather than "the last state" - confirm the intent.
        self.last_state = states[-1]
        # Output softmax layer on the output of the last time step.
        self.output_layer = SoftmaxLayer(num_units, num_classes, 'SoftmaxLayer', x=outputs[-1])
        # Predicted probability and class
        self.y = self.output_layer.y
        self.y_class = tf.argmax(self.y, 1)
        # Softmax cross-entropy loss
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.output_layer.logits, labels=self.y_,
                                                    name='SoftmaxCrossEntropy')
        )
        # Setup optimizer
        self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
        # Evaluation
        self.correct_prediction = tf.equal(self.y_class, tf.argmax(self.y_, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
        # Fit step
        with tf.name_scope('train'):
            self.fit_step = self.optimizer.minimize(self.loss)
        # Setup summaries
        self.summaries += self.hidden_layer.summaries
        self.summaries += self.output_layer.summaries
        self.summaries.append(tf.summary.scalar('cross_entropy', self.loss))
        self.summaries.append(tf.summary.scalar('accuracy', self.accuracy))
        self.merged = tf.summary.merge(self.summaries)
        self.sess = None

    def _resolve_session(self, session=None):
        """Return ``session`` if given, else the cached session (created on first use)."""
        if session is not None:
            return session
        if self.sess is None:
            self.sess = tf.Session()
        return self.sess

    def _zero_state(self, batch_len):
        """Return an all-zero initial LSTM state for ``batch_len`` sequences."""
        return np.zeros((batch_len, 2 * self.num_units))

    def fit(self, x, y, batch_size=100, iter_num=100, summaries_dir=None, summary_interval=10,
            test_x=None, test_y=None, session=None, criterion='const_iteration'):
        """Fit the model to the dataset

        All global variables are (re-)initialized before training starts.

        Args:
            x (:obj:`numpy.ndarray`): Input features of shape (num_samples, num_features).
            y (:obj:`numpy.ndarray`): Corresponding labels; indexed as a 2D array
                (num_samples, num_classes) by the 'monitor_based' split.
            batch_size (:obj:`int`): Batch size used in gradient descent.
            iter_num (:obj:`int`): Number of training iterations for const iterations, step depth for monitor based
                stopping criterion.
            summaries_dir (:obj:`str`): Path of the directory to store summaries and saved values.
                Required when ``criterion`` is 'monitor_based' (the best checkpoint is stored there).
            summary_interval (:obj:`int`): The step interval to export variable summaries.
            test_x (:obj:`numpy.ndarray`): Test feature array used for monitoring training progress.
            test_y (:obj:`numpy.ndarray`): Test label array used for monitoring training progress.
            session (:obj:`tensorflow.Session`): Session to run training functions.
            criterion (:obj:`str`): Stopping criteria. 'const_iteration' or 'monitor_based'.
                With 'monitor_based' the last fifth of the samples is held out as validation set.

        Raises:
            ValueError: If ``criterion`` is 'monitor_based' and ``summaries_dir`` is None.
        """
        session = self._resolve_session(session)
        # Validate the arguments before touching variables or the file system.
        if criterion not in ('const_iteration', 'monitor_based'):
            logger.error('Wrong criterion %s specified.', criterion)
            return
        monitor_based = criterion == 'monitor_based'
        if monitor_based and summaries_dir is None:
            raise ValueError("criterion 'monitor_based' needs summaries_dir to store the best checkpoint.")
        has_test = (test_x is not None) and (test_y is not None)
        train_writer = None
        test_writer = None
        if summaries_dir is not None:
            train_writer = tf.summary.FileWriter(summaries_dir + '/train')
            test_writer = tf.summary.FileWriter(summaries_dir + '/test')
        try:
            session.run(tf.global_variables_initializer())
            saver = None
            checkpoint_path = None
            # Get stopping criterion
            if monitor_based:
                # One saver / one path for both saving and restoring the best model.
                saver = tf.train.Saver()
                checkpoint_path = os.path.join(summaries_dir, 'best.ckpt')
                num_samples = x.shape[0]
                # Float literals keep the 1/5 split correct under Python 2 division too.
                valid_set_len = int(1.0 / 5.0 * num_samples)
                split = num_samples - valid_set_len
                valid_x = x[split:num_samples, :]
                valid_y = y[split:num_samples, :]
                x = x[0:split, :]
                y = y[0:split, :]

                def monitor_fn(*args, **kwargs):
                    # Evaluate with the session that is actually being trained.
                    kwargs.setdefault('session', session)
                    return self.predict_accuracy(*args, **kwargs)

                _criterion = MonitorBased(n_steps=iter_num,
                                          monitor_fn=monitor_fn,
                                          monitor_fn_args=(valid_x, valid_y[self.num_steps:, :]),
                                          save_fn=saver.save,
                                          save_fn_args=(session, checkpoint_path))
            else:
                _criterion = ConstIterations(num_iters=iter_num)
            # Setup batch injector
            injector = BatchSequenceInjector(data_x=x, data_y=y, batch_size=batch_size, seq_len=self.num_steps)
            # Train/Test sequence for brief reporting of accuracy and loss
            train_seq_x, train_seq_y = BatchSequenceInjector.to_sequence(
                self.num_steps, x, y, start=0, end=2000
            )
            if has_test:
                test_seq_x, test_seq_y = BatchSequenceInjector.to_sequence(
                    self.num_steps, test_x, test_y, start=0, end=2000
                )
            # Iteration starts
            step = 0
            while _criterion.continue_learning():
                batch_x, batch_y = injector.next_batch()
                if summaries_dir is not None and (step % summary_interval == 0):
                    summary, loss, accuracy = session.run(
                        [self.merged, self.loss, self.accuracy],
                        feed_dict={self.x: train_seq_x, self.y_: train_seq_y,
                                   self.init_state: self._zero_state(train_seq_x.shape[0])}
                    )
                    train_writer.add_summary(summary, step)
                    logger.info('Step %d, train_set accuracy %g, loss %g', step, accuracy, loss)
                    if has_test:
                        merged, accuracy = session.run(
                            [self.merged, self.accuracy],
                            feed_dict={self.x: test_seq_x, self.y_: test_seq_y,
                                       self.init_state: self._zero_state(test_seq_x.shape[0])})
                        test_writer.add_summary(merged, step)
                        logger.info('test_set accuracy %g', accuracy)
                loss, accuracy, _ = session.run(
                    [self.loss, self.accuracy, self.fit_step],
                    feed_dict={self.x: batch_x, self.y_: batch_y,
                               self.init_state: self._zero_state(batch_x.shape[0])})
                step += 1
            # Finish iteration: roll back to the best model seen by the monitor.
            if monitor_based:
                saver.restore(session, checkpoint_path)
            logger.debug('Total Epoch: %d, current batch %d', injector.num_epochs, injector.cur_batch)
        finally:
            # Flush and release the event files even if training fails.
            for writer in (train_writer, test_writer):
                if writer is not None:
                    writer.close()

    def _predict_batched(self, target, x, session=None, batch_size=500):
        """Evaluate ``target`` on ``x`` one batch at a time over a single epoch.

        Returns the per-batch results concatenated along axis 0, or None if
        the injector produced no batch.
        """
        session = self._resolve_session(session)
        injector = BatchSequenceInjector(batch_size=batch_size, data_x=x, seq_len=self.num_steps)
        injector.reset()
        chunks = []
        while injector.num_epochs == 0:
            batch_x = injector.next_batch()
            chunks.append(session.run(target,
                                      feed_dict={self.x: batch_x,
                                                 self.init_state: self._zero_state(batch_x.shape[0])}))
        if not chunks:
            return None
        # Concatenate once instead of re-copying the result for every batch.
        return np.concatenate(chunks, axis=0)

    def predict_proba(self, x, session=None, batch_size=500):
        """Predict probability (Softmax)

        Args:
            x (:obj:`numpy.ndarray`): Input features handed to the batch injector.
            session (:obj:`tensorflow.Session`): Session to use; the cached one when omitted.
            batch_size (:obj:`int`): Number of sequences evaluated per run.

        Returns:
            :obj:`numpy.ndarray`: Values of ``self.y`` for all batches, concatenated along axis 0.
        """
        return self._predict_batched(self.y, x, session=session, batch_size=batch_size)

    def predict(self, x, session=None, batch_size=500):
        """Predict class labels

        Args:
            x (:obj:`numpy.ndarray`): Input features handed to the batch injector.
            session (:obj:`tensorflow.Session`): Session to use; the cached one when omitted.
            batch_size (:obj:`int`): Number of sequences evaluated per run.

        Returns:
            :obj:`numpy.ndarray`: Values of ``self.y_class`` for all batches, concatenated along axis 0.
        """
        return self._predict_batched(self.y_class, x, session=session, batch_size=batch_size)

    def predict_accuracy(self, x, y, session=None):
        """Get Accuracy given feature array and corresponding labels

        Args:
            x (:obj:`numpy.ndarray`): Input features.
            y (:obj:`numpy.ndarray`): One-hot labels aligned with the predictions
                returned by :meth:`predict` for ``x``.
            session (:obj:`tensorflow.Session`): Session to use; the cached one when omitted.

        Returns:
            :obj:`float`: Number of matching labels divided by ``y.shape[0]``.
        """
        session = self._resolve_session(session)
        predicted = self.predict(x, session=session)
        truth = y.argmax(y.ndim - 1)
        return np.sum(predicted == truth) / float(y.shape[0])
|
208
|
|
|
|
|
209
|
|
|
|
|
210
|
|
|
class LSTM:
    """Single Layer LSTM Implementation

    In this implementation the LSTM state is a tuple (``state_is_tuple=True``),
    which avoids the "deprecated" warning, and the static unrolling of the RNN
    is replaced with dynamic unrolling (``tf.nn.dynamic_rnn``). As a result,
    no batch injector is needed for prediction: a whole sequence is evaluated
    as a single batch entry.

    Args:
        num_features (:obj:`int`): Number of input features.
        num_classes (:obj:`int`): Number of target classes.
        num_hidden (:obj:`int`): Number of units in the input hidden layer.
        num_units (:obj:`int`): Number of units in the RNN layer.
        num_skip (:obj:`int`): Number of leading time steps of every sequence that
            are excluded from the outputs, the accuracy and the loss.
        graph (:obj:`tf.Graph`): Graph to build the model in; a new graph is created when omitted.
        optimizer: Optimizer providing ``minimize``; ``tf.train.AdamOptimizer()``
            is used when omitted.

    Attributes:
        num_features (:obj:`int`): Number of input features.
        num_classes (:obj:`int`): Number of target classes.
        num_hidden (:obj:`int`): Number of units in the input hidden layer.
        num_units (:obj:`int`): Number of units in the RNN layer.
        num_skip (:obj:`int`): Number of leading time steps ignored in outputs and loss.
        graph (:obj:`tf.Graph`): Graph holding the model.
        summaries (:obj:`list`): List of tensorflow summaries to be displayed on tensorboard.
        x (:obj:`tf.Tensor`): Input tensor of size [num_batches, length, num_features]
        length (:obj:`tf.Tensor`): 1D placeholder (float32) of size [num_batches] holding the
            length of each sequence in the batch.
        initial_state_c (:obj:`tf.Tensor`): Initial cell state, size [num_batches, num_units].
        initial_state_h (:obj:`tf.Tensor`): Initial hidden state, size [num_batches, num_units].
        y_ (:obj:`tf.Tensor`): Ground Truth of size [num_batches, length, num_classes].
    """

    def __init__(self, num_features, num_classes, num_hidden, num_units, num_skip=0, graph=None, optimizer=None):
        self.num_features = num_features
        self.num_classes = num_classes
        self.num_hidden = num_hidden
        self.num_units = num_units
        self.num_skip = num_skip
        self.summaries = []
        # Always bind self.graph: previously a caller-supplied graph was never
        # stored and the next statement failed with AttributeError.
        self.graph = tf.Graph() if graph is None else graph
        with self.graph.as_default():
            # Inputs
            with tf.name_scope('input'):
                # Input tensor X, shape: [batch, length, features]
                self.x = tf.placeholder(tf.float32, shape=[None, None, num_features], name='input_x')
                # Sequence lengths, shape: [batch]
                self.length = tf.placeholder(tf.float32, shape=[None, ], name='input_x_length')
                # Initial states (as tuples), shape: [batch, units]
                self.initial_state_c = tf.placeholder(tf.float32, shape=[None, num_units], name='initial_state_c')
                self.initial_state_h = tf.placeholder(tf.float32, shape=[None, num_units], name='initial_state_h')
                # Targets, shape: [batch, length, classes]
                self.y_ = tf.placeholder(tf.float32, shape=[None, None, num_classes], name='targets')
            # Input hidden layer with num_hidden units
            with tf.name_scope('input_layer'):
                self.input_W = tf.Variable(
                    tf.truncated_normal(
                        shape=[num_features, num_hidden], stddev=1.0 / math.sqrt(float(num_hidden))),
                    name='weights')
                self.input_b = tf.Variable(tf.zeros(shape=[num_hidden]), name='bias')

                def hidden_fn(seq):
                    # seq: one batch entry, shape [length, num_features]
                    return tf.nn.sigmoid(tf.matmul(seq, self.input_W) + self.input_b)
                # Activation of hidden layer, shape: [batch, length, num_hidden]
                self.hidden_y = tf.map_fn(hidden_fn, self.x)
            # Recursive Layer (RNN)
            with tf.name_scope('rnn'):
                self.cell = tfcontrib.rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=True)
                # rnn outputs, shape: [batch, length, num_units]
                rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
                    self.cell, self.hidden_y, sequence_length=self.length,
                    initial_state=tfcontrib.rnn.LSTMStateTuple(self.initial_state_c, self.initial_state_h))
            # Apply Softmax Layer to all outputs in all batches
            with tf.name_scope('output_layer'):
                self.output_W = tf.Variable(
                    tf.truncated_normal(shape=[num_units, num_classes], stddev=1.0/math.sqrt(float(num_units))),
                    name='weights'
                )
                self.output_b = tf.Variable(tf.zeros(shape=[num_classes]), name='biases')

                def out_mult_fn(seq):
                    return tf.matmul(seq, self.output_W) + self.output_b

                def out_softmax_fn(seq):
                    return tf.nn.softmax(seq)

                def out_class_fn(seq):
                    return tf.argmax(seq, axis=1)

                def out_softmax_entropy(params):
                    logits, labels = params
                    return tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)

                # self.logit_outputs is a tensor of shape [batch, length, num_classes]
                self.logit_outputs = tf.map_fn(out_mult_fn, rnn_outputs)
                # self.softmax_outputs applies softmax to logit_outputs as a tensor of shape
                # [batch, length, num_classes]
                self.softmax_outputs = tf.map_fn(out_softmax_fn, self.logit_outputs)
                # Probability output y, shape: [batch, length-num_skip, num_classes]
                self.y = self.softmax_outputs[:, num_skip:, :]
                # Predicted class per time step, shape: [batch, length-num_skip]
                self.y_class = tf.map_fn(out_class_fn, self.y, dtype=tf.int64)

            # Accuracy
            def accuracy_fn(params):
                prediction, truth = params
                return tf.reduce_mean(tf.cast(tf.equal(prediction, tf.argmax(truth, 1)), tf.float32))

            # Per-sequence accuracy, then averaged over the batch.
            self.accuracy_outputs = tf.map_fn(accuracy_fn, (self.y_class, self.y_[:, num_skip:, :]), dtype=tf.float32)
            self.accuracy = tf.reduce_mean(self.accuracy_outputs)
            # Per-step cross entropy over the non-skipped part of every sequence.
            self.entropy_outputs = tf.map_fn(out_softmax_entropy,
                                             (self.logit_outputs[:, num_skip:, :], self.y_[:, num_skip:, :]),
                                             dtype=tf.float32)
            # Softmax Cross-Entropy Loss
            self.loss = tf.reduce_mean(self.entropy_outputs)
            # Setup Optimizer
            self.optimizer = tf.train.AdamOptimizer() if optimizer is None else optimizer
            # Fit Step
            with tf.name_scope('train'):
                self.fit_step = self.optimizer.minimize(self.loss)
            # Setup Summaries
            self.summaries.append(variable_summary(self.input_W, tag='input_layer/weights'))
            self.summaries.append(variable_summary(self.input_b, tag='input_layer/biases'))
            self.summaries.append(variable_summary(self.output_W, tag='output_layer/weights'))
            self.summaries.append(variable_summary(self.output_b, tag='output_layer/biases'))
            self.summaries.append(tf.summary.scalar('cross_entropy', self.loss))
            self.summaries.append(tf.summary.scalar('accuracy', self.accuracy))
            self.merged = tf.summary.merge(self.summaries)
            self.init_op = tf.global_variables_initializer()
        self.sess = None

    def _resolve_session(self, session=None):
        """Return ``session`` if given, else the cached session (created on first use).

        The session is bound to ``self.graph``; a plain ``tf.Session()`` would
        attach to the default graph, which does not contain this model.
        """
        if session is not None:
            return session
        if self.sess is None:
            self.sess = tf.Session(graph=self.graph)
        return self.sess

    def _run_on_sequence(self, targets, x, y=None, session=None, writer=None, writer_id=None):
        """Run ``targets`` on one whole sequence fed as a batch of size one.

        When ``writer`` is given the merged summary is fetched as well and
        written under step ``writer_id``. Returns the list of values of
        ``targets`` (without the summary).
        """
        session = self._resolve_session(session)
        fetches = list(targets)
        if writer is not None:
            fetches.append(self.merged)
        feed = {self.x: x.reshape((1,) + x.shape),
                # ``int`` is what the removed ``np.int`` alias stood for.
                self.length: np.array([x.shape[0]], dtype=int),
                self.initial_state_c: np.zeros((1, self.num_units)),
                self.initial_state_h: np.zeros((1, self.num_units))}
        if y is not None:
            feed[self.y_] = y.reshape((1,) + y.shape)
        results = session.run(fetches, feed_dict=feed)
        if writer is not None:
            writer.add_summary(results[-1], writer_id)
            results = results[:-1]
        return results

    def fit(self, x, y, length, batch_size=100, iter_num=100, summaries_dir=None, summary_interval=100,
            test_x=None, test_y=None, session=None, criterion='const_iteration', reintialize=True):
        """Fit the model to the dataset

        Args:
            x (:obj:`numpy.ndarray`): Input features x, shape: [num_samples, num_features].
            y (:obj:`numpy.ndarray`): Corresponding labels; indexed as a 2D array
                (num_samples, num_classes) by the 'monitor_based' split.
            length (:obj:`int`): Length of each training sequence; the batch injector is
                configured with ``self.num_skip + length``.
            batch_size (:obj:`int`): Batch size used in gradient descent.
            iter_num (:obj:`int`): Number of training iterations for const iterations, step depth for monitor based
                stopping criterion.
            summaries_dir (:obj:`str`): Path of the directory to store summaries and saved values.
                Required when ``criterion`` is 'monitor_based' (the best checkpoint is stored there).
            summary_interval (:obj:`int`): The step interval to export variable summaries.
            test_x (:obj:`numpy.ndarray`): Test feature array used for monitoring training progress.
            test_y (:obj:`numpy.ndarray`): Test label array used for monitoring training progress.
            session (:obj:`tensorflow.Session`): Session to run training functions.
            criterion (:obj:`str`): Stopping criteria. 'const_iteration' or 'monitor_based'.
            reintialize (:obj:`bool`): Run the variable initializer before training when True.

        Raises:
            ValueError: If ``criterion`` is 'monitor_based' and ``summaries_dir`` is None.
        """
        session = self._resolve_session(session)
        # Validate the arguments before touching variables or the file system.
        if criterion not in ('const_iteration', 'monitor_based'):
            logger.error('Wrong criterion %s specified.', criterion)
            return
        monitor_based = criterion == 'monitor_based'
        if monitor_based and summaries_dir is None:
            raise ValueError("criterion 'monitor_based' needs summaries_dir to store the best checkpoint.")
        has_test = (test_x is not None) and (test_y is not None)
        train_writer = None
        test_writer = None
        valid_writer = None
        if summaries_dir is not None:
            train_writer = tf.summary.FileWriter(summaries_dir + '/train')
            test_writer = tf.summary.FileWriter(summaries_dir + '/test')
            valid_writer = tf.summary.FileWriter(summaries_dir + '/valid')
        try:
            if reintialize:
                session.run(self.init_op)
            with self.graph.as_default():
                saver = tf.train.Saver()
            num_samples = x.shape[0]
            valid_x = None
            valid_y = None
            checkpoint_path = None
            # Get Stopping Criterion
            if monitor_based:
                checkpoint_path = os.path.join(summaries_dir, 'best.ckpt')
                # Float literals keep the 4/5 split correct under Python 2 division too.
                valid_set_start = int(4.0 / 5.0 * (num_samples - self.num_skip))
                valid_x = x[valid_set_start:num_samples, :]
                valid_y = y[valid_set_start:num_samples, :]
                x = x[0:valid_set_start + self.num_skip, :]
                y = y[0:valid_set_start + self.num_skip, :]

                def monitor_fn(*args, **kwargs):
                    # Evaluate with the session that is actually being trained.
                    kwargs.setdefault('session', session)
                    return self.predict_accuracy(*args, **kwargs)

                _criterion = MonitorBased(n_steps=iter_num,
                                          monitor_fn=monitor_fn,
                                          monitor_fn_args=(valid_x, valid_y),
                                          save_fn=saver.save,
                                          save_fn_args=(session, checkpoint_path))
            else:
                _criterion = ConstIterations(num_iters=iter_num)
            # Setup batch injector
            injector = BatchSequenceInjector(data_x=x, data_y=y, batch_size=batch_size,
                                             length=self.num_skip + length, with_seq=True)
            # Iteration Starts
            step = 0
            while _criterion.continue_learning():
                # Learning
                batch_x, batch_y, batch_length = injector.next_batch(skip=50)
                zero_state = np.zeros((batch_x.shape[0], self.num_units))
                loss, accuracy, _ = session.run(
                    [self.loss, self.accuracy, self.fit_step],
                    feed_dict={self.x: batch_x, self.y_: batch_y, self.length: batch_length,
                               self.initial_state_c: zero_state,
                               self.initial_state_h: zero_state})
                # Take summaries
                if summaries_dir is not None and (step % summary_interval == 0):
                    accuracy, loss = self.predict_accuracy(x, y, session=session, writer=train_writer,
                                                           writer_id=step, with_loss=True)
                    logger.info('Step %d, train_set accuracy %g, loss %g', step, accuracy, loss)
                    # The test set is optional; skip its report when it is absent.
                    if has_test:
                        accuracy, loss = self.predict_accuracy(test_x, test_y, session=session,
                                                               writer=test_writer, writer_id=step,
                                                               with_loss=True)
                        logger.info('Step %d, test_set accuracy %g, loss %g', step, accuracy, loss)
                    if monitor_based:
                        accuracy, loss = self.predict_accuracy(valid_x, valid_y, session=session,
                                                               writer=valid_writer, writer_id=step,
                                                               with_loss=True)
                        logger.info('Step %d, valid_set accuracy %g, loss %g', step, accuracy, loss)
                step += 1
            # Finish Iteration: roll back to the best model seen by the monitor.
            if monitor_based:
                saver.restore(session, checkpoint_path)
            logger.debug('Total Epoch: %d, current batch %d', injector.num_epochs, injector.cur_batch)
        finally:
            # Flush and release the event files even if training fails.
            for writer in (train_writer, test_writer, valid_writer):
                if writer is not None:
                    writer.close()

    def predict_proba(self, x, session=None, writer=None, writer_id=None):
        """Predict probability (Softmax)

        Args:
            x (:obj:`numpy.ndarray`): One sequence, shape [length, num_features].
            session (:obj:`tensorflow.Session`): Session to use; the cached one when omitted.
            writer: Summary writer; when given the merged summary is also evaluated and
                written. NOTE(review): the merged summary includes loss/accuracy, which
                presumably need ``y_`` to be fed - confirm this path works without labels.
            writer_id (:obj:`int`): Step id the summary is written under.

        Returns:
            :obj:`numpy.ndarray`: Class probabilities, shape [length - num_skip, num_classes].
        """
        results = self._run_on_sequence([self.y], x, session=session, writer=writer, writer_id=writer_id)
        return results[0][0, :, :]

    def predict(self, x, session=None, writer=None, writer_id=None):
        """Predict class labels

        Args:
            x (:obj:`numpy.ndarray`): One sequence, shape [length, num_features].
            session (:obj:`tensorflow.Session`): Session to use; the cached one when omitted.
            writer: Summary writer; when given the merged summary is also evaluated and
                written. NOTE(review): the merged summary includes loss/accuracy, which
                presumably need ``y_`` to be fed - confirm this path works without labels.
            writer_id (:obj:`int`): Step id the summary is written under.

        Returns:
            :obj:`numpy.ndarray`: Predicted class index per time step, shape [length - num_skip].
        """
        results = self._run_on_sequence([self.y_class], x, session=session, writer=writer, writer_id=writer_id)
        return results[0][0, :]

    def predict_accuracy(self, x, y, session=None, writer=None, writer_id=None, with_loss=False):
        """Get Accuracy given feature array and corresponding labels

        Args:
            x (:obj:`numpy.ndarray`): One sequence, shape [length, num_features].
            y (:obj:`numpy.ndarray`): Labels of the sequence, shape [length, num_classes].
            session (:obj:`tensorflow.Session`): Session to use; the cached one when omitted.
            writer: Summary writer; when given the merged summary is written under ``writer_id``.
            writer_id (:obj:`int`): Step id the summary is written under.
            with_loss (:obj:`bool`): Also return the loss when True.

        Returns:
            The accuracy, or the tuple ``(accuracy, loss)`` when ``with_loss`` is True.
        """
        targets = [self.accuracy]
        if with_loss:
            targets.append(self.loss)
        results = self._run_on_sequence(targets, x, y=y, session=session, writer=writer, writer_id=writer_id)
        if with_loss:
            return results[0], results[1]
        return results[0]
|
493
|
|
|
|
|
494
|
|
|
|
|
495
|
|
|
|
|
496
|
|
|
class SimpleLSTM: |
|
497
|
|
|
"""Single Layer LSTM Implementation |
|
498
|
|
|
|
|
499
|
|
|
In this new implementation, state_is_tuple is disabled to suppress the "deprecated" warning and |
|
500
|
|
|
performance improvement. The static unrolling of the RNN is replaced with dynamic unrolling. |
|
501
|
|
|
As a result, no batch injector is needed for prediction. |
|
502
|
|
|
|
|
503
|
|
|
Args: |
|
504
|
|
|
num_features |
|
505
|
|
|
num_classes |
|
506
|
|
|
num_units |
|
507
|
|
|
""" |
|
508
|
|
|
|
|
509
|
|
|
def __init__(self, num_features, num_classes, num_hidden, num_units, num_skip, graph=None, optimizer=None): |
|
510
|
|
|
self.num_features = num_features |
|
511
|
|
|
self.num_classes = num_classes |
|
512
|
|
|
self.num_units = num_units |
|
513
|
|
|
self.num_skip = num_skip |
|
514
|
|
|
self.summaries = [] |
|
515
|
|
|
if graph is None: |
|
516
|
|
|
graph = tf.Graph() |
|
517
|
|
|
with graph.as_default(): |
|
518
|
|
|
# Inputs |
|
519
|
|
|
with tf.name_scope('input'): |
|
520
|
|
|
# X in the shape of (seq_length + num_skip, features) |
|
521
|
|
|
self.x = tf.placeholder(tf.float32, shape=[None, num_features], name='input_x') |
|
522
|
|
|
# length is the actual length of the sequence for each batch |
|
523
|
|
|
self.length = tf.placeholder(tf.int64, shape=[1], name='input_x_length') |
|
524
|
|
|
self.init_state = tf.placeholder(tf.float32, shape=[2 * num_units], name='init_state') |
|
525
|
|
|
self.y_ = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y') |
|
526
|
|
|
self.y_skiped = self.y_[num_skip:, :] |
|
527
|
|
|
# Input Hidden layers |
|
528
|
|
|
self.hidden_layer = HiddenLayer(num_features, num_units, 'Hidden', x=self.x) |
|
529
|
|
|
# Recursive Layer |
|
530
|
|
|
with tf.name_scope('rnn'): |
|
531
|
|
|
# Apply RNN |
|
532
|
|
|
self.cell = rnn.BasicLSTMCell(num_units=num_units, state_is_tuple=False) |
|
533
|
|
|
# Outputs is a tensor with shape [seq_length + num_skip, num_units] |
|
534
|
|
|
outputs, states = tf.nn.dynamic_rnn( |
|
535
|
|
|
self.cell, tf.reshape(self.hidden_layer.y, [1, -1, num_units]), |
|
536
|
|
|
sequence_length=(self.length + num_skip), |
|
537
|
|
|
initial_state=self.init_state, time_major=False) |
|
538
|
|
|
# Apply Softmax Layer to all outputs in the valid items in the sequence. |
|
539
|
|
|
self.output_layer = SoftmaxLayer(num_units, num_classes, 'SoftmaxLayer', |
|
540
|
|
|
x=outputs[1, num_skip:, :]) |
|
541
|
|
|
# Softmax Cross-Entropy Loss |
|
542
|
|
|
self.loss = tf.reduce_mean( |
|
543
|
|
|
tf.nn.softmax_cross_entropy_with_logits(self.output_layer.logits, self.y_skiped, |
|
544
|
|
|
name='SoftmaxCrossEntropy') |
|
545
|
|
|
) |
|
546
|
|
|
# Setup Optimizer |
|
547
|
|
|
if optimizer is None: |
|
548
|
|
|
self.optimizer = tf.train.AdamOptimizer() |
|
549
|
|
|
else: |
|
550
|
|
|
self.optimizer = optimizer |
|
551
|
|
|
# Predicted Probability |
|
552
|
|
|
self.y = self.output_layer.y |
|
553
|
|
|
self.y_class = tf.argmax(self.y, 1) |
|
554
|
|
|
# Evaluation |
|
555
|
|
|
self.correct_prediction = tf.equal(self.y_class, tf.argmax(self.y_skiped, 1)) |
|
556
|
|
|
self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32)) |
|
557
|
|
|
# Fit Step |
|
558
|
|
|
with tf.name_scope('train'): |
|
559
|
|
|
self.fit_step = self.optimizer.minimize(self.loss) |
|
560
|
|
|
# Setup Summaries |
|
561
|
|
|
self.summaries += self.hidden_layer.summaries |
|
562
|
|
|
self.summaries += self.output_layer.summaries |
|
563
|
|
|
self.summaries.append(tf.summary.scalar('cross_entropy', self.loss)) |
|
564
|
|
|
self.summaries.append(tf.summary.scalar('accuracy', self.accuracy)) |
|
565
|
|
|
self.merged = tf.summary.merge(self.summaries) |
|
566
|
|
|
self.sess = None |
|
567
|
|
|
|
|
568
|
|
|
def fit(self, x, y, num_skip=100, batch_size=100, iter_num=100, summaries_dir=None, summary_interval=10,
        test_x=None, test_y=None, session=None, criterion='const_iteration'):
    """Fit the model to the dataset

    Training slides a window of ``num_skip + batch_size`` consecutive samples over the data, moving it
    forward by one sample per step and wrapping to the start once a full window no longer fits.

    Args:
        x (:obj:`numpy.ndarray`): Input features of shape (num_samples, num_features).
        y (:obj:`numpy.ndarray`): Corresponding labels, indexed as a 2-D array (num_samples, num_classes).
        num_skip (:obj:`int`): Number of leading samples in every fed sequence; the ``length`` placeholder
            is always fed the sequence size minus ``num_skip``. Presumably these are warm-up steps that
            are excluded from the loss - confirm against the graph construction.
        batch_size (:obj:`int`): Value fed to the ``length`` placeholder for every training window.
        iter_num (:obj:`int`): Number of training iterations for const iterations, step depth for monitor based
            stopping criterion.
        summaries_dir (:obj:`str`): Path of the directory to store summaries and saved values. Required when
            ``criterion`` is ``'monitor_based'`` because the best checkpoint is written there.
        summary_interval (:obj:`int`): The step interval to export variable summaries.
        test_x (:obj:`numpy.ndarray`): Test feature array used for monitoring training progress.
        test_y (:obj:`numpy.ndarray`): Test label array used for monitoring training progress.
        session (:obj:`tensorflow.Session`): Session to run training functions. When omitted, the session
            stored on the model is used, and a new one is created and stored if there is none.
        criterion (:obj:`str`): Stopping criteria. 'const_iteration' or 'monitor_based'.

    Raises:
        ValueError: If ``criterion`` is ``'monitor_based'`` and ``summaries_dir`` is None.
    """
    # Validate before touching the session so that a bad call does not re-initialise the variables.
    if criterion not in ('const_iteration', 'monitor_based'):
        logger.error('Wrong criterion %s specified.', criterion)
        return
    if criterion == 'monitor_based' and summaries_dir is None:
        raise ValueError("summaries_dir is required when criterion is 'monitor_based'.")
    if session is None:
        if self.sess is None:
            self.sess = tf.Session()
        session = self.sess
    train_writer = None
    test_writer = None
    if summaries_dir is not None:
        train_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'train'))
        test_writer = tf.summary.FileWriter(os.path.join(summaries_dir, 'test'))
    try:
        session.run(tf.global_variables_initializer())
        # Get Stopping Criterion
        saver = None
        checkpoint_path = None
        if criterion == 'const_iteration':
            _criterion = ConstIterations(num_iters=iter_num)
        else:
            # Hold out the last fifth of the usable samples for validation. The validation slice keeps
            # num_skip extra leading samples in front of the held-out part.
            num_samples = x.shape[0]
            valid_set_len = max(num_samples - num_skip, 0) // 5
            valid_start = num_samples - valid_set_len - num_skip
            valid_x = x[valid_start:num_samples, :]
            valid_y = y[valid_start:num_samples, :]
            x = x[0:num_samples - valid_set_len, :]
            y = y[0:num_samples - valid_set_len, :]
            # One saver and one path are shared by the save callback and the final restore.
            saver = tf.train.Saver()
            checkpoint_path = os.path.join(summaries_dir, 'best.ckpt')
            _criterion = MonitorBased(n_steps=iter_num,
                                      monitor_fn=self.predict_accuracy,
                                      monitor_fn_args=(valid_x, valid_y),
                                      save_fn=saver.save,
                                      save_fn_args=(session, checkpoint_path))
        # Iteration Starts
        window = num_skip + batch_size
        train_len = x.shape[0]
        zero_state = np.zeros(2 * self.num_units)
        offset = 0  # start of the current training window
        step = 0    # monotonically increasing step, used for summaries
        while _criterion.continue_learning():
            # Learning: the window end moves together with its start.
            batch_x = x[offset:offset + window, :]
            batch_y = y[offset:offset + window, :]
            session.run(
                self.fit_step,
                feed_dict={self.x: batch_x, self.y_: batch_y, self.length: batch_size,
                           self.init_state: zero_state})
            # Summary
            if train_writer is not None and (step % summary_interval == 0):
                summary, loss, accuracy = session.run(
                    [self.merged, self.loss, self.accuracy],
                    feed_dict={self.x: x, self.y_: y, self.length: train_len - num_skip,
                               self.init_state: zero_state}
                )
                train_writer.add_summary(summary, step)
                logger.info('Step %d, train_set accuracy %g, loss %g', step, accuracy, loss)
                if (test_x is not None) and (test_y is not None):
                    merged, accuracy = session.run(
                        [self.merged, self.accuracy],
                        feed_dict={self.x: test_x, self.y_: test_y, self.length: test_x.shape[0] - num_skip,
                                   self.init_state: zero_state})
                    test_writer.add_summary(merged, step)
                    logger.info('test_set accuracy %g', accuracy)
            step += 1
            # Advance the window by one sample; wrap once a full window no longer fits.
            offset += 1
            if offset + window > train_len:
                offset = 0
        # Finish Iteration
        if saver is not None:
            saver.restore(session, checkpoint_path)
    finally:
        # Flush and release the event files even if training fails.
        for writer in (train_writer, test_writer):
            if writer is not None:
                writer.close()
|
648
|
|
|
|
|
649
|
|
View Code Duplication |
def predict_proba(self, x, session=None, batch_size=500):
    """Predict probability (Softmax)

    Args:
        x (:obj:`numpy.ndarray`): Input feature array. Its first dimension minus ``self.num_skip`` is fed
            to the ``length`` placeholder.
        session (:obj:`tensorflow.Session`): Session to evaluate with. When omitted, the session stored on
            the model is used, and a new one is created and stored if there is none.
        batch_size (:obj:`int`): Not used by this method.

    Returns:
        The result of evaluating ``self.y`` in the session.
    """
    if session is None:
        # Lazily create the model-owned session on first use.
        if self.sess is None:
            self.sess = tf.Session()
        session = self.sess
    feed = {
        self.x: x,
        self.length: x.shape[0] - self.num_skip,
        self.init_state: np.zeros(2 * self.num_units),
    }
    return session.run(self.y, feed_dict=feed)
|
661
|
|
|
|
|
662
|
|
View Code Duplication |
def predict(self, x, session=None):
    """Predict class index

    Args:
        x (:obj:`numpy.ndarray`): Input feature array. Its first dimension minus ``self.num_skip`` is fed
            to the ``length`` placeholder.
        session (:obj:`tensorflow.Session`): Session to evaluate with. When omitted, the session stored on
            the model is used, and a new one is created and stored if there is none.

    Returns:
        The result of evaluating ``self.y_class`` in the session.
    """
    if session is None:
        # Lazily create the model-owned session on first use.
        if self.sess is None:
            self.sess = tf.Session()
        session = self.sess
    feed = {
        self.x: x,
        self.length: x.shape[0] - self.num_skip,
        self.init_state: np.zeros(2 * self.num_units),
    }
    return session.run(self.y_class, feed_dict=feed)
|
672
|
|
|
|
|
673
|
|
View Code Duplication |
def predict_accuracy(self, x, y, session=None):
    """Get Accuracy given feature array and corresponding labels

    Args:
        x (:obj:`numpy.ndarray`): Input feature array. Its first dimension minus ``self.num_skip`` is fed
            to the ``length`` placeholder.
        y (:obj:`numpy.ndarray`): Label array fed to the ``y_`` placeholder.
        session (:obj:`tensorflow.Session`): Session to evaluate with. When omitted, the session stored on
            the model is used, and a new one is created and stored if there is none.

    Returns:
        The result of evaluating ``self.accuracy`` in the session.
    """
    if session is None:
        # Lazily create the model-owned session on first use.
        if self.sess is None:
            self.sess = tf.Session()
        session = self.sess
    feed = {
        self.x: x,
        self.y_: y,
        self.length: x.shape[0] - self.num_skip,
        self.init_state: np.zeros(2 * self.num_units),
    }
    return session.run(self.accuracy, feed_dict=feed)