1
|
1 |
|
from keras.models import Sequential |
2
|
1 |
|
from keras.layers import Dense, Activation, Convolution1D, Lambda, \ |
3
|
|
|
Convolution2D, Flatten, \ |
4
|
|
|
Reshape, LSTM, Dropout, TimeDistributed, BatchNormalization |
5
|
1 |
|
from keras.regularizers import l2 |
6
|
1 |
|
from keras.optimizers import Adam |
7
|
1 |
|
import numpy as np |
8
|
|
|
|
9
|
|
|
|
10
|
1 |
|
def generate_models(
        x_shape, number_of_classes, number_of_models=5, model_type=None,
        cnn_min_layers=1, cnn_max_layers=10,
        cnn_min_filters=10, cnn_max_filters=100,
        cnn_min_fc_nodes=10, cnn_max_fc_nodes=2000,
        deepconvlstm_min_conv_layers=1, deepconvlstm_max_conv_layers=10,
        deepconvlstm_min_conv_filters=10, deepconvlstm_max_conv_filters=100,
        deepconvlstm_min_lstm_layers=1, deepconvlstm_max_lstm_layers=5,
        deepconvlstm_min_lstm_dims=10, deepconvlstm_max_lstm_dims=100,
        low_lr=1, high_lr=4, low_reg=1, high_reg=4
):
    """
    Generate one or multiple Keras models with random hyperparameters.

    Parameters
    ----------
    x_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    number_of_classes : int
        Number of classes for classification task
    number_of_models : int
        Number of models to generate
    model_type : str, optional
        Type of model to build: 'CNN' or 'DeepConvLSTM'.
        With the default None, the type is drawn at random (50/50) for
        every generated model.
    cnn_min_layers : int
        minimum of Conv layers in CNN model
    cnn_max_layers : int
        maximum of Conv layers in CNN model
    cnn_min_filters : int
        minimum number of filters per Conv layer in CNN model
    cnn_max_filters : int
        maximum number of filters per Conv layer in CNN model
    cnn_min_fc_nodes : int
        minimum number of hidden nodes per Dense layer in CNN model
    cnn_max_fc_nodes : int
        maximum number of hidden nodes per Dense layer in CNN model
    deepconvlstm_min_conv_layers : int
        minimum number of Conv layers in DeepConvLSTM model
    deepconvlstm_max_conv_layers : int
        maximum number of Conv layers in DeepConvLSTM model
    deepconvlstm_min_conv_filters : int
        minimum number of filters per Conv layer in DeepConvLSTM model
    deepconvlstm_max_conv_filters : int
        maximum number of filters per Conv layer in DeepConvLSTM model
    deepconvlstm_min_lstm_layers : int
        minimum number of LSTM layers in DeepConvLSTM model
    deepconvlstm_max_lstm_layers : int
        maximum number of LSTM layers in DeepConvLSTM model
    deepconvlstm_min_lstm_dims : int
        minimum number of hidden nodes per LSTM layer in DeepConvLSTM model
    deepconvlstm_max_lstm_dims : int
        maximum number of hidden nodes per LSTM layer in DeepConvLSTM model
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    models : list
        List of tuples `(model, hyperparameters, model_type)`: the compiled
        Keras model, the dict of hyperparameters it was built with, and the
        name of its type ('CNN' or 'DeepConvLSTM').

    Raises
    ------
    ValueError
        If `model_type` is neither None, 'CNN' nor 'DeepConvLSTM'.
    """
    supported_types = ('CNN', 'DeepConvLSTM')
    # Fail fast with a clear message instead of crashing later on an
    # unset generator function / unbound hyperparameter dict.
    if model_type is not None and model_type not in supported_types:
        raise ValueError(
            "Unknown model_type %r; expected None or one of %r"
            % (model_type, supported_types))

    models = []
    for _ in range(number_of_models):
        if model_type is None:  # random model choice:
            current_model_type = (
                'CNN' if np.random.random() < 0.5 else 'DeepConvLSTM')
        else:  # user-defined model choice:
            current_model_type = model_type

        if current_model_type == 'CNN':
            generate_model = generate_CNN_model  # generate_model is a function
            hyperparameters = generate_CNN_hyperparameter_set(
                min_layers=cnn_min_layers, max_layers=cnn_max_layers,
                min_filters=cnn_min_filters, max_filters=cnn_max_filters,
                min_fc_nodes=cnn_min_fc_nodes, max_fc_nodes=cnn_max_fc_nodes,
                low_lr=low_lr, high_lr=high_lr, low_reg=low_reg,
                high_reg=high_reg)
        else:  # 'DeepConvLSTM' (the only other supported type)
            generate_model = generate_DeepConvLSTM_model
            hyperparameters = generate_DeepConvLSTM_hyperparameter_set(
                min_conv_layers=deepconvlstm_min_conv_layers,
                max_conv_layers=deepconvlstm_max_conv_layers,
                min_conv_filters=deepconvlstm_min_conv_filters,
                max_conv_filters=deepconvlstm_max_conv_filters,
                min_lstm_layers=deepconvlstm_min_lstm_layers,
                max_lstm_layers=deepconvlstm_max_lstm_layers,
                min_lstm_dims=deepconvlstm_min_lstm_dims,
                max_lstm_dims=deepconvlstm_max_lstm_dims,
                low_lr=low_lr, high_lr=high_lr, low_reg=low_reg,
                high_reg=high_reg)

        models.append(
            (generate_model(x_shape, number_of_classes, **hyperparameters),
             hyperparameters, current_model_type))
    return models
114
|
|
|
|
115
|
|
|
|
116
|
1 |
|
def generate_DeepConvLSTM_model(
        x_shape, class_number, filters, lstm_dims, learning_rate=0.01,
        regularization_rate=0.01):
    """
    Build and compile a model of stacked convolution layers followed by
    stacked LSTM layers.
    See Ordonez et al., 2016, http://dx.doi.org/10.3390/s16010115

    Parameters
    ----------
    x_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    class_number : int
        Number of classes for classification task
    filters : list of ints
        number of filters for each convolutional layer
    lstm_dims : list of ints
        number of hidden nodes for each LSTM layer
    learning_rate : float
        learning rate passed to the Adam optimizer
    regularization_rate : float
        L2 regularization rate used for the convolution and dense weights

    Returns
    -------
    model : Keras model
        The compiled Keras model
    """
    num_timesteps, num_channels = x_shape[1], x_shape[2]
    num_classes = class_number
    initializer = 'lecun_uniform'  # weight initialization scheme

    model = Sequential()
    model.add(BatchNormalization(input_shape=(num_timesteps, num_channels)))

    # Add a leading axis of size 1 so the 2D convolutions can be applied
    # to each (timesteps, channels) sample.
    expand = Reshape(target_shape=(1, num_timesteps, num_channels))
    model.add(expand)

    # One convolution -> batch normalization -> ReLU group per entry in
    # `filters`; each entry is the number of filters of that layer.
    for num_filters in filters:
        convolution = Convolution2D(
            num_filters, nb_row=3, nb_col=1, border_mode='same',
            W_regularizer=l2(regularization_rate),
            init=weightinit_or(initializer),
            dim_ordering='th') if False else Convolution2D(
            num_filters, nb_row=3, nb_col=1, border_mode='same',
            W_regularizer=l2(regularization_rate),
            init=initializer,
            dim_ordering='th')
        model.add(convolution)
        model.add(BatchNormalization())
        model.add(Activation('relu'))

    # Collapse back to two dimensions per sample; the feature axis now
    # holds the filters of the last convolution for every channel.
    collapse = Reshape(
        target_shape=(num_timesteps, filters[-1] * num_channels))
    model.add(collapse)

    for hidden_nodes in lstm_dims:
        recurrent = LSTM(output_dim=hidden_nodes, return_sequences=True,
                         activation='tanh')
        model.add(recurrent)

    model.add(Dropout(0.5))  # dropout before the dense layer

    # The dense layer is applied at every timestep, so each timestep gets
    # its own class scores.
    per_step_dense = TimeDistributed(
        Dense(num_classes, W_regularizer=l2(regularization_rate)))
    model.add(per_step_dense)
    model.add(Activation("softmax"))

    # Keep only the prediction of the last timestep as the model output.
    model.add(Lambda(lambda x: x[:, -1, :], output_shape=[num_classes]))

    optimizer = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return model
186
|
|
|
|
187
|
|
|
|
188
|
1 |
|
def generate_CNN_model(x_shape, class_number, filters, fc_hidden_nodes,
                       learning_rate=0.01, regularization_rate=0.01):
    """
    Build and compile a convolutional neural network (CNN) model.

    The network is a stack of 1D convolution groups, followed by one
    hidden dense layer and a softmax classification layer.

    Parameters
    ----------
    x_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    class_number : int
        Number of classes for classification task
    filters : list of ints
        number of filters for each convolutional layer
    fc_hidden_nodes : int
        number of hidden nodes for the hidden dense layer
    learning_rate : float
        learning rate passed to the Adam optimizer
    regularization_rate : float
        L2 regularization rate used for the convolution and hidden dense
        weights

    Returns
    -------
    model : Keras model
        The compiled Keras model
    """
    num_timesteps, num_channels = x_shape[1], x_shape[2]
    num_classes = class_number
    initializer = 'lecun_uniform'  # weight initialization scheme

    model = Sequential()
    input_norm = BatchNormalization(
        input_shape=(num_timesteps, num_channels), mode=0, axis=2)
    model.add(input_norm)

    # One convolution -> batch normalization -> ReLU group per entry in
    # `filters`.
    for num_filters in filters:
        convolution = Convolution1D(num_filters, 3, border_mode='same',
                                    W_regularizer=l2(regularization_rate),
                                    init=initializer)
        model.add(convolution)
        model.add(BatchNormalization())
        model.add(Activation('relu'))

    model.add(Flatten())

    # Fully connected hidden layer with ReLU activation.
    hidden = Dense(output_dim=fc_hidden_nodes,
                   W_regularizer=l2(regularization_rate),
                   init=initializer)
    model.add(hidden)
    model.add(Activation('relu'))

    # Final classification layer: dense -> batch normalization -> softmax.
    model.add(Dense(output_dim=num_classes, init=initializer))
    model.add(BatchNormalization())
    model.add(Activation("softmax"))

    optimizer = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return model
247
|
|
|
|
248
|
|
|
|
249
|
1 |
|
def generate_CNN_hyperparameter_set(min_layers=1, max_layers=10,
                                    min_filters=10, max_filters=100,
                                    min_fc_nodes=10, max_fc_nodes=2000,
                                    low_lr=1, high_lr=4, low_reg=1,
                                    high_reg=4):
    """ Draw a random set of hyperparameters that defines a CNN model.

    All integer ranges are inclusive on both ends.

    Parameters
    ----------
    min_layers : int
        minimum of Conv layers
    max_layers : int
        maximum of Conv layers
    min_filters : int
        minimum number of filters per Conv layer
    max_filters : int
        maximum number of filters per Conv layer
    min_fc_nodes : int
        minimum number of hidden nodes per Dense layer
    max_fc_nodes : int
        maximum number of hidden nodes per Dense layer
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    hyperparameters : dict
        parameters for a CNN model: the base hyperparameters plus
        'filters' (array with one filter count per Conv layer) and
        'fc_hidden_nodes' (int)
    """
    params = generate_base_hyper_parameter_set(
        low_lr, high_lr, low_reg, high_reg)
    # randint excludes its upper bound, hence the `+ 1` on every maximum.
    layer_count = np.random.randint(min_layers, max_layers + 1)
    filters_per_layer = np.random.randint(
        min_filters, max_filters + 1, layer_count)
    params['filters'] = filters_per_layer
    hidden_nodes = np.random.randint(min_fc_nodes, max_fc_nodes + 1)
    params['fc_hidden_nodes'] = hidden_nodes
    return params
296
|
|
|
|
297
|
|
|
|
298
|
1 |
|
def generate_DeepConvLSTM_hyperparameter_set(
        min_conv_layers=1, max_conv_layers=10,
        min_conv_filters=10, max_conv_filters=100,
        min_lstm_layers=1, max_lstm_layers=5,
        min_lstm_dims=10, max_lstm_dims=100,
        low_lr=1, high_lr=4, low_reg=1, high_reg=4):
    """ Draw a random set of hyperparameters that defines a DeepConvLSTM
    model.

    All integer ranges are inclusive on both ends.

    Parameters
    ----------
    min_conv_layers : int
        minimum number of Conv layers in DeepConvLSTM model
    max_conv_layers : int
        maximum number of Conv layers in DeepConvLSTM model
    min_conv_filters : int
        minimum number of filters per Conv layer in DeepConvLSTM model
    max_conv_filters : int
        maximum number of filters per Conv layer in DeepConvLSTM model
    min_lstm_layers : int
        minimum number of LSTM layers in DeepConvLSTM model
    max_lstm_layers : int
        maximum number of LSTM layers in DeepConvLSTM model
    min_lstm_dims : int
        minimum number of hidden nodes per LSTM layer in DeepConvLSTM model
    max_lstm_dims : int
        maximum number of hidden nodes per LSTM layer in DeepConvLSTM model
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    hyperparameters: dict
        hyperparameters for a DeepConvLSTM model: the base hyperparameters
        plus 'filters' (array with one filter count per Conv layer) and
        'lstm_dims' (array with one hidden-node count per LSTM layer)
    """
    params = generate_base_hyper_parameter_set(
        low_lr, high_lr, low_reg, high_reg)
    # randint excludes its upper bound, hence the `+ 1` on every maximum.
    conv_layer_count = np.random.randint(
        min_conv_layers, max_conv_layers + 1)
    conv_filters = np.random.randint(
        min_conv_filters, max_conv_filters + 1, conv_layer_count)
    params['filters'] = conv_filters
    lstm_layer_count = np.random.randint(
        min_lstm_layers, max_lstm_layers + 1)
    lstm_sizes = np.random.randint(
        min_lstm_dims, max_lstm_dims + 1, lstm_layer_count)
    params['lstm_dims'] = lstm_sizes
    return params
353
|
|
|
|
354
|
|
|
|
355
|
1 |
|
def generate_base_hyper_parameter_set(
        low_lr=1,
        high_lr=4,
        low_reg=1,
        high_reg=4):
    """ Draw the hyperparameters that every model type needs; on their own
    they are not enough to define any model.

    Parameters
    ----------
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    hyperparameters : dict
        base hyperparameters, with keys 'learning_rate' and
        'regularization_rate'
    """
    # The learning rate is drawn before the regularization rate.
    learning_rate = get_learning_rate(low_lr, high_lr)
    regularization_rate = get_regularization(low_reg, high_reg)
    return {
        'learning_rate': learning_rate,
        'regularization_rate': regularization_rate,
    }
388
|
|
|
|
389
|
|
|
|
390
|
1 |
|
def get_learning_rate(low=1, high=4):
    """ Draw a random learning rate 10^-n, with the exponent n sampled
    uniformly between the low and high bounds.

    Parameters
    ----------
    low : float
        low bound of the exponent n
    high : float
        high bound of the exponent n

    Returns
    -------
    learning_rate : float
        learning rate
    """
    exponent = np.random.uniform(low, high)
    return 10 ** (-exponent)
408
|
|
|
|
409
|
|
|
|
410
|
1 |
|
def get_regularization(low=1, high=4):
    """ Draw a random regularization rate 10^-n, with the exponent n
    sampled uniformly between the low and high bounds.

    Parameters
    ----------
    low : float
        low bound of the exponent n
    high : float
        high bound of the exponent n

    Returns
    -------
    regularization_rate : float
        regularization rate
    """
    exponent = np.random.uniform(low, high)
    regularization_rate = 10 ** (-exponent)
    return regularization_rate
427
|
|
|
|