Completed
Push — master ( 3e0ec6...69dab7 )
by Christiaan
04:45
created

generate_models()   B

Complexity

Conditions 6

Size

Total Lines 108

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 6.0106

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
c 1
b 0
f 0
dl 0
loc 108
ccs 14
cts 15
cp 0.9333
crap 6.0106
rs 7.0769

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1 1
from keras.models import Sequential
2 1
from keras.layers import Dense, Activation, Convolution1D, Lambda, \
3
    Convolution2D, Flatten, \
4
    Reshape, LSTM, Dropout, TimeDistributed, BatchNormalization
5 1
from keras.regularizers import l2
6 1
from keras.optimizers import Adam
7 1
import numpy as np
8
9
10 1
def generate_models(
        x_shape, number_of_classes, number_of_models=5, metrics=['accuracy'],
        model_type=None,
        cnn_min_layers=1, cnn_max_layers=10,
        cnn_min_filters=10, cnn_max_filters=100,
        cnn_min_fc_nodes=10, cnn_max_fc_nodes=2000,
        deepconvlstm_min_conv_layers=1, deepconvlstm_max_conv_layers=10,
        deepconvlstm_min_conv_filters=10, deepconvlstm_max_conv_filters=100,
        deepconvlstm_min_lstm_layers=1, deepconvlstm_max_lstm_layers=5,
        deepconvlstm_min_lstm_dims=10, deepconvlstm_max_lstm_dims=100,
        low_lr=1, high_lr=4, low_reg=1, high_reg=4
):
    """
    Generate one or multiple untrained Keras models with random
    hyperparameters.

    Parameters
    ----------
    x_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    number_of_classes : int
        Number of classes for classification task
    number_of_models : int
        Number of models to generate
    metrics : list
        Metrics to calculate on the validation set.
        See https://keras.io/metrics/ for possible values.
    model_type : str, optional
        Type of model to build: 'CNN' or 'DeepConvLSTM'.
        With the default ``None`` the type is drawn at random for every
        generated model (each type with probability 0.5).
    cnn_min_layers : int
        minimum of Conv layers in CNN model
    cnn_max_layers : int
        maximum of Conv layers in CNN model
    cnn_min_filters : int
        minimum number of filters per Conv layer in CNN model
    cnn_max_filters : int
        maximum number of filters per Conv layer in CNN model
    cnn_min_fc_nodes : int
        minimum number of hidden nodes per Dense layer in CNN model
    cnn_max_fc_nodes : int
        maximum number of hidden nodes per Dense layer in CNN model
    deepconvlstm_min_conv_layers : int
        minimum number of Conv layers in DeepConvLSTM model
    deepconvlstm_max_conv_layers : int
        maximum number of Conv layers in DeepConvLSTM model
    deepconvlstm_min_conv_filters : int
        minimum number of filters per Conv layer in DeepConvLSTM model
    deepconvlstm_max_conv_filters : int
        maximum number of filters per Conv layer in DeepConvLSTM model
    deepconvlstm_min_lstm_layers : int
        minimum number of LSTM layers in DeepConvLSTM model
    deepconvlstm_max_lstm_layers : int
        maximum number of LSTM layers in DeepConvLSTM model
    deepconvlstm_min_lstm_dims : int
        minimum number of hidden nodes per LSTM layer in DeepConvLSTM model
    deepconvlstm_max_lstm_dims : int
        maximum number of hidden nodes per LSTM layer in DeepConvLSTM model
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    models : list
        List with one tuple per generated model:
        ``(compiled model, hyperparameters dict, model type string)``

    Raises
    ------
    ValueError
        If the model type selected for a model is neither 'CNN' nor
        'DeepConvLSTM'.
    """
    models = []
    for _ in range(0, number_of_models):
        if model_type is None:  # random model choice:
            current_model_type = 'CNN' if np.random.random(
            ) < 0.5 else 'DeepConvLSTM'
        else:  # user-defined model choice:
            current_model_type = model_type

        if current_model_type == 'CNN':
            generate_model = generate_CNN_model  # generate_model is a function
            hyperparameters = generate_CNN_hyperparameter_set(
                min_layers=cnn_min_layers, max_layers=cnn_max_layers,
                min_filters=cnn_min_filters, max_filters=cnn_max_filters,
                min_fc_nodes=cnn_min_fc_nodes, max_fc_nodes=cnn_max_fc_nodes,
                low_lr=low_lr, high_lr=high_lr, low_reg=low_reg,
                high_reg=high_reg)
        elif current_model_type == 'DeepConvLSTM':
            generate_model = generate_DeepConvLSTM_model
            hyperparameters = generate_DeepConvLSTM_hyperparameter_set(
                min_conv_layers=deepconvlstm_min_conv_layers,
                max_conv_layers=deepconvlstm_max_conv_layers,
                min_conv_filters=deepconvlstm_min_conv_filters,
                max_conv_filters=deepconvlstm_max_conv_filters,
                min_lstm_layers=deepconvlstm_min_lstm_layers,
                max_lstm_layers=deepconvlstm_max_lstm_layers,
                min_lstm_dims=deepconvlstm_min_lstm_dims,
                max_lstm_dims=deepconvlstm_max_lstm_dims,
                low_lr=low_lr, high_lr=high_lr, low_reg=low_reg,
                high_reg=high_reg)
        else:
            # Fail with a clear message instead of an unbound-variable error
            # further down when no generator matches the requested type.
            raise ValueError(
                "Unknown model_type {!r}: expected 'CNN', 'DeepConvLSTM' "
                "or None".format(current_model_type))

        model = generate_model(
            x_shape, number_of_classes, metrics=metrics, **hyperparameters)
        models.append((model, hyperparameters, current_model_type))
    return models
118
119
120 1
def generate_DeepConvLSTM_model(
        x_shape, class_number, filters, lstm_dims, learning_rate=0.01,
        regularization_rate=0.01, metrics=['accuracy']):
    """
    Build and compile a network of convolutional layers followed by LSTM
    layers.
    See Ordonez et al., 2016, http://dx.doi.org/10.3390/s16010115

    Parameters
    ----------
    x_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    class_number : int
        Number of classes for classification task
    filters : list of ints
        number of filters for each convolutional layer
    lstm_dims : list of ints
        number of hidden nodes for each LSTM layer
    learning_rate : float
        learning rate passed to the Adam optimizer
    regularization_rate : float
        rate of the L2 kernel regularizer on the convolutional layers and
        on the final dense layer
    metrics : list
        Metrics to calculate on the validation set.
        See https://keras.io/metrics/ for possible values.

    Returns
    -------
    model : Keras model
        The compiled Keras model
    """
    num_timesteps = x_shape[1]  # length of a single time series
    num_channels = x_shape[2]
    initializer = 'lecun_uniform'

    network = Sequential()
    network.add(BatchNormalization(input_shape=(num_timesteps, num_channels)))

    # Add a trailing axis of size 1 so that 2D convolutions can be applied
    # to the (timesteps, channels) input.
    network.add(Reshape(target_shape=(num_timesteps, num_channels, 1)))

    for n_filters in filters:
        conv_layer = Convolution2D(
            n_filters, kernel_size=(3, 1), padding='same',
            kernel_regularizer=l2(regularization_rate),
            kernel_initializer=initializer)
        network.add(conv_layer)
        network.add(BatchNormalization())
        network.add(Activation('relu'))

    # Fold the filter axis back into the channel axis: one feature vector
    # per timestep, with more depth because every channel now carries the
    # outputs of the last convolutional layer's filters.
    flattened_depth = filters[-1] * num_channels
    network.add(Reshape(target_shape=(num_timesteps, flattened_depth)))

    for n_units in lstm_dims:
        recurrent_layer = LSTM(
            units=n_units, return_sequences=True, activation='tanh')
        network.add(recurrent_layer)

    network.add(Dropout(0.5))  # dropout before the dense layer

    # The dense layer is applied to every timestep separately, so each
    # timestep gets its own class scores.
    per_step_classifier = Dense(
        units=class_number, kernel_regularizer=l2(regularization_rate))
    network.add(TimeDistributed(per_step_classifier))
    network.add(Activation("softmax"))

    # Keep only the prediction of the last timestep as the model output.
    network.add(Lambda(lambda x: x[:, -1, :], output_shape=[class_number]))

    network.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=metrics)

    return network
192
193
194 1
def generate_CNN_model(x_shape, class_number, filters, fc_hidden_nodes,
                       learning_rate=0.01, regularization_rate=0.01,
                       metrics=['accuracy']):
    """
    Build and compile a convolutional neural network (CNN) model.

    Parameters
    ----------
    x_shape : tuple
        Shape of the input dataset: (num_samples, num_timesteps, num_channels)
    class_number : int
        Number of classes for classification task
    filters : list of ints
        number of filters for each convolutional layer
    fc_hidden_nodes : int
        number of hidden nodes for the hidden dense layer
    learning_rate : float
        learning rate passed to the Adam optimizer
    regularization_rate : float
        rate of the L2 kernel regularizer on the convolutional layers and
        on the hidden dense layer
    metrics : list
        Metrics to calculate on the validation set.
        See https://keras.io/metrics/ for possible values.

    Returns
    -------
    model : Keras model
        The compiled Keras model
    """
    num_timesteps = x_shape[1]  # length of a single time series
    num_channels = x_shape[2]
    initializer = 'lecun_uniform'

    network = Sequential()
    network.add(BatchNormalization(input_shape=(num_timesteps, num_channels)))

    # Convolutional stack: convolution -> batch normalization -> ReLU.
    for n_filters in filters:
        conv_layer = Convolution1D(
            n_filters, kernel_size=3, padding='same',
            kernel_regularizer=l2(regularization_rate),
            kernel_initializer=initializer)
        network.add(conv_layer)
        network.add(BatchNormalization())
        network.add(Activation('relu'))

    network.add(Flatten())

    # Fully connected hidden layer with ReLU activation.
    hidden_layer = Dense(
        units=fc_hidden_nodes,
        kernel_regularizer=l2(regularization_rate),
        kernel_initializer=initializer)
    network.add(hidden_layer)
    network.add(Activation('relu'))

    # Output layer: one unit per class, batch-normalized before the softmax.
    network.add(Dense(units=class_number, kernel_initializer=initializer))
    network.add(BatchNormalization())
    network.add(Activation("softmax"))

    network.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=metrics)

    return network
255
256
257 1
def generate_CNN_hyperparameter_set(min_layers=1, max_layers=10,
                                    min_filters=10, max_filters=100,
                                    min_fc_nodes=10, max_fc_nodes=2000,
                                    low_lr=1, high_lr=4, low_reg=1,
                                    high_reg=4):
    """ Generate a hyperparameter set that defines a CNN model.

    Parameters
    ----------
    min_layers : int
        minimum of Conv layers
    max_layers : int
        maximum of Conv layers
    min_filters : int
        minimum number of filters per Conv layer
    max_filters : int
        maximum number of filters per Conv layer
    min_fc_nodes : int
        minimum number of hidden nodes per Dense layer
    max_fc_nodes : int
        maximum number of hidden nodes per Dense layer
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    hyperparameters : dict
        parameters for a CNN model: the base hyperparameters plus
        'filters' (list of ints, one entry per Conv layer) and
        'fc_hidden_nodes' (int)
    """
    hyperparameters = generate_base_hyper_parameter_set(
        low_lr, high_lr, low_reg, high_reg)
    # The upper bound of np.random.randint is exclusive, hence the "+ 1".
    number_of_layers = np.random.randint(min_layers, max_layers + 1)
    # Convert to a plain list of ints, matching the DeepConvLSTM
    # hyperparameter set and the documented type of `filters`.
    hyperparameters['filters'] = np.random.randint(
        min_filters, max_filters + 1, number_of_layers).tolist()
    hyperparameters['fc_hidden_nodes'] = np.random.randint(
        min_fc_nodes, max_fc_nodes + 1)
    return hyperparameters
304
305
306 1
def generate_DeepConvLSTM_hyperparameter_set(
        min_conv_layers=1, max_conv_layers=10,
        min_conv_filters=10, max_conv_filters=100,
        min_lstm_layers=1, max_lstm_layers=5,
        min_lstm_dims=10, max_lstm_dims=100,
        low_lr=1, high_lr=4, low_reg=1, high_reg=4):
    """ Draw a random hyperparameter set that defines a DeepConvLSTM model.

    Parameters
    ----------
    min_conv_layers : int
        minimum number of Conv layers in DeepConvLSTM model
    max_conv_layers : int
        maximum number of Conv layers in DeepConvLSTM model
    min_conv_filters : int
        minimum number of filters per Conv layer in DeepConvLSTM model
    max_conv_filters : int
        maximum number of filters per Conv layer in DeepConvLSTM model
    min_lstm_layers : int
        minimum number of LSTM layers in DeepConvLSTM model
    max_lstm_layers : int
        maximum number of LSTM layers in DeepConvLSTM model
    min_lstm_dims : int
        minimum number of hidden nodes per LSTM layer in DeepConvLSTM model
    max_lstm_dims : int
        maximum number of hidden nodes per LSTM layer in DeepConvLSTM model
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    hyperparameters : dict
        hyperparameters for a DeepConvLSTM model: the base hyperparameters
        plus 'filters' and 'lstm_dims' (lists of ints)
    """
    params = generate_base_hyper_parameter_set(
        low_lr, high_lr, low_reg, high_reg)

    # Convolutional part: draw the depth first, then one filter count per
    # layer (upper bounds of randint are exclusive, hence the "+ 1").
    conv_depth = np.random.randint(min_conv_layers, max_conv_layers + 1)
    conv_filters = np.random.randint(
        min_conv_filters, max_conv_filters + 1, conv_depth)
    params['filters'] = conv_filters.tolist()

    # Recurrent part: same scheme for the LSTM layers.
    lstm_depth = np.random.randint(min_lstm_layers, max_lstm_layers + 1)
    lstm_sizes = np.random.randint(
        min_lstm_dims, max_lstm_dims + 1, lstm_depth)
    params['lstm_dims'] = lstm_sizes.tolist()

    return params
361
362
363 1
def generate_base_hyper_parameter_set(
        low_lr=1,
        high_lr=4,
        low_reg=1,
        high_reg=4):
    """ Generate the base hyperparameters that every model needs but that
    do not fully define any model on their own.

    Parameters
    ----------
    low_lr : float
        minimum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    high_lr : float
        maximum of log range for learning rate: learning rate is sampled
        between `10**(-low_lr)` and `10**(-high_lr)`
    low_reg : float
        minimum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`
    high_reg : float
        maximum of log range for regularization rate: regularization rate is
        sampled between `10**(-low_reg)` and `10**(-high_reg)`

    Returns
    -------
    hyperparameters : dict
        base hyperparameters, with keys 'learning_rate' and
        'regularization_rate'
    """
    # The learning rate is drawn before the regularization rate.
    return {
        'learning_rate': get_learning_rate(low_lr, high_lr),
        'regularization_rate': get_regularization(low_reg, high_reg),
    }
396
397
398 1
def get_learning_rate(low=1, high=4):
    """ Draw a random learning rate of the form 10^-n, with the exponent n
    sampled uniformly between the low and high bounds.

    Parameters
    ----------
    low : float
        low bound of the exponent n
    high : float
        high bound of the exponent n

    Returns
    -------
    learning_rate : float
        learning rate
    """
    return 10 ** (-np.random.uniform(low, high))
416
417
418 1
def get_regularization(low=1, high=4):
    """ Draw a random regularization rate of the form 10^-n, with the
    exponent n sampled uniformly between the low and high bounds.

    Parameters
    ----------
    low : float
        low bound of the exponent n
    high : float
        high bound of the exponent n

    Returns
    -------
    regularization_rate : float
        regularization rate
    """
    exponent = np.random.uniform(low, high)
    return 10 ** (-exponent)
435