make_history() - Code Metrics - Inspection of "adds provisional support for noodles, towards #167" - NLeSC/mcfly - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#168)

unknown

created 2017-12-12 16:23 UTC

make_history() B

↳ Parent: train_models_on_samples()

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	0
CRAP Score	42

Importance

Changes

Metric	Value
cc	6
c	0
b	0
f	0
dl	0
loc	23
ccs	0
cts	13
cp	0
crap	42
rs	7.6949

"""
 Summary:
 This module provides the main functionality of mcfly: searching for an
 optimal model architecture. The work flow is as follows:
 Function generate_models from modelgen.py generates and compiles models.
 Function train_models_on_samples trains those models.
 Function plotTrainingProcess plots the training process.
 Function find_best_architecture is a wrapper function that combines
 these steps.
 Example function calls can be found in the tutorial notebook
 'EvaluateDifferentModels.ipynb'.
"""
import numpy as np
from . import modelgen
from .storage import TrainedModel

try:
    import noodles
    from .storage import serial_registry
except ImportError:
    has_noodles = False
else:
    has_noodles = True

from sklearn import neighbors, metrics as sklearnmetrics
import warnings
import json
import os
from keras.callbacks import EarlyStopping
from keras import metrics


def train_model(
        model, X_train_sub, y_train_sub, epochs, batch_size,
        validation_data, verbose, callbacks):
    """
    Fit a single model and bundle the outcome with the model itself.

    Parameters
    ----------
    model : object with a ``fit`` method
        The model to train; it is trained in place via ``model.fit``
    X_train_sub :
        Training inputs, forwarded to ``model.fit``
    y_train_sub :
        Training targets, forwarded to ``model.fit``
    epochs :
        Forwarded to ``model.fit`` as ``epochs``
    batch_size :
        Forwarded to ``model.fit`` as ``batch_size``
    validation_data :
        Forwarded to ``model.fit`` as ``validation_data``
    verbose :
        Forwarded to ``model.fit`` as ``verbose``
    callbacks :
        Forwarded to ``model.fit`` as ``callbacks``

    Returns
    -------
    TrainedModel
        Holds ``history`` (the ``history`` attribute of the object
        returned by ``model.fit``) and ``model`` (the model passed in)
    """
    fit_options = dict(
        epochs=epochs,
        batch_size=batch_size,
        validation_data=validation_data,
        verbose=verbose,
        callbacks=callbacks)
    fit_result = model.fit(X_train_sub, y_train_sub, **fit_options)

    return TrainedModel(history=fit_result.history, model=model)


def train_models_on_samples(X_train, y_train, X_val, y_val, models,
                            nr_epochs=5, subset_size=100, verbose=True,
                            outputfile=None, model_path=None,
                            early_stopping=False, batch_size=20,
                            metric='accuracy', use_noodles=None):
    """
    Given a list of compiled models, this function trains
    them all on a subset of the train data. If the given size of the subset
    is larger than the size of the data, the complete data set is used.

    Parameters
    ----------
    X_train : numpy array of shape (num_samples, num_timesteps, num_channels)
        The input dataset for training
    y_train : numpy array of shape (num_samples, num_classes)
        The output classes for the train data, in binary format
    X_val : numpy array of shape (num_samples_val, num_timesteps, num_channels)
        The input dataset for validation
    y_val : numpy array of shape (num_samples_val, num_classes)
        The output classes for the validation data, in binary format
    models : list of (model, params, modeltype)
        List of keras models to train, with their parameters and type
    nr_epochs : int, optional
        nr of epochs to use for training one model
    subset_size : int, optional
        The number of samples used from the complete train set
    verbose : bool, optional
        flag for displaying verbose output
    outputfile: str, optional
        Filename to store the model training results
    model_path : str, optional
        Directory to store the models as HDF5 files
    early_stopping: bool
        Stop when validation loss does not decrease
    batch_size : int
        nr of samples per batch
    metric : str
        metric to store in the history object
    use_noodles : callable, optional
        If given, training is scheduled as a noodles workflow; the gathered
        workflow is passed to this callable, which must run it and return
        the list of trained models. If None, models are trained directly.

    Returns
    ----------
    histories : list of dict
        train histories for all models
    val_metrics : list of floats
        validation metric of each model after its last trained epoch
    val_losses : list of floats
        validation loss of each model after its last trained epoch

    Raises
    ------
    ValueError
        If a model was not compiled with the requested metric
    ImportError
        If use_noodles is given but noodles could not be imported
    """
    # An explicit raise (rather than assert) keeps the check alive under -O.
    if use_noodles is not None and not has_noodles:
        raise ImportError(
            "Noodles is not installed, or could not be imported.")

    # if subset_size is larger than X_train, slicing just returns everything
    X_train_sub = X_train[:subset_size, :, :]
    y_train_sub = y_train[:subset_size, :]

    metric_name = get_metric_name(metric)

    def make_history(model):
        # Train one model, or schedule its training when noodles is used.
        model_metrics = [get_metric_name(name) for name in model.metrics]
        if metric_name not in model_metrics:
            raise ValueError(
                'Invalid metric. The model was not compiled with {} as metric'.format(metric_name))

        if early_stopping:
            callbacks = [
                EarlyStopping(monitor='val_loss', patience=0,
                              verbose=verbose, mode='auto')]
        else:
            callbacks = []

        args = (model, X_train_sub, y_train_sub)
        kwargs = {'epochs': nr_epochs,
                  'batch_size': batch_size,
                  'validation_data': (X_val, y_val),
                  'verbose': verbose,
                  'callbacks': callbacks}

        if use_noodles is None:
            return train_model(*args, **kwargs)

        scheduled_train = noodles.schedule_hint(
            call_by_ref=['model'])(train_model)
        return scheduled_train(*args, **kwargs)

    jobs = [make_history(model[0]) for model in models]
    if use_noodles is None:
        trained_models = jobs
    else:
        # The caller-supplied runner executes the gathered workflow.
        trained_models = use_noodles(noodles.gather_all(jobs))

    histories = [tm.history for tm in trained_models]
    # The history holds one value per epoch; report the final epoch only,
    # so that every model is summarised by a single float.
    val_metrics = [history['val_' + metric_name][-1]
                   for history in histories]
    val_losses = [history['val_loss'][-1] for history in histories]

    for i, (history, model) in enumerate(trained_models):
        if outputfile is not None:
            # Pass the metric explicitly: the helper defaults to 'acc',
            # which is wrong for any other metric.
            store_train_hist_as_json(models[i][1], models[i][2],
                                     history, outputfile,
                                     metric_name=metric_name)
        if model_path is not None:
            model.save(os.path.join(model_path, 'model_{}.h5'.format(i)))

    return histories, val_metrics, val_losses


def store_train_hist_as_json(params, model_type, history, outputfile,
                             metric_name='acc'):
    """
    Append the parameters and training history of one model to a JSON file.

    The file holds a JSON list with one entry per model. If the file already
    exists its entries are read first and the new entry is added at the end;
    otherwise a new list is started.

    Parameters
    ----------
    params : dict
        parameters for one model; numpy arrays among the values are stored
        as lists
    model_type :
        value stored under the 'modeltype' key of the entry
    history : dict
        training history from one model; must contain the keys
        metric_name, 'loss', 'val_' + metric_name and 'val_loss'
    outputfile : str
        path where the json file needs to be stored
    metric_name : str, optional
        name of metric from history to store
    """
    # numpy arrays cannot be serialized to JSON, so turn them into lists
    record = {
        key: value.tolist() if isinstance(value, np.ndarray) else value
        for key, value in params.items()
    }
    record.update({
        'train_metric': history[metric_name],
        'train_loss': history['loss'],
        'val_metric': history['val_' + metric_name],
        'val_loss': history['val_loss'],
        'modeltype': model_type,
        'metric': metric_name,
    })

    entries = []
    if os.path.isfile(outputfile):
        with open(outputfile, 'r') as infile:
            entries = json.load(infile)
    entries.append(record)

    with open(outputfile, 'w') as outfile:
        json.dump(entries, outfile, sort_keys=True,
                  indent=4, ensure_ascii=False)


def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
                           number_of_models=5, nr_epochs=5, subset_size=100,
                           outputpath=None, model_path=None, metric='accuracy',
                           use_noodles=None, **kwargs):
    """
    Tries out a number of models on a subsample of the data,
    and outputs the best found architecture and hyperparameters.

    Parameters
    ----------
    X_train : numpy array
        The input dataset for training of shape
        (num_samples, num_timesteps, num_channels)
    y_train : numpy array
        The output classes for the train data, in binary format of shape
        (num_samples, num_classes)
    X_val : numpy array
        The input dataset for validation of shape
        (num_samples_val, num_timesteps, num_channels)
    y_val : numpy array
        The output classes for the validation data, in binary format of shape
        (num_samples_val, num_classes)
    verbose : bool, optional
        flag for displaying verbose output
    number_of_models : int, optional
        The number of models to generate and test
    nr_epochs : int, optional
        The number of epochs that each model is trained
    subset_size : int, optional
        The size of the subset of the data that is used for finding
        the optimal architecture
    outputpath : str, optional
        File location to store the model results
    model_path: str, optional
        Directory to save the models as HDF5 files
    metric: str, optional
        metric that is used to evaluate the model on the validation set.
        See https://keras.io/metrics/ for possible metrics
    use_noodles : optional
        Passed on unchanged to train_models_on_samples
    **kwargs: key-value parameters
        parameters for generating the models
        (see docstring for modelgen.generate_models)

    Returns
    ----------
    best_model : Keras model
        Best performing model, already trained on a small sample data set.
    best_params : dict
        Dictionary containing the hyperparameters for the best model
    best_model_type : str
        Type of the best model
    knn_acc : float
        accuracy for kNN prediction on validation set
    """
    models = modelgen.generate_models(X_train.shape, y_train.shape[1],
                                      number_of_models=number_of_models,
                                      metrics=[metric],
                                      **kwargs)
    _, val_accuracies, _ = train_models_on_samples(
        X_train, y_train, X_val, y_val, models, nr_epochs,
        subset_size=subset_size,
        verbose=verbose,
        outputfile=outputpath,
        model_path=model_path,
        metric=metric,
        use_noodles=use_noodles)

    # A model's validation score may arrive as a single number or as a
    # per-epoch series. Reduce each one to its last value so that argmax
    # yields a model index and the kNN comparison is float vs. float.
    final_scores = [np.ravel(score)[-1] for score in val_accuracies]

    # The model with the highest validation score wins.
    best_model_index = np.argmax(final_scores)
    best_score = final_scores[best_model_index]
    best_model, best_params, best_model_type = models[best_model_index]

    # kNN on the same training subset serves as a baseline.
    knn_acc = kNN_accuracy(
        X_train[:subset_size, :, :], y_train[:subset_size, :], X_val, y_val)
    if verbose:
        print('Best model: model ', best_model_index)
        print('Model type: ', best_model_type)
        print('Hyperparameters: ', best_params)
        print(str(metric) + ' on validation set: ', best_score)
        print('Accuracy of kNN on validation set', knn_acc)

    if best_score < knn_acc:
        warnings.warn('Best model not better than kNN: ' +
                      str(best_score) + ' vs  ' +
                      str(knn_acc)
                      )
    return best_model, best_params, best_model_type, knn_acc


def get_metric_name(name):
    """
    Gives the keras name for a metric

    Parameters
    ----------
    name : str
        original name of the metric

    Returns
    -------
    'acc' when name is 'acc' or 'accuracy'. Otherwise the ``__name__`` of
    whatever keras ``metrics.get(name)`` returns. If that lookup fails,
    name is returned unchanged.
    """
    if name in ('acc', 'accuracy'):
        return 'acc'
    try:
        return metrics.get(name).__name__
    except Exception:
        # Best effort: a name keras cannot resolve is passed through as is.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return name


def kNN_accuracy(X_train, y_train, X_val, y_val, k=1):
    """
    Fits a k-Nearest Neighbors classifier and returns its accuracy score
    on the validation set.

    Parameters
    ----------
    X_train : numpy array
        Train set of shape (num_samples, num_timesteps, num_channels)
    y_train : numpy array
        Class labels for train set
    X_val : numpy array
        Validation set of shape (num_samples, num_timesteps, num_channels)
    y_val : numpy array
        Class labels for validation set
    k : int
        number of neighbors to use for classifying

    Returns
    -------
    accuracy: float
        accuracy score on the validation set
    """
    # Flatten every sample to one feature vector (timesteps * channels).
    n_train, train_steps, train_channels = X_train.shape
    flat_train = X_train.reshape(n_train, train_steps * train_channels)

    classifier = neighbors.KNeighborsClassifier(k)
    classifier.fit(flat_train, y_train)

    n_val, val_steps, val_channels = X_val.shape
    flat_val = X_val.reshape(n_val, val_steps * val_channels)
    predicted = classifier.predict(flat_val)

    return sklearnmetrics.accuracy_score(predicted, y_val)


1		"""
2		Summary:
3		This module provides the main functionality of mcfly: searching for an
4		optimal model architecture. The work flow is as follows:
5		Function generate_models from modelgen.py generates and compiles models.
6		Function train_models_on_samples trains those models.
7		Function plotTrainingProcess plots the training process.
8		Function find_best_architecture is wrapper function that combines
9		these steps.
10		Example function calls can be found in the tutorial notebook
11		'EvaluateDifferentModels.ipynb'.
12		"""
13	1	import numpy as np
14	1	from . import modelgen
15	1	from .storage import TrainedModel
16
17	1	try:
18	1	import noodles
19		from .storage import serial_registry
20	1	except ImportError:
21	1	has_noodles = False
22		else:
23		has_noodles = True
24
25	1	from sklearn import neighbors, metrics as sklearnmetrics
26		import warnings
27		import json
28		import os
29		from keras.callbacks import EarlyStopping
30		from keras import metrics
31
32
33		def train_model(
34		model, X_train_sub, y_train_sub, epochs, batch_size,
35		validation_data, verbose, callbacks):
36
37		result = model.fit(
38		X_train_sub,
39		y_train_sub,
40		epochs=epochs,
41		batch_size=batch_size, # see comment on subsize_set
42		validation_data=validation_data,
43		verbose=verbose,
44		callbacks=callbacks)
45
46		# metric = result.history['val_' + metric_name][-1]
47		# loss = result.history['val_loss'][-1]
48
49		return TrainedModel(
50		history=result.history, model=model) # , metric=metric, loss=loss)
51
52
53		def train_models_on_samples(X_train, y_train, X_val, y_val, models,
54		nr_epochs=5, subset_size=100, verbose=True, outputfile=None,
		0 ignored issues – show Coding Style introduced 2017-07-27 12:18 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (88/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
55		model_path=None, early_stopping=False,
56		batch_size=20, metric='accuracy', use_noodles=None):
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (80/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
57		"""
58		Given a list of compiled models, this function trains
59		them all on a subset of the train data. If the given size of the subset is
60		smaller then the size of the data, the complete data set is used.
61
62		Parameters
63		----------
64		X_train : numpy array of shape (num_samples, num_timesteps, num_channels)
65		The input dataset for training
66		y_train : numpy array of shape (num_samples, num_classes)
67		The output classes for the train data, in binary format
68		X_val : numpy array of shape (num_samples_val, num_timesteps, num_channels)
69		The input dataset for validation
70		y_val : numpy array of shape (num_samples_val, num_classes)
71		The output classes for the validation data, in binary format
72		models : list of model, params, modeltypes
73		List of keras models to train
74		nr_epochs : int, optional
75		nr of epochs to use for training one model
76		subset_size :
77		The number of samples used from the complete train set
78		verbose : bool, optional
79		flag for displaying verbose output
80		outputfile: str, optional
81		Filename to store the model training results
82		model_path : str, optional
83		Directory to store the models as HDF5 files
84		early_stopping: bool
85		Stop when validation loss does not decrease
86		batch_size : int
87		nr of samples per batch
88		metric : str
89		metric to store in the history object
90
91		Returns
92		----------
93		histories : list of Keras History objects
94		train histories for all models
95		val_metrics : list of floats
96		validation accuraracies of the models
97		val_losses : list of floats
98		validation losses of the models
99		"""
100		# if subset_size is smaller then X_train, this will work fine
101		X_train_sub = X_train[:subset_size, :, :]
102		y_train_sub = y_train[:subset_size, :]
103
104		metric_name = get_metric_name(metric)
105
106		val_metrics = []
107		val_losses = []
108
109		def make_history(model):
110		model_metrics = [get_metric_name(name) for name in model.metrics]
111		if metric_name not in model_metrics:
112		raise ValueError(
113		'Invalid metric. The model was not compiled with {} as metric'.format(metric_name))
		0 ignored issues – show Coding Style introduced 2017-08-31 08:54 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (99/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
114		if early_stopping:
115		callbacks = [
116		EarlyStopping(monitor='val_loss', patience=0, verbose=verbose, mode='auto')]
		0 ignored issues – show Coding Style introduced 2017-08-31 08:54 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (92/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
117		else:
118		callbacks = []
119
120		args = (model, X_train_sub, y_train_sub)
121		kwargs = {'epochs': nr_epochs,
122		'batch_size': batch_size,
123		'validation_data': (X_val, y_val),
124		'verbose': verbose,
125		'callbacks': callbacks}
126
127		if use_noodles is None:
128		return train_model(*args, **kwargs)
129		else:
130		assert has_noodles, "Noodles is not installed, or could not be imported."
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (85/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
131		return noodles.schedule_hint(call_by_ref=['model'])(train_model)(*args, **kwargs)
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (93/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
132
133		if use_noodles is None:
134		trained_models = [make_history(model[0]) for model in models]
135		else:
136		assert has_noodles, "Noodles is not installed, or could not be imported."
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (81/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
137		training_wf = noodles.gather_all([make_history(model[0]) for model in models])
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (86/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
138		trained_models = use_noodles(training_wf)
139		# noodles.run_process(training_wf, n_processes=4, registry=serial_registry)
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (83/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
140
141		val_metrics = [tm.history['val_' + metric_name]
142		for tm in trained_models]
143		val_losses = [tm.history['val_loss']
144		for tm in trained_models]
145
146		for i, (history, model) in enumerate(trained_models):
147		if outputfile is not None:
148		store_train_hist_as_json(models[i][1], models[i][2],
149		history, outputfile)
150		if model_path is not None:
151		model.save(os.path.join(model_path, 'model_{}.h5'.format(i)))
152
153		return [tm.history for tm in trained_models], val_metrics, val_losses
154
155
156		def store_train_hist_as_json(params, model_type, history, outputfile, metric_name='acc'):
		0 ignored issues – show Coding Style introduced 2017-06-29 15:18 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (89/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
157		"""
158		This function stores the model parameters, the loss and accuracy history
159		of one model in a JSON file. It appends the model information to the
160		existing models in the file.
161
162		Parameters
163		----------
164		params : dict
165		parameters for one model
166		model_type : Keras model object
167		Keras model object for one model
168		history : dict
169		training history from one model
170		outputfile : str
171		path where the json file needs to be stored
172		metric_name : str, optional
173		name of metric from history to store
174		"""
175		jsondata = params.copy()
176		for k in jsondata.keys():
177		if isinstance(jsondata[k], np.ndarray):
178		jsondata[k] = jsondata[k].tolist()
179		jsondata['train_metric'] = history[metric_name]
180		jsondata['train_loss'] = history['loss']
181		jsondata['val_metric'] = history['val_' + metric_name]
182		jsondata['val_loss'] = history['val_loss']
183		jsondata['modeltype'] = model_type
184		jsondata['metric'] = metric_name
185		if os.path.isfile(outputfile):
186		with open(outputfile, 'r') as outfile:
187		previousdata = json.load(outfile)
188		else:
189		previousdata = []
190		previousdata.append(jsondata)
191		with open(outputfile, 'w') as outfile:
192		json.dump(previousdata, outfile, sort_keys=True,
193		indent=4, ensure_ascii=False)
194
195
196		def find_best_architecture(X_train, y_train, X_val, y_val, verbose=True,
197		number_of_models=5, nr_epochs=5, subset_size=100,
198		outputpath=None, model_path=None, metric='accuracy',
199		use_noodles=None, **kwargs):
200		"""
201		Tries out a number of models on a subsample of the data,
202		and outputs the best found architecture and hyperparameters.
203
204		Parameters
205		----------
206		X_train : numpy array
207		The input dataset for training of shape
208		(num_samples, num_timesteps, num_channels)
209		y_train : numpy array
210		The output classes for the train data, in binary format of shape
211		(num_samples, num_classes)
212		X_val : numpy array
213		The input dataset for validation of shape
214		(num_samples_val, num_timesteps, num_channels)
215		y_val : numpy array
216		The output classes for the validation data, in binary format of shape
217		(num_samples_val, num_classes)
218		verbose : bool, optional
219		flag for displaying verbose output
220		number_of_models : int, optiona
221		The number of models to generate and test
222		nr_epochs : int, optional
223		The number of epochs that each model is trained
224		subset_size : int, optional
225		The size of the subset of the data that is used for finding
226		the optimal architecture
227		outputpath : str, optional
228		File location to store the model results
229		model_path: str, optional
230		Directory to save the models as HDF5 files
231		metric: str, optional
232		metric that is used to evaluate the model on the validation set.
233		See https://keras.io/metrics/ for possible metrics
234		**kwargs: key-value parameters
235		parameters for generating the models
236		(see docstring for modelgen.generate_models)
237
238		Returns
239		----------
240		best_model : Keras model
241		Best performing model, already trained on a small sample data set.
242		best_params : dict
243		Dictionary containing the hyperparameters for the best model
244		best_model_type : str
245		Type of the best model
246		knn_acc : float
247		accuaracy for kNN prediction on validation set
248		"""
249		models = modelgen.generate_models(X_train.shape, y_train.shape[1],
250		number_of_models=number_of_models,
251		metrics=[metric],
252		**kwargs)
253		histories, val_accuracies, val_losses = train_models_on_samples(X_train,
254		y_train,
255		X_val,
256		y_val,
257		models,
258		nr_epochs,
259		subset_size=subset_size,
		0 ignored issues – show Coding Style introduced 2016-08-04 09:16 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (92/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
260		verbose=verbose,
		0 ignored issues – show Coding Style introduced 2016-07-07 14:47 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (84/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
261		outputfile=outputpath,
		0 ignored issues – show Coding Style introduced 2017-06-29 15:18 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (90/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
262		model_path=model_path,
		0 ignored issues – show Coding Style introduced 2017-08-31 08:29 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (90/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
263		metric=metric,
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (82/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
264		use_noodles=use_noodles)
		0 ignored issues – show Coding Style introduced 2017-12-12 16:27 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (92/79). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
265		best_model_index = np.argmax(val_accuracies)
266		best_model, best_params, best_model_type = models[best_model_index]
267		knn_acc = kNN_accuracy(
268		X_train[:subset_size, :, :], y_train[:subset_size, :], X_val, y_val)
269		if verbose:
270		print('Best model: model ', best_model_index)
271		print('Model type: ', best_model_type)
272		print('Hyperparameters: ', best_params)
273		print(str(metric) + ' on validation set: ',
274		val_accuracies[best_model_index])
275		print('Accuracy of kNN on validation set', knn_acc)
276
277		if val_accuracies[best_model_index] < knn_acc:
278		warnings.warn('Best model not better than kNN: ' +
279		str(val_accuracies[best_model_index]) + ' vs ' +
280		str(knn_acc)
281		)
282		return best_model, best_params, best_model_type, knn_acc
283
284
285		def get_metric_name(name):
286		"""
287		Gives the keras name for a metric
288
289		Parameters
290		----------
291		name : str
292		original name of the metric
293		Returns
294		-------
295
296		"""
297		if name == 'acc' or name == 'accuracy':
298		return 'acc'
299		try:
300		metric_fn = metrics.get(name)
301		return metric_fn.__name__
302		except:
303		pass
304		return name
305
306
307		def kNN_accuracy(X_train, y_train, X_val, y_val, k=1):
308		"""
309		Performs k-Neigherst Neighbors and returns the accuracy score.
310
311		Parameters
312		----------
313		X_train : numpy array
314		Train set of shape (num_samples, num_timesteps, num_channels)
315		y_train : numpy array
316		Class labels for train set
317		X_val : numpy array
318		Validation set of shape (num_samples, num_timesteps, num_channels)
319		y_val : numpy array
320		Class labels for validation set
321		k : int
322		number of neighbors to use for classifying
323
324		Returns
325		-------
326		accuracy: float
327		accuracy score on the validation set
328		"""
329		num_samples, num_timesteps, num_channels = X_train.shape
330		clf = neighbors.KNeighborsClassifier(k)
331		clf.fit(
332		X_train.reshape(
333		num_samples,
334		num_timesteps *
335		num_channels),
336		y_train)
337		num_samples, num_timesteps, num_channels = X_val.shape
338		val_predict = clf.predict(
339		X_val.reshape(num_samples,
340		num_timesteps * num_channels))
341		return sklearnmetrics.accuracy_score(val_predict, y_val)
342

NLeSC / mcfly

Pull Request — master (#168)

make_history() B

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like