| Metric | Value |
| ------ | ----- |
| Conditions | 19 |
| Total Lines | 116 |
| Lines | 0 |
| Ratio | 0 % |
| Tests | 23 |
| CRAP Score | 38.5596 |
| Changes | 6 |
| Bugs | 1 |
| Features | 0 |
Small methods make your code easier to understand, especially when combined with a good name. And if a method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, that is usually a sign that the commented part should be extracted into a new method, with the comment serving as a starting point for the new method's name.
Commonly applied refactorings include:

- Extract Method (a sketch follows this list)
- Introduce Parameter Object, if many parameters/temporary variables are present
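For example, the `# accumulate results` comment near the end of the listing below marks exactly such a step. A minimal Extract Method sketch could look like this (the helper name is ours, derived from the comment, and not part of the library):

```python
def accumulate_results(trained_models, metric_name):
    """Collect train histories plus validation metrics/losses per model."""
    histories = [tm.history for tm in trained_models]
    val_metrics = [h['val_' + metric_name] for h in histories]
    val_losses = [h['val_loss'] for h in histories]
    return histories, val_metrics, val_losses
```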
Complex code units like train_models_on_samples() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.

Once you have determined which members belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster to apply. For a long parameter list like this function's, Introduce Parameter Object achieves a similar grouping, as sketched below.
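A sketch of Introduce Parameter Object applied to this function's long parameter list; the `TrainingSettings` name and the chosen grouping are illustrative, not part of the library:

```python
from dataclasses import dataclass

@dataclass
class TrainingSettings:
    """Groups the training knobs that always travel together (illustrative)."""
    nr_epochs: int = 5
    subset_size: int = 100
    batch_size: int = 20
    metric: str = 'accuracy'
    early_stopping: bool = False
    verbose: bool = True

def train_models_on_samples(X_train, y_train, X_val, y_val, models,
                            settings=None, outputfile=None,
                            model_path=None, use_noodles=None):
    # Fall back to the defaults when no settings object is supplied.
    settings = settings or TrainingSettings()
    ...
```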
| 1 | """ |
||
| 53 | 1 | def train_models_on_samples(X_train, y_train, X_val, y_val, models, |
|
| 54 | nr_epochs=5, subset_size=100, verbose=True, outputfile=None, |
||
|
|
|||
| 55 | model_path=None, early_stopping=False, |
||
| 56 | batch_size=20, metric='accuracy', use_noodles=None): |
||
| 57 | """ |
||
| 58 | Given a list of compiled models, this function trains |
||
| 59 | them all on a subset of the train data. If the given size of the subset is |
||
| 60 | smaller then the size of the data, the complete data set is used. |
||
| 61 | |||
| 62 | Parameters |
||
| 63 | ---------- |
||
| 64 | X_train : numpy array of shape (num_samples, num_timesteps, num_channels) |
||
| 65 | The input dataset for training |
||
| 66 | y_train : numpy array of shape (num_samples, num_classes) |
||
| 67 | The output classes for the train data, in binary format |
||
| 68 | X_val : numpy array of shape (num_samples_val, num_timesteps, num_channels) |
||
| 69 | The input dataset for validation |
||
| 70 | y_val : numpy array of shape (num_samples_val, num_classes) |
||
| 71 | The output classes for the validation data, in binary format |
||
| 72 | models : list of model, params, modeltypes |
||
| 73 | List of keras models to train |
||
| 74 | nr_epochs : int, optional |
||
| 75 | nr of epochs to use for training one model |
||
| 76 | subset_size : |
||
| 77 | The number of samples used from the complete train set |
||
| 78 | verbose : bool, optional |
||
| 79 | flag for displaying verbose output |
||
| 80 | outputfile: str, optional |
||
| 81 | Filename to store the model training results |
||
| 82 | model_path : str, optional |
||
| 83 | Directory to store the models as HDF5 files |
||
| 84 | early_stopping: bool |
||
| 85 | Stop when validation loss does not decrease |
||
| 86 | batch_size : int |
||
| 87 | nr of samples per batch |
||
| 88 | metric : str |
||
| 89 | metric to store in the history object |
||
| 90 | |||
| 91 | Returns |
||
| 92 | ---------- |
||
| 93 | histories : list of Keras History objects |
||
| 94 | train histories for all models |
||
| 95 | val_metrics : list of floats |
||
| 96 | validation accuraracies of the models |
||
| 97 | val_losses : list of floats |
||
| 98 | validation losses of the models |
||
| 99 | """ |
||
| 100 | # if subset_size is smaller then X_train, this will work fine |
||
| 101 | 1 | X_train_sub = X_train[:subset_size, :, :] |
|
| 102 | 1 | y_train_sub = y_train[:subset_size, :] |
|
| 103 | |||
| 104 | 1 | metric_name = get_metric_name(metric) |
|
| 105 | |||
| 106 | 1 | val_metrics = [] |
|
| 107 | 1 | val_losses = [] |
|
| 108 | |||
| 109 | 1 | def make_history(model, i=None): |
|
| 110 | 1 | model_metrics = [get_metric_name(name) for name in model.metrics] |
|
| 111 | 1 | if metric_name not in model_metrics: |
|
| 112 | raise ValueError( |
||
| 113 | 'Invalid metric. The model was not compiled with {} as metric'.format(metric_name)) |
||
| 114 | 1 | if early_stopping: |
|
| 115 | callbacks = [ |
||
| 116 | EarlyStopping(monitor='val_loss', patience=0, verbose=verbose, mode='auto')] |
||
| 117 | else: |
||
| 118 | 1 | callbacks = [] |
|
| 119 | |||
| 120 | 1 | args = (model, X_train_sub, y_train_sub) |
|
| 121 | 1 | kwargs = {'epochs': nr_epochs, |
|
| 122 | 'batch_size': batch_size, |
||
| 123 | 'validation_data': (X_val, y_val), |
||
| 124 | 'verbose': verbose, |
||
| 125 | 'callbacks': callbacks} |
||
| 126 | |||
| 127 | 1 | if use_noodles is None: |
|
| 128 | # if not using noodles, save every nugget when it comes |
||
| 129 | 1 | trained_model = train_model(*args, **kwargs) |
|
| 130 | 1 | if outputfile is not None: |
|
| 131 | store_train_hist_as_json(models[i][1], models[i][2], |
||
| 132 | trained_model.history, outputfile) |
||
| 133 | 1 | if model_path is not None: |
|
| 134 | trained_model.save( |
||
| 135 | os.path.join(model_path, 'model_{}.h5'.format(i))) |
||
| 136 | 1 | return trained_model |
|
| 137 | |||
| 138 | else: |
||
| 139 | assert has_noodles, "Noodles is not installed, or could not be imported." |
||
| 140 | return noodles.schedule_hint(call_by_ref=['model']) \ |
||
| 141 | (train_model)(*args, **kwargs) |
||
| 142 | |||
| 143 | 1 | if use_noodles is None: |
|
| 144 | 1 | trained_models = [ |
|
| 145 | make_history(model[0], i) |
||
| 146 | for i, model in enumerate(models)] |
||
| 147 | |||
| 148 | else: |
||
| 149 | assert has_noodles, "Noodles is not installed, or could not be imported." |
||
| 150 | |||
| 151 | # in case of noodles, first run everything |
||
| 152 | training_wf = noodles.gather_all([make_history(model[0]) for model in models]) |
||
| 153 | trained_models = use_noodles(training_wf) |
||
| 154 | |||
| 155 | # then save everything |
||
| 156 | for i, (history, model) in enumerate(trained_models): |
||
| 157 | if outputfile is not None: |
||
| 158 | store_train_hist_as_json(models[i][1], models[i][2], |
||
| 159 | history, outputfile) |
||
| 160 | if model_path is not None: |
||
| 161 | model.save(os.path.join(model_path, 'model_{}.h5'.format(i))) |
||
| 162 | |||
| 163 | # accumulate results |
||
| 164 | 1 | val_metrics = [tm.history['val_' + metric_name] |
|
| 165 | for tm in trained_models] |
||
| 166 | 1 | val_losses = [tm.history['val_loss'] |
|
| 167 | for tm in trained_models] |
||
| 168 | 1 | return [tm.history for tm in trained_models], val_metrics, val_losses |
|
| 169 | |||
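For context, a hypothetical usage sketch; it assumes `X_train`, `y_train`, `X_val`, `y_val` and a `models` list of (compiled model, params, modeltype) tuples have already been prepared, and the model-selection step at the end is illustrative:

```python
import numpy as np

# Assumes X_train, y_train, X_val, y_val and `models` (a list of
# (compiled_model, params, modeltype) tuples) are already prepared.
histories, val_metrics, val_losses = train_models_on_samples(
    X_train, y_train, X_val, y_val, models,
    nr_epochs=5, subset_size=100, verbose=False)

# Pick the model whose (final) validation metric is best; np.atleast_1d
# covers both a single float and a per-epoch series per model.
scores = [np.atleast_1d(v)[-1] for v in val_metrics]
best_model = models[int(np.argmax(scores))][0]
```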
This check looks for lines that are too long. You can specify the maximum line length.
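The report does not say which checker produced this finding. As one illustration only (not necessarily the tool used here), the pycodestyle library lets you set the maximum line length programmatically; the filename below is a placeholder:

```python
import pycodestyle

# Configure the maximum line length and check a file for violations.
style = pycodestyle.StyleGuide(max_line_length=99)
report = style.check_files(['your_module.py'])  # hypothetical filename
print('violations found:', report.total_errors)
```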