fetch_and_preprocess() - Code Metrics - Inspection of "fixed bug in tests and created seperate preprocess..." - NLeSC/mcfly - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 8bbed5...9e4b64 )

unknown

created 2016-09-07 13:48 UTC

fetch_and_preprocess() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	1
CRAP Score	16.0213

Importance

Changes	8
Bugs	0	Features	0

Metric	Value
cc	4
dl	0
loc	22
ccs	1
cts	11
cp	0.0909
crap	16.0213
rs	8.9197
c	8
b	0
f	0

"""
 Summary:
 Function fetch_and_preprocess from tutorial_pamap2.py helps to fetch and
 preprocess the data.
 Example function calls in 'Tutorial mcfly on PAMAP2.ipynb'
"""
import numpy as np
from numpy import genfromtxt
import pandas as pd
import matplotlib.pyplot as plt
from os import listdir
import os.path
import zipfile
import keras
from keras.utils.np_utils import to_categorical
import sys
if sys.version_info <= (3,): #python2
    import urllib
else: #python3
    import urllib.request

def split_activities(labels, X, borders=10*100):
    """
    Splits up the data per activity and excludes activity=0.
    Also removes the borders of each activity.

    labels: 1D sequence with one activity label per time step
    X: 2D array whose rows correspond to the entries of labels
    borders: number of rows dropped at the start and at the end of
        every contiguous run of the same label

    Returns two lists (Xlist, ylist): the trimmed sub-arrays of X and,
    for each of them, the activity label of the run it was cut from.
    Runs that have no rows left after trimming are dropped.
    """
    tot_len = len(labels)
    if tot_len == 0:
        # No time steps at all, so there is nothing to split
        return [], []
    label_arr = np.asarray(labels)
    # A run starts at row 0 and at every row whose label differs from
    # the label of the previous row
    changes = np.flatnonzero(label_arr[1:] != label_arr[:-1]) + 1
    startpoints = np.concatenate(([0], changes))
    endpoints = np.append(startpoints[1:]-1, tot_len-1)
    Xlist = []
    ylist = []
    for start, end in zip(startpoints, endpoints):
        activity = labels[start]
        if activity == 0:
            continue
        first = start + borders
        last = end - borders  # inclusive
        # Skip runs that are too short. Without this guard a negative
        # stop index would wrap around and select rows that belong to
        # other activities.
        if last < first:
            continue
        segment = X[first:last+1, :]
        if len(segment) > 0:
            Xlist.append(segment)
            ylist.append(activity)
    return Xlist, ylist

def sliding_window(frame_length, step, Xsamples,
                   ysamples, Xsampleslist, ysampleslist):
    """
    Cuts every time series in Xsampleslist into overlapping frames of
    frame_length rows, moving the frame start forward by step rows.
    Each frame is appended to Xsamples and the matching entry of
    ysampleslist is appended to ysamples, once per frame.

    The output lists are extended in place and nothing is returned.
    All frames end up in the same lists, so the distinction between
    the individual input series (participants) is not kept.

    Frame starts run up to, but do not include,
    number_of_rows - frame_length.
    """
    for idx, series in enumerate(Xsampleslist):
        label = ysampleslist[idx]
        starts = range(0, series.shape[0] - frame_length, step)
        Xsamples.extend(series[begin:begin + frame_length, :]
                        for begin in starts)
        # One copy of the label per frame that was produced
        ysamples.extend([label] * len(starts))

def transform_y(y, mapclasses, nr_classes):
    """
    Looks up every class label in y in the mapclasses dictionary and
    passes the resulting integer indices, together with nr_classes,
    to keras' to_categorical.

    y: sequence of A class labels, all of which are keys of mapclasses
    mapclasses: dictionary from class label to integer class index
    nr_classes: number of classes B

    Returns the output of to_categorical, which is expected to be a
    binary Numpy matrix of shape (A, B).
    """
    class_indices = [mapclasses[label] for label in y]
    return to_categorical(np.array(class_indices, dtype='int'), nr_classes)

def addheader(datasets):
    """
    The columns of the pandas data frames are numbers;
    this function replaces them by descriptive column labels.

    datasets: list of data frames with 54 columns each
        (timestamp, activityID, heartrate and 17 columns for each of
        the hand, chest and ankle sensors)

    Returns the same list; the data frames are modified in place.
    """
    axes = ['x', 'y', 'z']
    # Parentheses instead of backslash continuations, so that blank
    # lines or trailing whitespace cannot break the expression
    imu_sensor_columns = (
        ['temperature'] +
        ['acc_16g_' + axis for axis in axes] +
        ['acc_6g_' + axis for axis in axes] +
        ['gyroscope_' + axis for axis in axes] +
        ['magnometer_' + axis for axis in axes] +
        ['orientation_' + str(i) for i in range(4)]
    )
    header = ["timestamp", "activityID", "heartrate"]
    for location in ("hand_", "chest_", "ankle_"):
        header += [location + column for column in imu_sensor_columns]
    for dataset in datasets:
        dataset.columns = header
    return datasets

def numpify_and_store(X, y, xname, yname, outdatapath, shuffle=False):
    """
    Converts the python lists X and y into numpy arrays and stores
    them with np.save as outdatapath + xname and outdatapath + yname.

    X: list of equally shaped 2D arrays (becomes a 3D array)
    y: list of equally long 1D label vectors (becomes a 2D array)
    xname, yname: file names (np.save appends '.npy' when missing)
    outdatapath: path prefix; it is concatenated as-is with the names,
        so it has to end with a path separator
    shuffle: optional, when true the samples of X and y are reordered
        with the same fixed permutation
    """
    X = np.array(X)
    y = np.array(y)
    # Shuffle around the train set
    if shuffle:
        # A private generator with a fixed seed gives the same order
        # as seeding the global generator with 123, without changing
        # the random state of the calling code
        rng = np.random.RandomState(123)
        neworder = rng.permutation(X.shape[0])
        X = X[neworder, :, :]
        y = y[neworder, :]
    # Save binary file
    np.save(outdatapath + xname, X)
    np.save(outdatapath + yname, y)


def fetch_data(directory_to_extract_to):
    """
    Fetch the PAMAP2 data and extract the contents of the zip file
    to directory_to_extract_to + '/PAMAP2'.
    First check whether this was done before, if yes, then skip.

    directory_to_extract_to: directory in which the zip file is stored
        and below which the data is extracted; created when missing

    Returns the path of the directory with the extracted data.
    Errors raised by the download or by zipfile are passed on; in that
    case no target directory is left behind, so a later call retries.
    """
    import shutil  # only needed here, to clean up a failed extraction

    targetdir = directory_to_extract_to + '/PAMAP2'
    if os.path.exists(targetdir):
        print('Data previously downloaded and stored in ' + targetdir)
        return targetdir

    if directory_to_extract_to and \
            not os.path.isdir(directory_to_extract_to):
        os.makedirs(directory_to_extract_to)

    # download the PAMAP2 data, this is 688 Mb
    path_to_zip_file = directory_to_extract_to + '/PAMAP2_Dataset.zip'
    if os.path.isfile(path_to_zip_file):
        print('The data was previously downloaded and stored in ' +
              path_to_zip_file)
    else:
        url = str('https://archive.ics.uci.edu/ml/' +
                  'machine-learning-databases/00231/PAMAP2_Dataset.zip')
        # Download to a temporary name first, so that an interrupted
        # download is never mistaken for a complete zip file
        partial_path = path_to_zip_file + '.part'
        if sys.version_info <= (3,):  # python2
            urllib.urlretrieve(url, filename=partial_path)
        else:  # python3
            urllib.request.urlretrieve(url, filename=partial_path)
        os.rename(partial_path, path_to_zip_file)
        print('Download complete and stored in: ' + path_to_zip_file)

    # unzip; the target directory is created by extractall itself and
    # removed again when the extraction does not finish, because its
    # existence is what marks the data as available
    extracted = False
    try:
        with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
            zip_ref.extractall(targetdir)
        extracted = True
    finally:
        if not extracted:
            shutil.rmtree(targetdir, ignore_errors=True)
    return targetdir


def slidingwindow_store(y_list, x_list,X_name, y_name, outdatapath, shuffle):
    """
    Cuts the time series in x_list into sliding-window frames and
    stores the frames and their labels as numpy files.

    y_list, x_list: labels and time series, handed to sliding_window
    X_name, y_name: file names for the stored frames and labels
    outdatapath: path prefix for the stored files
    shuffle: whether numpify_and_store should shuffle the samples

    The frames are 512 rows long and start every 100 rows
    (5.12 s frames with a 1 s step, for data sampled at 100 Hz).
    """
    sampling_rate = 100  # Data is 100 Hz
    frame_length = int(5.12 * sampling_rate)
    step = 1 * sampling_rate
    frames, targets = [], []
    # sliding_window fills the two lists in place
    sliding_window(frame_length, step, frames, targets, x_list, y_list)
    numpify_and_store(frames, targets, X_name, y_name,
                      outdatapath, shuffle)

def map_clas(datasets_filled):
    """
    Collects the activity labels that occur in the datasets and maps
    them to consecutive class indices. Activity 0 is left out.

    datasets_filled: list of objects with an activityID attribute
        (the pandas data frames of the individual files)

    Returns (classlabels, nr_classes, mapclasses):
    classlabels: the labels found, in ascending order
    nr_classes: the number of labels found
    mapclasses: dictionary from label to its index in classlabels

    For an empty list the result is ([], 0, {}).
    """
    labels_found = set()
    for data in datasets_filled:
        labels_found |= set(np.array(data.activityID))
    labels_found.discard(0)
    # Sorting makes the class indices independent of the iteration
    # order of the set
    classlabels = sorted(labels_found)
    nr_classes = len(classlabels)
    mapclasses = {label: index for index, label in enumerate(classlabels)}
    return classlabels, nr_classes, mapclasses

def split_data(Xlists,ybinarylists,indices):
    """
    Takes a subset from the lists, given indices.

    Xlists, ybinarylists: sequences with one list of items per dataset
    indices: either a slice or a single index
        slice: the lists of all selected datasets are concatenated
        single index: the items of that one dataset are returned

    Returns two flat lists (x_setlist, y_setlist).
    """
    # isinstance instead of comparing str(type(...)), because the type
    # is printed differently by Python 2 and Python 3
    if isinstance(indices, slice):
        x_setlist = [X for Xlist in Xlists[indices] for X in Xlist]
        y_setlist = [y for ylist in ybinarylists[indices] for y in ylist]
    else:
        x_setlist = list(Xlists[indices])
        y_setlist = list(ybinarylists[indices])
    return x_setlist, y_setlist

def preprocess(targetdir, outdatapath, columns_to_use=None):
    """
    Reads the PAMAP2 protocol files, cuts them into labelled
    sliding-window frames and stores train, validation and test sets
    as numpy files.

    targetdir: directory that contains 'PAMAP2_Dataset/Protocol'
    outdatapath: path prefix under which the numpy files are stored
    columns_to_use: names of the data columns to keep; by default the
        16g accelerometer axes of hand, ankle and chest

    The files are processed in sorted file name order: the first six
    files form the train set, the seventh the validation set and the
    remaining ones the test set.
    """
    if columns_to_use is None:
        columns_to_use = [
            'hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z',
            'ankle_acc_16g_x', 'ankle_acc_16g_y', 'ankle_acc_16g_z',
            'chest_acc_16g_x', 'chest_acc_16g_y', 'chest_acc_16g_z']
    datadir = targetdir + '/PAMAP2_Dataset/Protocol'
    # listdir gives no guarantee about the order of the entries; sort
    # them so that the split below always selects the same files
    filenames = sorted(listdir(datadir))
    print('Start pre-processing all ' + str(len(filenames)) + ' files...')
    # load the files and put them in a list of pandas dataframes:
    datasets = [pd.read_csv(datadir + '/' + fn, header=None, sep=' ')
                for fn in filenames]
    datasets = addheader(datasets)  # add headers to the datasets
    # Interpolate dataset to get same sample rate between channels
    datasets_filled = [d.interpolate() for d in datasets]
    # Create mapping for class labels
    _, nr_classes, mapclasses = map_clas(datasets_filled)
    # Create input (x) and output (y) sets
    xall = [np.array(data[columns_to_use]) for data in datasets_filled]
    yall = [np.array(data.activityID) for data in datasets_filled]
    xylists = [split_activities(y, x) for x, y in zip(xall, yall)]
    Xlists, ylists = zip(*xylists)
    ybinarylists = [transform_y(y, mapclasses, nr_classes) for y in ylists]
    # Split in train, test and val
    x_vallist, y_vallist = split_data(Xlists, ybinarylists, indices=6)
    test_range = slice(7, len(datasets_filled))
    x_testlist, y_testlist = split_data(Xlists, ybinarylists, test_range)
    x_trainlist, y_trainlist = split_data(Xlists, ybinarylists,
                                          indices=slice(0, 6))
    # Take sliding-window frames and store them as numpy files;
    # only the train set is shuffled
    slidingwindow_store(y_list=y_trainlist, x_list=x_trainlist,
                        X_name='X_train', y_name='y_train',
                        outdatapath=outdatapath, shuffle=True)
    slidingwindow_store(y_list=y_vallist, x_list=x_vallist,
                        X_name='X_val', y_name='y_val',
                        outdatapath=outdatapath, shuffle=False)
    slidingwindow_store(y_list=y_testlist, x_list=x_testlist,
                        X_name='X_test', y_name='y_test',
                        outdatapath=outdatapath, shuffle=False)
    print('Processed data succesfully stored in ' + outdatapath)


def fetch_and_preprocess(directory_to_extract_to, columns_to_use=None):
    """
    High level function to fetch_and_preprocess the PAMAP2 dataset
    directory_to_extract_to: the directory where the data will be stored
    columns_to_use: the columns to use; None selects the default
        columns of preprocess

    The function will store the numpy output in directory outdatapath
    and returns that path. When all six numpy files are already
    present there, the pre-processing is skipped; this check does not
    look at columns_to_use, so remove the files to pre-process again
    with other columns.
    """
    targetdir = fetch_data(directory_to_extract_to)
    outdatapath = targetdir + '/PAMAP2_Dataset/slidingwindow512cleaned/'
    if not os.path.exists(outdatapath):
        os.makedirs(outdatapath)
    # Use the file names exactly as they are written by preprocess
    # (capital X), so the check also works on case-sensitive file systems
    expected_files = [prefix + '_' + part + '.npy'
                      for part in ('train', 'val', 'test')
                      for prefix in ('X', 'y')]
    if all(os.path.isfile(outdatapath + fn) for fn in expected_files):
        print('Data previously pre-processed and np-files saved to ' +
              outdatapath)
    else:
        preprocess(targetdir, outdatapath, columns_to_use)
    return outdatapath

def load_data(outputpath):
    """
    Loads the six numpy files that were stored under outputpath.

    outputpath: path prefix of the files; it is concatenated as-is
        with the file names, so it has to end with a path separator

    Returns the tuple
    (x_train, y_train_binary, x_val, y_val_binary, x_test, y_test_binary)
    """
    names = ('X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test')
    return tuple(np.load(outputpath + name + '.npy') for name in names)


1		"""
2		Summary:
3		Function fetch_and_preprocess from tutorial_pamap2.py helps to fetch and
4		preprocess the data.
5		Example function calls in 'Tutorial mcfly on PAMAP2.ipynb'
6		"""
7	1	import numpy as np
8	1	from numpy import genfromtxt
9	1	import pandas as pd
10	1	import matplotlib.pyplot as plt
11	1	from os import listdir
12	1	import os.path
13	1	import zipfile
14	1	import keras
15	1	from keras.utils.np_utils import to_categorical
16	1	import sys
17	1	if sys.version_info <= (3,): #python2
18		import urllib
19		else: #python3
20	1	import urllib.request
21
22	1	def split_activities(labels, X, borders=10*100):
		0 ignored issues – show Coding Style Naming introduced 2016-09-01 14:23 UTC by Report Bug Copy Issue Report The name `X` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
23		"""
24		Splits up the data per activity and exclude activity=0.
25		Also remove borders for each activity.
26		Returns lists with subdatasets
27		"""
28	1	tot_len = len(labels)
29	1	startpoints = np.where([1] + [labels[i] != labels[i-1] \
30		for i in range(1, tot_len)])[0]
31	1	endpoints = np.append(startpoints[1:]-1, tot_len-1)
32	1	acts = [labels[s] for s, e in zip(startpoints, endpoints)]
33		#Also split up the data, and only keep the non-zero activities
34	1	xysplit = [(X[s+borders:e-borders+1, :], a) \
35		for s, e, a in zip(startpoints, endpoints, acts) if a != 0]
36	1	xysplit = [(X, y) for X, y in xysplit if len(X) > 0]
37	1	Xlist = [X for X, y in xysplit]
		0 ignored issues – show Coding Style Naming introduced 2016-09-02 09:06 UTC by Report Bug Copy Issue Report The name `Xlist` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
38	1	ylist = [y for X, y in xysplit]
39	1	return Xlist, ylist
40
41	1	def sliding_window(frame_length, step, Xsamples,\
		0 ignored issues – show Coding Style Naming introduced 2016-09-02 09:06 UTC by Report Bug Copy Issue Report The name `Xsamples` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2016-09-02 09:06 UTC by Report Bug Copy Issue Report The name `Xsampleslist` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
42		ysamples, Xsampleslist, ysampleslist):
43		"""
44		Splits time series in ysampleslist and Xsampleslist
45		into segments by applying a sliding overlapping window
46		of size equal to frame_length with steps equal to step
47		it does this for all the samples and appends all the output together.
48		So, the participant distinction is not kept
49		"""
50	1	for j in range(len(Xsampleslist)):
51	1	X = Xsampleslist[j]
		0 ignored issues – show Coding Style Naming introduced 2016-09-01 15:34 UTC by Report Bug Copy Issue Report The name `X` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
52	1	ybinary = ysampleslist[j]
53	1	for i in range(0, X.shape[0]-frame_length, step):
54	1	xsub = X[i:i+frame_length, :]
55	1	ysub = ybinary
56	1	Xsamples.append(xsub)
57	1	ysamples.append(ysub)
58
59	1	def transform_y(y, mapclasses, nr_classes):
		0 ignored issues – show Coding Style Naming introduced 2016-09-01 15:34 UTC by Report Bug Copy Issue Report The name `y` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
60		"""
61		Transforms y, a list with one sequence of A timesteps
62		and B unique classes into a binary Numpy matrix of
63		shape (A, B)
64		"""
65	1	ymapped = np.array([mapclasses[c] for c in y], dtype='int')
66	1	ybinary = to_categorical(ymapped, nr_classes)
67	1	return ybinary
68
69	1	def addheader(datasets):
70		"""
71		The columns of the pandas data frame are numbers
72		this function adds the column labels
73		"""
74	1	axes = ['x', 'y', 'z']
75	1	IMUsensor_columns = ['temperature'] + \
		0 ignored issues – show Coding Style Naming introduced 2016-09-01 14:23 UTC by Report Bug Copy Issue Report The name `IMUsensor_columns` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
76		['acc_16g_' + i for i in axes] + \
77		['acc_6g_' + i for i in axes] + \
78		['gyroscope_'+ i for i in axes] + \
79		['magnometer_'+ i for i in axes] + \
80		['orientation_' + str(i) for i in range(4)]
81	1	header = ["timestamp", "activityID", "heartrate"] + ["hand_"+s \
82		for s in IMUsensor_columns] \
83		+ ["chest_"+s for s in IMUsensor_columns]+ ["ankle_"+s \
84		for s in IMUsensor_columns]
85	1	for i in range(0, len(datasets)):
86	1	datasets[i].columns = header
		0 ignored issues – show Coding Style introduced 2016-09-01 14:23 UTC by Report Bug Copy Issue Report The indentation here looks off. 8 spaces were expected, but 12 were found. Loading history...
87	1	return datasets
88
89	1	def numpify_and_store(X, y, xname, yname, outdatapath, shuffle=False):
		0 ignored issues – show Coding Style Naming introduced 2016-09-05 08:07 UTC by Report Bug Copy Issue Report The name `X` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2016-09-01 15:34 UTC by Report Bug Copy Issue Report The name `y` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
90		"""
91		Converts python lists x 3D and y 1D into numpy arrays
92		and stores the numpy array in directory outdatapath
93		shuffle is optional and shuffles the samples
94		"""
95	1	X = np.array(X)
96	1	y = np.array(y)
97		#Shuffle around the train set
98	1	if shuffle is True:
99	1	np.random.seed(123)
100	1	neworder = np.random.permutation(X.shape[0])
101	1	X = X[neworder, :, :]
102	1	y = y[neworder, :]
103		# Save binary file
104	1	np.save(outdatapath+ xname, X)
105	1	np.save(outdatapath+ yname, y)
106
107
108	1	def fetch_data(directory_to_extract_to):
109		"""
110		Fetch the data and extract the contents of the zip file
111		to the directory_to_extract_to.
112		First check whether this was done before, if yes, then skip
113		"""
114		targetdir = directory_to_extract_to + '/PAMAP2'
115		if os.path.exists(targetdir):
116		print('Data previously downloaded and stored in ' + targetdir)
117		else:
118		os.makedirs(targetdir) # create target directory
119		#download the PAMAP2 data, this is 688 Mb
120		path_to_zip_file = directory_to_extract_to + '/PAMAP2_Dataset.zip'
121		test_file_exist = os.path.isfile(path_to_zip_file)
122		if test_file_exist is False:
123		url = str('https://archive.ics.uci.edu/ml/' +
124		'machine-learning-databases/00231/PAMAP2_Dataset.zip')
125		#retrieve data from url
126		if sys.version_info <= (3,): #python2
127		local_fn, headers = urllib.urlretrieve(url,\
128		filename=path_to_zip_file)
129		else: #python3
130		local_fn, headers = urllib.request.urlretrieve(url,\
131		filename=path_to_zip_file)
132		print('Download complete and stored in: ' + path_to_zip_file)
133		else:
134		print('The data was previously downloaded and stored in ' +
135		path_to_zip_file)
136		# unzip
137		with zipfile.ZipFile(path_to_zip_file ,"r") as zip_ref:
		0 ignored issues – show Coding Style introduced 2016-09-01 14:23 UTC by Report Bug Copy Issue Report No space allowed before comma with zipfile.ZipFile(path_to_zip_file ,"r") as zip_ref: ^ Loading history... Coding Style introduced 2016-09-01 14:23 UTC by Report Bug Copy Issue Report Exactly one space required after comma with zipfile.ZipFile(path_to_zip_file ,"r") as zip_ref: ^ Loading history...
138		zip_ref.extractall(targetdir)
139		return targetdir
140
141
142	1	def slidingwindow_store(y_list, x_list,X_name, y_name, outdatapath, shuffle):
		0 ignored issues – show Coding Style introduced 2016-09-07 16:24 UTC by Report Bug Copy Issue Report Exactly one space required after comma def slidingwindow_store(y_list, x_list,X_name, y_name, outdatapath, shuffle): ^ Loading history... Coding Style Naming introduced 2016-09-07 16:24 UTC by Report Bug Copy Issue Report The name `X_name` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
143		# Take sliding-window frames. Target is label of last time step
144		# Data is 100 Hz
145		frame_length = int(5.12 * 100)
146		step = 1 * 100
147		x_set = []
148		y_set = []
149		sliding_window(frame_length, step, x_set, y_set, x_list, y_list)
150		numpify_and_store(x_set, y_set, X_name, y_name, \
151		outdatapath, shuffle)
152
153	1	def map_clas(datasets_filled):
154		ysetall = [set(np.array(data.activityID)) - set([0]) \
155		for data in datasets_filled]
156		classlabels = list(set.union(*[set(y) for y in ysetall]))
157		nr_classes = len(classlabels)
158		mapclasses = {classlabels[i] : i for i in range(len(classlabels))}
159		return classlabels, nr_classes, mapclasses
160
161	1	def split_data(Xlists,ybinarylists,indices):
		0 ignored issues – show Coding Style introduced 2016-09-07 16:26 UTC by Report Bug Copy Issue Report Exactly one space required after comma def split_data(Xlists,ybinarylists,indices): ^ Loading history... Coding Style introduced 2016-09-07 16:26 UTC by Report Bug Copy Issue Report Exactly one space required after comma def split_data(Xlists,ybinarylists,indices): ^ Loading history... Coding Style Naming introduced 2016-09-07 16:26 UTC by Report Bug Copy Issue Report The name `Xlists` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
162		""" Function takes subset from list given indices"""
163		if str(type(indices)) == "<class 'slice'>":
164		x_setlist = [X for Xlist in Xlists[indices] for X in Xlist]
165		y_setlist = [y for ylist in ybinarylists[indices] for y in ylist]
166		else:
167		x_setlist = [X for X in Xlists[indices]]
168		y_setlist = [y for y in ybinarylists[indices]]
169		return x_setlist, y_setlist
170
171	1	def preprocess(targetdir,outdatapath):
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma def preprocess(targetdir,outdatapath): ^ Loading history...
172		datadir = targetdir + '/PAMAP2_Dataset/Protocol'
173		filenames = listdir(datadir)
174		print('Start pre-processing all ' + str(len(filenames)) + ' files...')
175		# load the files and put them in a list of pandas dataframes:
176		datasets = [pd.read_csv(datadir+'/'+fn, header=None, sep=' ') \
177		for fn in filenames]
178		datasets = addheader(datasets) # add headers to the datasets
179		#Interpolate dataset to get same sample rate between channels
180		datasets_filled = [d.interpolate() for d in datasets]
181		# Create mapping for class labels
182		classlabels, nr_classes, mapclasses = map_clas(datasets_filled)
183		#Create input (x) and output (y) sets
184		xall = [np.array(data[columns_to_use]) for data in datasets_filled]
185		yall = [np.array(data.activityID) for data in datasets_filled]
186		xylists = [split_activities(y, x) for x, y in zip(xall, yall)]
187		Xlists, ylists = zip(*xylists)
		0 ignored issues – show Coding Style Naming introduced 2016-09-02 09:06 UTC by Report Bug Copy Issue Report The name `Xlists` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{1,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
188		ybinarylists = [transform_y(y, mapclasses, nr_classes) for y in ylists]
189		# Split in train, test and val
190		x_vallist, y_vallist = split_data(Xlists,ybinarylists,indices=6)
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma x_vallist, y_vallist = split_data(Xlists,ybinarylists,indices=6) ^ Loading history... Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma x_vallist, y_vallist = split_data(Xlists,ybinarylists,indices=6) ^ Loading history...
191		test_range = slice(7, len(datasets_filled))
192		x_testlist, y_testlist = split_data(Xlists,ybinarylists,test_range)
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma x_testlist, y_testlist = split_data(Xlists,ybinarylists,test_range) ^ Loading history... Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma x_testlist, y_testlist = split_data(Xlists,ybinarylists,test_range) ^ Loading history...
193		x_trainlist, y_trainlist = split_data(Xlists,ybinarylists,\
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma x_trainlist, y_trainlist = split_data(Xlists,ybinarylists,\ ^ Loading history...
194		indices=slice(0, 6))
195		# Take sliding-window frames, target is label of last time step,
196		# and store as numpy file
197		slidingwindow_store(y_list=y_trainlist, x_list=x_trainlist, \
198		X_name='X_train', y_name='y_train', \
199		outdatapath=outdatapath,shuffle=True)
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma outdatapath=outdatapath,shuffle=True) ^ Loading history...
200		slidingwindow_store(y_list=y_vallist, x_list=x_vallist, \
201		X_name='X_val', y_name='y_val', \
202		outdatapath=outdatapath,shuffle=False)
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma outdatapath=outdatapath,shuffle=False) ^ Loading history...
203		slidingwindow_store(y_list=y_testlist, x_list=x_testlist, \
204		X_name='X_test', y_name='y_test', \
205		outdatapath=outdatapath,shuffle=False)
		0 ignored issues – show Coding Style introduced 2016-09-07 16:24 UTC by Report Bug Copy Issue Report Exactly one space required after comma outdatapath=outdatapath,shuffle=False) ^ Loading history...
206		print('Processed data succesfully stored in ' + outdatapath)
207		return None
208
209	1	def fetch_and_preprocess(directory_to_extract_to, columns_to_use=None):
210		"""
211		High level function to fetch_and_preprocess the PAMAP2 dataset
212		directory_to_extract_to: the directory where the data will be stored
213		columns_to_use: the columns to use
214
215		The function will store the numpy output in directory outdatapath
216		"""
217		if columns_to_use is None:
218		columns_to_use = ['hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z',
219		'ankle_acc_16g_x', 'ankle_acc_16g_y', 'ankle_acc_16g_z',
220		'chest_acc_16g_x', 'chest_acc_16g_y', 'chest_acc_16g_z']
221		targetdir = fetch_data(directory_to_extract_to)
222		outdatapath = targetdir + '/PAMAP2_Dataset/slidingwindow512cleaned/'
223		if not os.path.exists(outdatapath):
224		os.makedirs(outdatapath)
225		if os.path.isfile(outdatapath+'x_train.npy'):
226		print('Data previously pre-processed and np-files saved to ' +
227		outdatapath)
228		else:
229		preprocess(targetdir,outdatapath)
		0 ignored issues – show Coding Style introduced 2016-09-07 16:27 UTC by Report Bug Copy Issue Report Exactly one space required after comma preprocess(targetdir,outdatapath) ^ Loading history...
230		return outdatapath
231
232	1	def load_data(outputpath):
233		ext = '.npy'
234		x_train = np.load(outputpath+'X_train'+ext)
235		y_train_binary = np.load(outputpath+'y_train'+ext)
236		x_val = np.load(outputpath+'X_val'+ext)
237		y_val_binary = np.load(outputpath+'y_val'+ext)
238		x_test = np.load(outputpath+'X_test'+ext)
239		y_test_binary = np.load(outputpath+'y_test'+ext)
240		return x_train, y_train_binary, x_val, y_val_binary, x_test, y_test_binary
241

NLeSC / mcfly

Push — master ( 8bbed5...9e4b64 )

fetch_and_preprocess() B

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like