|
1
|
|
|
""" |
|
2
|
|
|
Summary: |
|
3
|
|
|
Function fetch_and_preprocess from tutorial_pamap2.py helps to fetch and |
|
4
|
|
|
preprocess the data. |
|
5
|
|
|
Example function calls in 'Tutorial mcfly on PAMAP2.ipynb' |
|
6
|
|
|
""" |
|
7
|
|
|
import numpy as np |
|
8
|
|
|
from numpy import genfromtxt |
|
9
|
|
|
import pandas as pd |
|
10
|
|
|
import matplotlib.pyplot as plt |
|
11
|
|
|
from os import listdir |
|
12
|
|
|
import os.path |
|
13
|
|
|
import zipfile |
|
14
|
|
|
import keras |
|
15
|
|
|
from keras.utils.np_utils import to_categorical |
|
16
|
|
|
import sys |
|
17
|
|
|
import six.moves.urllib as urllib |
|
18
|
|
|
|
|
19
|
|
|
|
|
20
|
|
|
def split_activities(labels, X, exclude_activities, borders=10 * 100):
    """
    Split the data into one segment per contiguous activity.

    A segment is a maximal run of identical consecutive labels. From every
    segment `borders` timesteps are trimmed at both ends. Segments whose
    activity is listed in `exclude_activities`, and segments that are too
    short to survive the trimming, are dropped.

    Parameters
    ----------
    labels : numpy array
        Activity labels, one per timestep
    X : numpy array
        Data points, indexed as (timestep, variable)
    exclude_activities : list or tuple
        Activities to exclude from the result
    borders : int
        Nr of timesteps to remove from the borders of an activity

    Returns
    -------
    X_list : list
        One array per retained segment
    y_list : list
        The activity label of each retained segment
    """
    labels = np.asarray(labels)
    tot_len = len(labels)
    if tot_len == 0:
        return [], []
    # A new segment starts at index 0 and wherever the label changes
    change_points = np.flatnonzero(labels[1:] != labels[:-1]) + 1
    startpoints = np.concatenate(([0], change_points))
    endpoints = np.append(startpoints[1:] - 1, tot_len - 1)
    Xlist = []
    ylist = []
    for s, e in zip(startpoints, endpoints):
        activity = labels[s]
        if activity in exclude_activities:
            continue
        first = s + borders
        stop = e - borders + 1
        # Compare the bounds explicitly: a negative `stop` would otherwise
        # wrap around in the slice and pull in rows of other activities.
        if stop <= first:
            continue
        segment = X[first:stop, :]
        if len(segment) > 0:
            Xlist.append(segment)
            ylist.append(activity)
    return Xlist, ylist
|
55
|
|
|
|
|
56
|
|
|
|
|
57
|
|
|
def sliding_window(frame_length, step, Xsampleslist, ysampleslist):
    """
    Cut every time series in Xsampleslist into overlapping windows.

    A window of frame_length rows is taken every `step` rows. The windows
    of all series are appended to one flat list, so the distinction
    between the input series is not kept. Each window gets the
    corresponding entry of ysampleslist as its target.

    Parameters
    ----------
    frame_length : int
        Length of sliding window
    step : int
        Stepsize between windows
    Xsampleslist : list
        Samples to take sliding windows from
    ysampleslist : list
        Target belonging to each entry of Xsampleslist

    Returns
    -------
    Xsamples : list
        The windows
    ysamples : list
        The target of each window
    """
    Xsamples = []
    ysamples = []
    for j, series in enumerate(Xsampleslist):
        target = ysampleslist[j]
        # window start offsets; the upper bound is exclusive
        starts = range(0, series.shape[0] - frame_length, step)
        Xsamples.extend(series[start:start + frame_length, :]
                        for start in starts)
        ysamples.extend([target] * len(starts))
    return Xsamples, ysamples
|
92
|
|
|
|
|
93
|
|
|
|
|
94
|
|
|
def transform_y(y, mapclasses, nr_classes):
    """
    Encode a sequence of class labels as a binary class matrix.

    Every label in y is first translated to its number via mapclasses;
    the numbers are then passed to keras' to_categorical.

    Parameters
    ----------
    y : list or array
        List of classes
    mapclasses : dict
        dictionary that maps the classes to numbers
    nr_classes : int
        total number of classes

    Returns
    -------
    ybinary
        The result of to_categorical for the mapped labels
    """
    class_numbers = [mapclasses[label] for label in y]
    return to_categorical(np.array(class_numbers, dtype='int'), nr_classes)
|
112
|
|
|
|
|
113
|
|
|
def get_header():
    """
    Build the list of column names for a PAMAP2 data frame.

    The list starts with timestamp, activityID and heartrate, followed by
    the same block of IMU sensor columns prefixed with hand_, chest_ and
    ankle_ (in that order).

    Returns
    -------
    header : list
        Column names
    """
    imu_columns = ['temperature']
    for sensor in ('acc_16g_', 'acc_6g_', 'gyroscope_', 'magnometer_'):
        imu_columns.extend(sensor + axis for axis in ('x', 'y', 'z'))
    imu_columns.extend('orientation_' + str(k) for k in range(4))

    header = ["timestamp", "activityID", "heartrate"]
    for location in ("hand_", "chest_", "ankle_"):
        header.extend(location + column for column in imu_columns)
    return header
|
126
|
|
|
|
|
127
|
|
|
def addheader(datasets):
    """
    Label the columns of every data frame with the names from get_header.

    The data frames are modified in place.

    Parameters
    ----------
    datasets : list
        List of pandas dataframes

    Returns
    -------
    datasets : list
        The same list that was passed in
    """
    column_names = get_header()
    for frame in datasets:
        frame.columns = column_names
    return datasets
|
141
|
|
|
|
|
142
|
|
|
|
|
143
|
|
|
def numpify_and_store(X, y, X_name, y_name, outdatapath, shuffle=False):
    """
    Convert X and y to numpy arrays and save them in outdatapath.

    Parameters
    ----------
    X : list
        list with data; indexed along three axes when shuffling
    y : list
        list with data; indexed along two axes when shuffling
    X_name : str
        name to store the x arrays
    y_name : str
        name to store the y arrays
    outdatapath : str
        path to the directory to store the data
    shuffle : bool
        whether to shuffle the data before storing; only the value True
        enables shuffling
    """
    data = np.array(X)
    targets = np.array(y)
    if shuffle is True:
        # Fixed seed, so the stored order is the same on every run
        np.random.seed(123)
        order = np.random.permutation(data.shape[0])
        data, targets = data[order, :, :], targets[order, :]
    # Save binary files
    xpath = os.path.join(outdatapath, X_name)
    np.save(xpath, data)
    np.save(os.path.join(outdatapath, y_name), targets)
    print('Stored ' + xpath, y_name)
|
178
|
|
|
|
|
179
|
|
|
|
|
180
|
|
View Code Duplication |
def fetch_data(directory_to_extract_to):
    """
    Fetch the data and extract the contents of the zip file
    to the subfolder 'PAMAP2' of directory_to_extract_to.
    If that subfolder already exists, nothing is downloaded or extracted.

    The subfolder is only created once the zip file is available, and it
    is removed again when the extraction fails, so that an aborted run is
    not mistaken for a finished one on the next call.

    Parameters
    ----------
    directory_to_extract_to : str
        directory to create subfolder 'PAMAP2'

    Returns
    -------
    targetdir: str
        directory where the data is extracted
    """
    import shutil

    targetdir = os.path.join(directory_to_extract_to, "PAMAP2")
    if os.path.exists(targetdir):
        print('Data previously downloaded and stored in ' + targetdir)
        return targetdir

    # The download needs the parent directory to exist
    if directory_to_extract_to and not os.path.isdir(directory_to_extract_to):
        os.makedirs(directory_to_extract_to)

    # Download the PAMAP2 data, this is 688 Mb
    path_to_zip_file = os.path.join(directory_to_extract_to,
                                    'PAMAP2_Dataset.zip')
    if not os.path.isfile(path_to_zip_file):
        url = str('https://archive.ics.uci.edu/ml/' +
                  'machine-learning-databases/00231/PAMAP2_Dataset.zip')
        # retrieve data from url
        urllib.request.urlretrieve(url, filename=path_to_zip_file)
        print('Download complete and stored in: ' + path_to_zip_file)
    else:
        print('The data was previously downloaded and stored in ' +
              path_to_zip_file)

    # unzip
    os.makedirs(targetdir)  # create target directory
    try:
        with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
            zip_ref.extractall(targetdir)
    except BaseException:
        # Do not leave a partially filled target directory behind
        shutil.rmtree(targetdir, ignore_errors=True)
        raise
    os.remove(path_to_zip_file)
    return targetdir
|
220
|
|
|
|
|
221
|
|
|
|
|
222
|
|
|
def map_class(datasets_filled):
    """
    Create the mapping from activity labels to class numbers.

    The labels are collected from the activityID of every dataset,
    label 0 is left out, and the remaining labels are numbered in
    ascending order so that the mapping does not depend on set ordering.

    Parameters
    ----------
    datasets_filled : list
        Datasets that each have an activityID attribute

    Returns
    -------
    classlabels : list
        Sorted list of the labels that occur (without 0)
    nr_classes : int
        Number of labels in classlabels
    mapclasses : dict
        Maps each label to its index in classlabels
    """
    labels = set()
    for data in datasets_filled:
        labels |= set(np.array(data.activityID))
    labels.discard(0)
    classlabels = sorted(labels)
    nr_classes = len(classlabels)
    mapclasses = {label: index for index, label in enumerate(classlabels)}
    return classlabels, nr_classes, mapclasses
|
229
|
|
|
|
|
230
|
|
|
|
|
231
|
|
|
def split_data(Xlists, ybinarylists, indices):
    """ Function takes subset from list given indices

    When indices is a slice, the selected samples are concatenated into
    one flat list; otherwise the single selected sample is returned as a
    list.

    Parameters
    ----------
    Xlists: tuple
        tuple (samples) of lists (windows) of numpy-arrays (time, variable)
    ybinarylists :
        list (samples) of numpy-arrays (window, class)
    indices :
        slice or single index of the data (samples) to be taken

    Returns
    -------
    x_setlist : list
        list (windows across samples) of numpy-arrays (time, variable)
    y_setlist: list
        list (windows across samples) of numpy-arrays (class, )
    """
    if isinstance(indices, slice):
        x_setlist = [X for Xlist in Xlists[indices] for X in Xlist]
        y_setlist = [y for ylist in ybinarylists[indices] for y in ylist]
    else:
        x_setlist = list(Xlists[indices])
        y_setlist = list(ybinarylists[indices])
    return x_setlist, y_setlist
|
260
|
|
|
|
|
261
|
|
|
def split_data_random(X, y, val_size, test_size):
    """ Randomly split X and y into a train, validation and test part

    One random permutation of the samples is drawn; the last test_size
    samples of it form the test set, the val_size samples before those
    the validation set, and the remainder the train set.

    Parameters
    ----------
    X : list or array
        samples
    y : list or array
        targets, aligned with X
    val_size : int
        number of samples in the validation set
    test_size : int
        number of samples in the test set

    Returns
    -------
    X_train, y_train, X_val, y_val, X_test, y_test : numpy arrays

    Raises
    ------
    ValueError
        if a size is negative or val_size + test_size exceeds the number
        of samples
    """
    X = np.array(X)
    y = np.array(y)
    size = len(X)
    # A negative train size would make the slices below wrap around and
    # silently produce overlapping sets.
    if val_size < 0 or test_size < 0 or val_size + test_size > size:
        raise ValueError(
            'val_size and test_size must be non-negative and sum to at '
            'most the number of samples (%d)' % size)
    train_size = size - val_size - test_size
    indices = np.random.permutation(size)
    train_idx = indices[:train_size]
    val_idx = indices[train_size:train_size + val_size]
    test_idx = indices[train_size + val_size:]
    return (X[train_idx], y[train_idx],
            X[val_idx], y[val_idx],
            X[test_idx], y[test_idx])
|
274
|
|
|
|
|
275
|
|
|
def preprocess(targetdir, outdatapath, columns_to_use, exclude_activities, fold,
               val_test_size=None):
    """ Preprocess the fetched PAMAP2 data and store it as Numpy files

    Parameters
    ----------
    targetdir : str
        directory that contains 'PAMAP2_Dataset/Protocol', as returned
        by function fetch_data
    outdatapath : str
        the directory where the Numpy output will be stored
    columns_to_use : list
        list of column names to use
    exclude_activities : list or tuple
        activities to exclude from the output
    fold : boolean
        Whether to store each fold (data file) separately ('False'
        creates Train, Test and Validation sets)
    val_test_size : tuple or None
        Only used when fold is false. If None, data file 6 becomes the
        validation set, the files from 7 onwards the test set and files
        0-5 the train set. Otherwise it is unpacked into
        (val_size, test_size) and the windows of all files are split at
        random by split_data_random.

    Returns
    -------
    None
    """
    datadir = os.path.join(targetdir, 'PAMAP2_Dataset', 'Protocol')
    filenames = sorted(listdir(datadir))
    print('Start pre-processing all ' + str(len(filenames)) + ' files...')
    # One pandas dataframe per file, with named columns
    datasets = addheader([
        pd.read_csv(os.path.join(datadir, fn), header=None, sep=' ')
        for fn in filenames])
    # Interpolate dataset to get same sample rate between channels
    datasets_filled = [frame.interpolate() for frame in datasets]
    # Mapping from class label to class number
    _, nr_classes, mapclasses = map_class(datasets_filled)
    # Input (x) and output (y) per file, split up per activity
    xall = [np.array(frame[columns_to_use]) for frame in datasets_filled]
    yall = [np.array(frame.activityID) for frame in datasets_filled]
    Xlists, ylists = zip(*[
        split_activities(labels, values, exclude_activities)
        for values, labels in zip(xall, yall)])
    ybinarylists = [transform_y(labels, mapclasses, nr_classes)
                    for labels in ylists]
    frame_length = int(5.12 * 100)
    step = 1 * 100

    if fold:
        # Store the windows of every data file in its own pair of files
        for i in range(len(Xlists)):
            X_i, y_i = split_data(Xlists, ybinarylists, i)
            X, y = sliding_window(frame_length, step, X_i, y_i)
            numpify_and_store(X, y, X_name='X_'+str(i), y_name='y_'+str(i),
                              outdatapath=outdatapath, shuffle=True)
    else:
        if val_test_size is None:
            # Split in train, test and val by data file
            x_vallist, y_vallist = split_data(Xlists, ybinarylists,
                                              indices=6)
            x_testlist, y_testlist = split_data(
                Xlists, ybinarylists, slice(7, len(datasets_filled)))
            x_trainlist, y_trainlist = split_data(Xlists, ybinarylists,
                                                  indices=slice(0, 6))
            # Take sliding-window frames of every part
            x_train, y_train = sliding_window(frame_length, step,
                                              x_trainlist, y_trainlist)
            x_val, y_val = sliding_window(frame_length, step,
                                          x_vallist, y_vallist)
            x_test, y_test = sliding_window(frame_length, step,
                                            x_testlist, y_testlist)
        else:
            val_size, test_size = val_test_size
            X_list, y_list = split_data(Xlists, ybinarylists,
                                        slice(0, len(datasets_filled)))
            X, y = sliding_window(frame_length, step, X_list, y_list)
            (x_train, y_train, x_val, y_val,
             x_test, y_test) = split_data_random(X, y, val_size, test_size)

        # Only the train set is shuffled before it is stored
        outputs = ((x_train, y_train, 'X_train', 'y_train', True),
                   (x_val, y_val, 'X_val', 'y_val', False),
                   (x_test, y_test, 'X_test', 'y_test', False))
        for x_part, y_part, x_name, y_name, do_shuffle in outputs:
            numpify_and_store(x_part, y_part, X_name=x_name, y_name=y_name,
                              outdatapath=outdatapath, shuffle=do_shuffle)

    print('Processed data succesfully stored in ' + outdatapath)
    return None
|
362
|
|
|
|
|
363
|
|
|
|
|
364
|
|
|
def fetch_and_preprocess(directory_to_extract_to, columns_to_use=None, output_dir='preprocessed', exclude_activities=(0,), fold=False,
                         val_test_size=None):
    """
    High level function to fetch_and_preprocess the PAMAP2 dataset

    The preprocessing is skipped when its output is already present in
    the output directory.

    Parameters
    ----------
    directory_to_extract_to : str
        the directory where the data will be stored
    columns_to_use : list
        the columns to use; defaults to the 16g accelerometer axes of
        hand, ankle and chest
    output_dir : str
        name of the directory to write the outputdata to
    exclude_activities : list or tuple
        activities to exclude from the output
    fold : boolean
        Whether to store each fold separately ('False' creates
        Train, Test and Validation sets)
    val_test_size : tuple or None
        passed on to preprocess

    Returns
    -------
    outdatapath: str
        The directory in which the numpy files are stored
    """
    if columns_to_use is None:
        columns_to_use = ['hand_acc_16g_x', 'hand_acc_16g_y', 'hand_acc_16g_z',
                          'ankle_acc_16g_x', 'ankle_acc_16g_y', 'ankle_acc_16g_z',
                          'chest_acc_16g_x', 'chest_acc_16g_y', 'chest_acc_16g_z']
    targetdir = fetch_data(directory_to_extract_to)
    outdatapath = os.path.join(targetdir, output_dir)
    if not os.path.exists(outdatapath):
        os.makedirs(outdatapath)
    # preprocess stores X_<i>.npy files when fold is set and X_train.npy
    # otherwise, so look for the file that matches the requested mode.
    marker = 'X_0.npy' if fold else 'X_train.npy'
    if os.path.isfile(os.path.join(outdatapath, marker)):
        print('Data previously pre-processed and np-files saved to ' +
              outdatapath)
    else:
        preprocess(targetdir, outdatapath, columns_to_use, exclude_activities, fold, val_test_size)
    return outdatapath
|
402
|
|
|
|
|
403
|
|
|
|
|
404
|
|
|
def load_data(outputpath):
    """ Load the train, validation and test arrays from directory
    outputpath.

    Parameters
    ----------
    outputpath : str
        directory where the numpy files are stored

    Returns
    -------
    x_train
    y_train_binary
    x_val
    y_val_binary
    x_test
    y_test_binary
    """
    ext = '.npy'
    # File names in the order in which the arrays are returned
    names = ('X_train', 'y_train', 'X_val', 'y_val', 'X_test', 'y_test')
    return tuple(np.load(os.path.join(outputpath, name + ext))
                 for name in names)
|
430
|
|
|
|
|
431
|
|
|
|
|
432
|
|
View Code Duplication |
def download_preprocessed_data(directory_to_extract_to):
    """ Download and extract the zip file with preprocessed data

    Nothing is done when the directory with the preprocessed data already
    exists, and the download is skipped when 'data.zip' is already
    present in directory_to_extract_to.

    Parameters
    ----------
    directory_to_extract_to : str
        directory to download the zip file to and to extract it in

    Returns
    -------
    data_path : str
        'data/PAMAP2/preprocessed' inside directory_to_extract_to
    """
    data_path = os.path.join(directory_to_extract_to,
                             'data/PAMAP2/preprocessed')
    if os.path.isdir(data_path):
        print("Data already downloaded and extracted.")
        return data_path

    path_to_zip_file = os.path.join(directory_to_extract_to, 'data.zip')
    # Download zip file with data
    if os.path.isfile(path_to_zip_file):
        print("Data already downloaded")
    else:
        print("Downloading data...")
        urllib.request.urlretrieve(
            'https://zenodo.org/record/345082/files/data.zip',
            filename=path_to_zip_file)

    # Extract the zip file
    with zipfile.ZipFile(path_to_zip_file, "r") as archive:
        print("Extracting data...")
        archive.extractall(directory_to_extract_to)
    print("Done")
    return data_path
This check looks for lines that are too long. You can specify the maximum line length.