Conditions | 39 |
Total Lines | 116 |
Lines | 0 |
Ratio | 0 % |
Tests | 0 |
CRAP Score | 1560 |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters or temporary variables are present, consider grouping related parameters into a parameter object, or replacing temporary variables with query methods before extracting smaller methods.
Complex methods like fetch_and_preprocess() often do a lot of different things. To break such a method down, we need to identify cohesive components within it. A common approach to find such a component is to look for statements and variables that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | #import required python modules |
||
def fetch_and_preprocess(directory_to_extract_to, columns_to_use):
    """Download the PAMAP2 dataset (if needed), preprocess it, and save numpy files.

    Downloads and unzips the PAMAP2 zip into ``directory_to_extract_to``
    (skipped when already present), then loads the per-subject Protocol
    files, labels the columns, interpolates missing samples, maps the
    activity IDs to one-hot vectors, splits subjects into train/val/test,
    cuts sliding-window frames, shuffles the training set, and stores the
    resulting arrays as ``.npy`` files.

    Parameters
    ----------
    directory_to_extract_to : str
        Directory in which the zip is downloaded and extracted.
    columns_to_use : list of str
        Names of the labelled columns used as input channels (X).

    Returns
    -------
    str
        Path of the directory where the processed ``.npy`` files are stored.
    """
    targetdir = directory_to_extract_to + '/PAMAP2'
    if os.path.exists(targetdir):
        print('Data previously downloaded and stored in ' + targetdir)
    else:
        # download the PAMAP2 data, this is 688 Mb
        path_to_zip_file = directory_to_extract_to + '/PAMAP2_Dataset.zip'
        if not os.path.isfile(path_to_zip_file):
            url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00231/PAMAP2_Dataset.zip'
            # retrieve data from url
            urllib.request.urlretrieve(url, filename=path_to_zip_file)
            print('Download complete and stored in: ' + path_to_zip_file)
        else:
            print('The data was previously downloaded and stored in ' + path_to_zip_file)
        # unzip
        os.makedirs(targetdir)  # create target directory
        with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
            zip_ref.extractall(targetdir)
    outdatapath = targetdir + '/PAMAP2_Dataset' + '/slidingwindow512cleaned/'
    if not os.path.exists(outdatapath):
        os.makedirs(outdatapath)
    if os.path.isfile(outdatapath + 'X_train.npy'):
        print('Data previously pre-processed and np-files saved to ' + outdatapath)
    else:
        datadir = targetdir + '/PAMAP2_Dataset/Protocol'
        filenames = listdir(datadir)
        print('Start pre-processing all ' + str(len(filenames)) + ' files...')
        # load the files and put them in a list of pandas dataframes:
        datasets = [pd.read_csv(datadir + '/' + fn, header=None, sep=' ')
                    for fn in filenames]
        # The columns are numbers, which is not very practical.
        # Let's add column labels to the pandas dataframe:
        axes = ['x', 'y', 'z']
        IMUsensor_columns = ['temperature'] + \
            ['acc_16g_' + i for i in axes] + \
            ['acc_6g_' + i for i in axes] + \
            ['gyroscope_' + i for i in axes] + \
            ['magnometer_' + i for i in axes] + \
            ['orientation_' + str(i) for i in range(4)]
        header = ["timestamp", "activityID", "heartrate"] + \
            ["hand_" + s for s in IMUsensor_columns] + \
            ["chest_" + s for s in IMUsensor_columns] + \
            ["ankle_" + s for s in IMUsensor_columns]
        for dataset in datasets:
            dataset.columns = header
        # Interpolate dataset to get same sample rate between channels
        datasets_filled = [d.interpolate() for d in datasets]
        # Create mapping for class labels; activity ID 0 is discarded
        y_set_all = [set(np.array(data.activityID)) - set([0])
                     for data in datasets_filled]
        classlabels = list(set.union(*[set(y) for y in y_set_all]))
        nr_classes = len(classlabels)
        mapclasses = {classlabels[i]: i for i in range(len(classlabels))}

        def transform_y(y):
            # Map raw activity IDs onto consecutive ints, then one-hot encode.
            y_mapped = np.array([mapclasses[c] for c in y], dtype='int')
            return to_categorical(y_mapped, nr_classes)

        # Create input (X) and output (y) sets
        X_all = [np.array(data[columns_to_use]) for data in datasets_filled]
        y_all = [np.array(data.activityID) for data in datasets_filled]
        Xy_lists = [split_activities(y, X) for X, y in zip(X_all, y_all)]
        X_lists, y_lists = zip(*Xy_lists)
        y_binary_lists = [transform_y(y) for y in y_lists]
        # Split in train, test and val: subjects 0-5 train, 6 val, 7+ test
        train_range = slice(0, 6)
        val_range = 6
        test_range = slice(7, len(datasets_filled))
        X_train_list = [X for X_list in X_lists[train_range] for X in X_list]
        X_val_list = list(X_lists[val_range])
        X_test_list = [X for X_list in X_lists[test_range] for X in X_list]

        y_train_list = [y for y_list in y_binary_lists[train_range] for y in y_list]
        y_val_list = list(y_binary_lists[val_range])
        y_test_list = [y for y_list in y_binary_lists[test_range] for y in y_list]

        # Take sliding-window frames. Target is label of last time step
        # Data is 100 Hz
        frame_length = int(5.12 * 100)
        step = 1 * 100
        X_train, y_train = [], []
        X_val, y_val = [], []
        X_test, y_test = [], []
        # sliding_window appends the generated frames to the output lists
        for X, y_binary in zip(X_train_list, y_train_list):
            sliding_window(X, y_binary, frame_length, step, X_train, y_train)
        for X, y_binary in zip(X_val_list, y_val_list):
            sliding_window(X, y_binary, frame_length, step, X_val, y_val)
        for X, y_binary in zip(X_test_list, y_test_list):
            sliding_window(X, y_binary, frame_length, step, X_test, y_test)
        X_train = np.array(X_train)
        y_train = np.array(y_train)
        X_val = np.array(X_val)
        y_val = np.array(y_val)
        X_test = np.array(X_test)
        y_test = np.array(y_test)

        # Shuffle around the train set (fixed seed for reproducibility)
        np.random.seed(123)
        neworder = np.random.permutation(X_train.shape[0])
        X_train = X_train[neworder, :, :]
        y_train = y_train[neworder, :]

        # Save binary file
        np.save(outdatapath + 'X_train', X_train)
        np.save(outdatapath + 'y_train_binary', y_train)
        np.save(outdatapath + 'X_val', X_val)
        np.save(outdatapath + 'y_val_binary', y_val)
        np.save(outdatapath + 'X_test', X_test)
        np.save(outdatapath + 'y_test_binary', y_test)
        print('Processed data succesfully stored in ' + outdatapath)
    return outdatapath
||
155 | |||
165 |
This check looks for invalid names for a range of different identifiers.
You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.
If your project includes a Pylint configuration file, the settings contained in that file take precedence.
To find out more about Pylint, please refer to their site.