Code Duplication    Length = 25-40 lines in 2 locations

tutorial/tutorial_pamap2.py 2 locations

@@ 180-219 (lines=40) @@
177
    print('Stored ' + xpath, y_name)
178
179
180
def fetch_data(directory_to_extract_to):
    """
    Fetch the PAMAP2 data and extract the contents of the zip file
    into a 'PAMAP2' subfolder of directory_to_extract_to.

    If the 'PAMAP2' subfolder already exists, the data is assumed to be
    present and nothing is downloaded or extracted. If the archive
    'PAMAP2_Dataset.zip' is already present in directory_to_extract_to
    it is reused instead of being downloaded again. The archive is
    removed after a successful extraction.

    Parameters
    ----------
    directory_to_extract_to : str
        directory to create subfolder 'PAMAP2'

    Returns
    -------
    targetdir: str
        directory where the data is extracted

    Raises
    ------
    zipfile.BadZipFile
        if 'PAMAP2_Dataset.zip' is not a valid zip archive. Errors
        raised by the download itself are propagated unchanged.
    """
    import shutil  # local import: only needed to clean up a failed extraction

    targetdir = os.path.join(directory_to_extract_to, "PAMAP2")
    if os.path.exists(targetdir):
        print('Data previously downloaded and stored in ' + targetdir)
        return targetdir

    # The parent directory must exist before the archive can be written.
    # An empty string means "current directory", which always exists.
    if directory_to_extract_to:
        os.makedirs(directory_to_extract_to, exist_ok=True)

    path_to_zip_file = os.path.join(directory_to_extract_to, 'PAMAP2_Dataset.zip')
    if not os.path.isfile(path_to_zip_file):
        # Download the PAMAP2 data, this is 688 Mb
        url = ('https://archive.ics.uci.edu/ml/'
               'machine-learning-databases/00231/PAMAP2_Dataset.zip')
        # Download under a temporary name so that an interrupted transfer
        # is never mistaken for a complete archive on the next call.
        partial_download = path_to_zip_file + '.part'
        try:
            urllib.request.urlretrieve(url, filename=partial_download)
        except BaseException:
            if os.path.exists(partial_download):
                os.remove(partial_download)
            raise
        os.replace(partial_download, path_to_zip_file)
        print('Download complete and stored in: ' + path_to_zip_file)
    else:
        print('The data was previously downloaded and stored in ' +
              path_to_zip_file)

    # unzip
    # The existence of targetdir is what marks the data as available, so
    # it is created only now and removed again if the extraction fails.
    os.makedirs(targetdir)
    try:
        with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
            zip_ref.extractall(targetdir)
    except BaseException:
        shutil.rmtree(targetdir, ignore_errors=True)
        raise
    os.remove(path_to_zip_file)
    return targetdir
220
221
222
def map_class(datasets_filled):
@@ 432-456 (lines=25) @@
429
    return x_train, y_train_binary, x_val, y_val_binary, x_test, y_test_binary
430
431
432
def download_preprocessed_data(directory_to_extract_to):
    """
    Download the preprocessed data archive and extract it into
    directory_to_extract_to.

    If the folder 'data/PAMAP2/preprocessed' already exists inside
    directory_to_extract_to, nothing is downloaded or extracted. If
    'data.zip' is already present in directory_to_extract_to it is
    reused instead of being downloaded again. The archive is left in
    place after extraction.

    Parameters
    ----------
    directory_to_extract_to : str
        directory in which 'data.zip' is stored and extracted

    Returns
    -------
    data_path: str
        directory_to_extract_to joined with 'data/PAMAP2/preprocessed'
    """
    data_path = os.path.join(directory_to_extract_to,
                             'data/PAMAP2/preprocessed')

    if os.path.isdir(data_path):
        print("Data already downloaded and extracted.")
        return data_path

    path_to_zip_file = os.path.join(directory_to_extract_to, 'data.zip')

    # Download zip file with data
    if not os.path.isfile(path_to_zip_file):
        print("Downloading data...")
        # The destination directory must exist before the archive can be
        # written. An empty string means "current directory".
        if directory_to_extract_to:
            os.makedirs(directory_to_extract_to, exist_ok=True)
        # Download under a temporary name so that an interrupted transfer
        # is never mistaken for a complete archive on the next call.
        partial_download = path_to_zip_file + '.part'
        try:
            urllib.request.urlretrieve(
                'https://zenodo.org/record/345082/files/data.zip',
                filename=partial_download)
        except BaseException:
            if os.path.exists(partial_download):
                os.remove(partial_download)
            raise
        os.replace(partial_download, path_to_zip_file)
    else:
        print("Data already downloaded")

    # Extract the zip file
    with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
        print("Extracting data...")
        zip_ref.extractall(directory_to_extract_to)
    print("Done")

    return data_path