Code Duplication - NLeSC/mcfly - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 25-40 lines in 2 locations

tutorial/tutorial_pamap2.py 2 locations


    print('Stored ' + xpath, y_name)


def fetch_data(directory_to_extract_to):
    """
    Fetch the data and extract the contents of the zip file
    to the directory_to_extract_to.
    First check whether this was done before, if yes, then skip

    The 'PAMAP2' target directory is only created once the zip archive has
    been opened successfully, and it is removed again if extraction fails.
    This way a failed download or a corrupt archive cannot leave behind an
    empty or partial directory that later calls would mistake for a
    completed download.

    Parameters
    ----------
    directory_to_extract_to : str
        directory to create subfolder 'PAMAP2'

    Returns
    -------
    targetdir: str
        directory where the data is extracted

    Raises
    ------
    zipfile.BadZipFile
        if the file found (or downloaded) at 'PAMAP2_Dataset.zip' is not a
        valid zip archive; the archive is left in place in that case
    """
    # Local import: only needed to clean up after a failed extraction.
    import shutil

    targetdir = os.path.join(directory_to_extract_to, "PAMAP2")
    if os.path.exists(targetdir):
        print('Data previously downloaded and stored in ' + targetdir)
        return targetdir

    # The zip file is stored directly in directory_to_extract_to, so that
    # directory has to exist before downloading. An empty string means the
    # current working directory, which needs no creation.
    if directory_to_extract_to:
        os.makedirs(directory_to_extract_to, exist_ok=True)

    # Download the PAMAP2 data, this is 688 Mb
    path_to_zip_file = os.path.join(directory_to_extract_to,
                                    'PAMAP2_Dataset.zip')
    if not os.path.isfile(path_to_zip_file):
        url = ('https://archive.ics.uci.edu/ml/'
               'machine-learning-databases/00231/PAMAP2_Dataset.zip')
        # retrieve data from url
        urllib.request.urlretrieve(url, filename=path_to_zip_file)
        print('Download complete and stored in: ' + path_to_zip_file)
    else:
        print('The data was previously downloaded and stored in ' +
              path_to_zip_file)

    # unzip
    with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
        # Only create the target directory now that the archive is readable.
        os.makedirs(targetdir)
        try:
            zip_ref.extractall(targetdir)
        except BaseException:
            # Do not leave a partial extraction behind: its mere existence
            # would make the next call skip the download entirely.
            shutil.rmtree(targetdir, ignore_errors=True)
            raise
    # The archive is only deleted after a successful extraction.
    os.remove(path_to_zip_file)
    return targetdir


def map_class(datasets_filled):

    return x_train, y_train_binary, x_val, y_val_binary, x_test, y_test_binary


def download_preprocessed_data(directory_to_extract_to):
    """
    Download the preprocessed data archive and extract it.

    Nothing is downloaded or extracted when the preprocessed data folder
    is already present; the download is skipped when 'data.zip' is already
    present in directory_to_extract_to.

    Parameters
    ----------
    directory_to_extract_to : str
        directory in which 'data.zip' is stored and extracted

    Returns
    -------
    data_path : str
        path of the 'data/PAMAP2/preprocessed' folder inside
        directory_to_extract_to
    """
    data_path = os.path.join(directory_to_extract_to,
                             'data/PAMAP2/preprocessed')

    # Guard clause: the extracted folder is already there.
    if os.path.isdir(data_path):
        print("Data already downloaded and extracted.")
        return data_path

    archive = os.path.join(directory_to_extract_to, 'data.zip')

    # Fetch the archive unless an earlier run already stored it
    if os.path.isfile(archive):
        print("Data already downloaded")
    else:
        print("Downloading data...")
        source_url = 'https://zenodo.org/record/345082/files/data.zip'
        urllib.request.urlretrieve(source_url, filename=archive)

    # Unpack the archive; the archive file itself is kept
    with zipfile.ZipFile(archive, "r") as bundle:
        print("Extracting data...")
        bundle.extractall(directory_to_extract_to)
    print("Done")

    return data_path

		@@ 180-219 (lines=40) @@
177		print('Stored ' + xpath, y_name)
178
179
180		def fetch_data(directory_to_extract_to):
181		"""
182		Fetch the data and extract the contents of the zip file
183		to the directory_to_extract_to.
184		First check whether this was done before, if yes, then skip
185
186		Parameters
187		----------
188		directory_to_extract_to : str
189		directory to create subfolder 'PAMAP2'
190
191		Returns
192		-------
193		targetdir: str
194		directory where the data is extracted
195		"""
196		targetdir = os.path.join(directory_to_extract_to, "PAMAP2")
197		if os.path.exists(targetdir):
198		print('Data previously downloaded and stored in ' + targetdir)
199		else:
200		os.makedirs(targetdir) # create target directory
201		# Download the PAMAP2 data, this is 688 Mb
202		path_to_zip_file = os.path.join(directory_to_extract_to, 'PAMAP2_Dataset.zip')
203		test_file_exist = os.path.isfile(path_to_zip_file)
204		if test_file_exist is False:
205		url = str('https://archive.ics.uci.edu/ml/' +
206		'machine-learning-databases/00231/PAMAP2_Dataset.zip')
207		# retrieve data from url
208		local_fn, headers = urllib.request.urlretrieve(url,
209		filename=path_to_zip_file)
210		print('Download complete and stored in: ' + path_to_zip_file)
211		else:
212		print('The data was previously downloaded and stored in ' +
213		path_to_zip_file)
214		# unzip
215
216		with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
217		zip_ref.extractall(targetdir)
218		os.remove(path_to_zip_file)
219		return targetdir
220
221
222		def map_class(datasets_filled):
		@@ 432-456 (lines=25) @@
429		return x_train, y_train_binary, x_val, y_val_binary, x_test, y_test_binary
430
431
432		def download_preprocessed_data(directory_to_extract_to):
433		data_path = os.path.join(directory_to_extract_to,
434		'data/PAMAP2/preprocessed')
435
436		if not os.path.isdir(data_path):
437		path_to_zip_file = os.path.join(directory_to_extract_to, 'data.zip')
438
439		# Download zip file with data
440		if not os.path.isfile(path_to_zip_file):
441		print("Downloading data...")
442		local_fn, headers = urllib.request.urlretrieve(
443		'https://zenodo.org/record/345082/files/data.zip',
444		filename=path_to_zip_file)
445		else:
446		print("Data already downloaded")
447
448		# Extract the zip file
449		with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
450		print("Extracting data...")
451		zip_ref.extractall(directory_to_extract_to)
452		print("Done")
453		else:
454		print("Data already downloaded and extracted.")
455
456		return data_path

NLeSC / mcfly

Code Duplication Length = 25-40 lines in 2 locations

tutorial/tutorial_pamap2.py 2 locations