e2edutch.download - Code Metrics - Inspection of "Merge pull request #18 from Filter-Bubble/predicto..." - Filter-Bubble/e2e-Dutch - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( fc9889...278673 )

by Dafne van

created 2021-01-07 14:57 UTC

e2edutch.download A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	72
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	17
eloc	52
dl	0
loc	72
rs	10
c	0
b	0
f	0

3 Functions

Rating	Name	Size	Complexity
A	download_file()	18	5
C	download_data()	34	11
A	main()	3	1

import os

import requests
import gzip

import zipfile

from tqdm import tqdm
from e2edutch import util


def download_file(url, path):
    """
    Download a URL into a file as specified by `path`.

    The response body is streamed to disk in fixed-size chunks while a tqdm
    progress bar reports the number of bytes written.

    Args:
        url: Address of the resource to fetch.
        path: Destination file; it is created or overwritten.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (4xx/5xx). Nothing is written to `path` in that case.
    """
    # This function is adapted from stanza
    # https://github.com/stanfordnlp/stanza/blob/f0338f891a03e242c7e11e440dec6e191d54ab77/stanza/resources/common.py#L103
    default_chunk_size = 131072
    desc = 'Downloading ' + url

    # Using the response as a context manager releases the connection even
    # when the download is interrupted halfway.
    with requests.get(url, stream=True) as r:
        # Fail before touching `path`, so an error page is never saved as if
        # it were the requested file.
        r.raise_for_status()

        # The content-length header is optional; without it tqdm simply
        # shows a byte counter instead of a percentage bar (total=None).
        content_length = r.headers.get('content-length')
        file_size = int(content_length) if content_length is not None else None

        with open(path, 'wb') as f, \
                tqdm(total=file_size, unit='B', unit_scale=True,
                     desc=desc) as pbar:
            for chunk in r.iter_content(chunk_size=default_chunk_size):
                # Skip empty keep-alive chunks.
                if chunk:
                    f.write(chunk)
                    pbar.update(len(chunk))


def download_data(config=None):
    """
    Download the external resources used by e2e-Dutch into the data directory.

    Three resources are fetched, each only when its target is not yet present:
    the fastText word vectors (stored without their first line), the
    pre-trained model archive (extracted into `<data_dir>/final`), and the
    character vocabulary file.

    Args:
        config: Configuration passed on to `util.get_data_dir` to locate the
            data directory. Defaults to an empty dict.
    """
    # Build the default per call: a `{}` default in the signature would be one
    # shared object across all invocations.
    if config is None:
        config = {}
    data_dir = util.get_data_dir(config)

    # Download word vectors
    url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.vec.gz"
    fname = os.path.join(data_dir, 'fasttext.300.vec')
    fname_gz = fname+'.gz'
    if not os.path.exists(fname):
        download_file(url, fname_gz)
        with gzip.open(fname_gz, 'rb') as fin:
            with open(fname, 'wb') as fout:
                # We need to remove the first line. Iterate lazily instead of
                # calling readlines(), which would hold the whole decompressed
                # file in memory at once.
                next(fin, None)
                fout.writelines(fin)
        os.remove(fname_gz)

    # Download e2e dutch model
    url = "https://surfdrive.surf.nl/files/index.php/s/UnZMyDrBEFunmQZ/download"
    fname_zip = os.path.join(data_dir, 'model.zip')
    log_dir_name = os.path.join(data_dir, 'final')
    if not os.path.exists(fname_zip) and not os.path.exists(log_dir_name):
        download_file(url, fname_zip)
    if not os.path.exists(log_dir_name):
        with zipfile.ZipFile(fname_zip, 'r') as zfile:
            zfile.extractall(data_dir)
        # The archive unpacks to `logs/final`; move the model directory up one
        # level and drop the then-empty `logs` directory.
        os.rename(os.path.join(data_dir, 'logs', 'final'), log_dir_name)
        os.rmdir(os.path.join(data_dir, 'logs'))

    # Download char_dict
    url = "https://github.com/Filter-Bubble/e2e-Dutch/raw/v0.2.0/data/char_vocab.dutch.txt"
    fname = os.path.join(data_dir, 'char_vocab.dutch.txt')
    if not os.path.exists(fname):
        download_file(url, fname)


def main():
    """Command-line entry point: call `download_data` without arguments."""
    # To do: argparse for config file
    download_data()


# Start the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


1			import os
			0 ignored issues – show introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2			import requests
3			import gzip
			0 ignored issues – show introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report standard import "import gzip" should be placed before "import requests" Loading history...
4			import zipfile
			0 ignored issues – show introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report standard import "import zipfile" should be placed before "import requests" Loading history...
5			from tqdm import tqdm
6			from e2edutch import util
7
8
9			def download_file(url, path):
10			"""
11			Download a URL into a file as specified by `path`.
12			"""
13			# This function is copied from stanza
14			# https://github.com/stanfordnlp/stanza/blob/f0338f891a03e242c7e11e440dec6e191d54ab77/stanza/resources/common.py#L103
15			r = requests.get(url, stream=True)
			0 ignored issues – show Coding Style Naming introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report Variable name "r" doesn't conform to snake_case naming style ('([^\W\dA-Z][^\WA-Z]{2,}|_[^\WA-Z]*|__[^\WA-Z\d_][^\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
16			with open(path, 'wb') as f:
			0 ignored issues – show Coding Style Naming introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report Variable name "f" doesn't conform to snake_case naming style ('([^\W\dA-Z][^\WA-Z]{2,}|_[^\WA-Z]*|__[^\WA-Z\d_][^\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
17			file_size = int(r.headers.get('content-length'))
18			default_chunk_size = 131072
19			desc = 'Downloading ' + url
20			with tqdm(total=file_size, unit='B', unit_scale=True,
21			desc=desc) as pbar:
22			for chunk in r.iter_content(chunk_size=default_chunk_size):
23			if chunk:
24			f.write(chunk)
25			f.flush()
26			pbar.update(len(chunk))
27
28
29			def download_data(config={}):
			0 ignored issues – show Bug Best Practice introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report The default value `{}` might cause unintended side-effects. Objects as default values are only created once in Python and not on each invocation of the function. If the default object is modified, this modification is carried over to the next invocation of the method. # Bad: # If array_param is modified inside the function, the next invocation will # receive the modified object. def some_function(array_param=[]): # ... # Better: Create an array on each invocation def some_function(array_param=None): array_param = array_param or [] # ... Loading history... introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
30			data_dir = util.get_data_dir(config)
31
32			# Download word vectors
33			url = "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.vec.gz"
34			fname = os.path.join(data_dir, 'fasttext.300.vec')
35			fname_gz = fname+'.gz'
36			if not os.path.exists(fname):
37			download_file(url, fname_gz)
38			with gzip.open(fname_gz, 'rb') as fin:
39			with open(fname, 'wb') as fout:
40			# We need to remove the first line
41			for i, line in enumerate(fin.readlines()):
42			if i > 0:
43			fout.write(line)
44			os.remove(fname_gz)
45
46			# Download e2e dutch model_
47			url = "https://surfdrive.surf.nl/files/index.php/s/UnZMyDrBEFunmQZ/download"
48			fname_zip = os.path.join(data_dir, 'model.zip')
49			log_dir_name = os.path.join(data_dir, 'final')
50			if not os.path.exists(fname_zip) and not os.path.exists(log_dir_name):
51			download_file(url, fname_zip)
52			if not os.path.exists(log_dir_name):
53			with zipfile.ZipFile(fname_zip, 'r') as zfile:
54			zfile.extractall(data_dir)
55			os.rename(os.path.join(data_dir, 'logs', 'final'), log_dir_name)
56			os.rmdir(os.path.join(data_dir, 'logs'))
57
58			# Download char_dict
59			url = "https://github.com/Filter-Bubble/e2e-Dutch/raw/v0.2.0/data/char_vocab.dutch.txt"
60			fname = os.path.join(data_dir, 'char_vocab.dutch.txt')
61			if not os.path.exists(fname):
62			download_file(url, fname)
63
64
65			def main():
			0 ignored issues – show introduced 2021-01-07 13:31 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
66			# To do: argparse for config file
67			download_data()
68
69
70			if __name__ == "__main__":
71			main()
72

Filter-Bubble / e2e-Dutch

Push — master ( fc9889...278673 )

e2edutch.download A

Complexity

Size/Duplication

Importance

3 Functions

Duplication Side-by-Side

Filter issues like