default_downloader() - Code Metrics - Inspection of "Merge pull request #345 from dmitriy-serdyuk/fix-w..." - mila-udem/fuel - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 3e1d4c...f31f72 )

by Bart

created 2016-04-13 21:38 UTC

default_downloader() F

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	12
dl	0
loc	45
rs	2.7855

How to fix Complexity

import os
import re
import sys
from contextlib import contextmanager

import requests
from progressbar import (ProgressBar, Percentage, Bar, ETA, FileTransferSpeed,
                         Timer, UnknownLength)
from six.moves import zip, urllib

from ..exceptions import NeedURLPrefix


@contextmanager
def progress_bar(name, maxval):
    """Context manager that displays a progress bar during a download.

    The bar is started on entry and handed to the ``with`` body. On exit,
    whether or not the body raised, the bar is advanced to `maxval` and
    finished.

    Parameters
    ----------
    name : str
        Name of the downloaded file; used as the label of the bar.
    maxval : int
        Total size of the download, in bytes, or ``UnknownLength`` when
        the size is not known in advance.

    """
    label = '{}: '.format(name)
    if maxval is UnknownLength:
        # No total to measure against: show elapsed time rather than a
        # percentage bar and an ETA.
        widgets = [label, ' ', Timer(), ' ', FileTransferSpeed()]
    else:
        widgets = [label, Percentage(), ' ',
                   Bar(marker='=', left='[', right=']'), ' ', ETA(), ' ',
                   FileTransferSpeed()]
    pending = ProgressBar(widgets=widgets, max_value=maxval, fd=sys.stdout)
    bar = pending.start()
    try:
        yield bar
    finally:
        bar.update(maxval)
        bar.finish()


def filename_from_url(url, path=None):
    """Determines a file name for a URL.

    Issues a GET request for `url`. If the response carries a
    ``Content-Disposition`` header with a ``filename=`` parameter, that
    name is used; otherwise the last component of the URL path is used.

    Parameters
    ----------
    url : str
        URL to parse.
    path : optional
        Unused. Kept so that callers passing it keep working.

    Returns
    -------
    filename : str
        The file name. This is an empty string when the header provides
        no usable name and the URL path is empty or ends with a slash.

    """
    r = requests.get(url, stream=True)
    try:
        disposition = r.headers.get('Content-Disposition', '')
    finally:
        # Only the headers are needed and the body is never consumed, so
        # release the streamed connection explicitly.
        r.close()
    matches = re.findall(r'filename=([^;]+)', disposition)
    if matches:
        # The name is chosen by the remote server: drop surrounding
        # whitespace and quotes, and any directory part, so that joining
        # it to a download directory cannot point outside that directory.
        filename = os.path.basename(matches[0].strip().strip('"'))
        if filename:
            return filename
    # No header, or a header without a usable file name: fall back to the
    # URL itself instead of failing on an empty match list.
    return os.path.basename(urllib.parse.urlparse(url).path)


def download(url, file_handle, chunk_size=1024):
    """Downloads a given URL to a specific file.

    The response body is streamed and written to `file_handle` chunk by
    chunk while a progress bar labelled with ``file_handle.name`` is
    displayed.

    Parameters
    ----------
    url : str
        URL to download.
    file_handle : file
        Where to save the downloaded URL. Must be writable in binary mode
        and have a ``name`` attribute.
    chunk_size : int, optional
        Chunk size, in bytes, passed to ``iter_content``. Defaults to
        1024.

    """
    # NOTE(review): the HTTP status is not checked, so the body of an
    # error response is written to the file as-is -- confirm whether
    # r.raise_for_status() is wanted here.
    r = requests.get(url, stream=True)
    try:
        total_length = r.headers.get('content-length')
        if total_length is None:
            maxval = UnknownLength
        else:
            maxval = int(total_length)
        name = file_handle.name
        with progress_bar(name=name, maxval=maxval) as bar:
            written = 0
            for chunk in r.iter_content(chunk_size):
                file_handle.write(chunk)
                # Count the bytes actually written rather than assuming
                # that every chunk is exactly `chunk_size` long.
                written += len(chunk)
                if maxval is not UnknownLength:
                    # The bytes yielded may not add up to Content-Length
                    # (e.g. a body decoded on the fly); clamp so the bar
                    # is never pushed past its maximum.
                    bar.update(min(written, maxval))
    finally:
        # A streamed response holds its connection until it is closed.
        r.close()


def ensure_directory_exists(directory):
    """Create directory (with parents) if does not exist, raise on failure.

    Parameters
    ----------
    directory : str
        The directory to create

    Raises
    ------
    OSError
        If the directory could not be created and does not exist as a
        directory afterwards (e.g. the path is an existing regular file).

    """
    # Attempt the creation first and inspect the result afterwards:
    # checking for existence beforehand leaves a window in which another
    # process can create the directory and make `makedirs` fail.
    try:
        os.makedirs(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise


def default_downloader(directory, urls, filenames, url_prefix=None,
                       clear=False):
    """Downloads or clears files from URLs and filenames.

    Parameters
    ----------
    directory : str
        The directory in which downloaded files are saved.
    urls : list
        A list of URLs to download.
    filenames : list
        A list of file names for the corresponding URLs. Missing (empty
        or `None`) entries are replaced in place by a name derived from
        the corresponding URL.
    url_prefix : str, optional
        If provided, this is prepended to filenames that
        lack a corresponding URL.
    clear : bool, optional
        If `True`, delete the given filenames from the given
        directory rather than download them.

    Raises
    ------
    ValueError
        If no file name is given and none can be derived from the URL.
    NeedURLPrefix
        If a file has no URL and `url_prefix` is `None` (download mode
        only).

    """
    _resolve_filenames(urls, filenames)
    files = [os.path.join(directory, f) for f in filenames]

    if clear:
        _clear_files(files)
        return

    print('Downloading ' + ', '.join(filenames) + '\n')
    ensure_directory_exists(directory)
    _download_files(urls, files, filenames, url_prefix)


def _resolve_filenames(urls, filenames):
    """Fills in, in place, the entries of `filenames` that are missing."""
    for i, url in enumerate(urls):
        filename = filenames[i]
        # Only ask the server when there is a URL to ask; an entry with
        # neither a name nor a URL is reported by the check below.
        if not filename and url:
            filename = filename_from_url(url)
        if not filename:
            raise ValueError("no filename available for URL '{}'".format(url))
        filenames[i] = filename


def _clear_files(files):
    """Removes every path in `files` for which `os.path.isfile` is true."""
    for f in files:
        if os.path.isfile(f):
            os.remove(f)


def _download_files(urls, files, filenames, url_prefix):
    """Downloads each URL to its path, using `url_prefix` for missing URLs."""
    for url, f, n in zip(urls, files, filenames):
        if not url:
            if url_prefix is None:
                raise NeedURLPrefix
            url = url_prefix + n
        with open(f, 'wb') as file_handle:
            download(url, file_handle)


1			import os
2			import re
3			import sys
4			from contextlib import contextmanager
5
6			import requests
7			from progressbar import (ProgressBar, Percentage, Bar, ETA, FileTransferSpeed,
8			Timer, UnknownLength)
9			from six.moves import zip, urllib
			0 ignored issues – show Bug Best Practice introduced 2015-12-02 16:30 UTC by Report Bug Copy Issue Report This seems to re-define the built-in `zip`. It is generally discouraged to redefine built-ins as this makes code very hard to read. Loading history...
10			from ..exceptions import NeedURLPrefix
11
12
13			@contextmanager
14			def progress_bar(name, maxval):
15			"""Manages a progress bar for a download.
16
17			Parameters
18			----------
19			name : str
20			Name of the downloaded file.
21			maxval : int
22			Total size of the download, in bytes.
23
24			"""
25			if maxval is not UnknownLength:
26			widgets = ['{}: '.format(name), Percentage(), ' ',
27			Bar(marker='=', left='[', right=']'), ' ', ETA(), ' ',
28			FileTransferSpeed()]
29			else:
30			widgets = ['{}: '.format(name), ' ', Timer(), ' ', FileTransferSpeed()]
31			bar = ProgressBar(widgets=widgets, max_value=maxval, fd=sys.stdout).start()
32			try:
33			yield bar
34			finally:
35			bar.update(maxval)
36			bar.finish()
37
38
39			def filename_from_url(url, path=None):
			0 ignored issues – show Unused Code introduced 2015-12-02 16:30 UTC by Report Bug Copy Issue Report The argument `path` seems to be unused. Loading history...
40			"""Parses a URL to determine a file name.
41
42			Parameters
43			----------
44			url : str
45			URL to parse.
46
47			"""
48			r = requests.get(url, stream=True)
49			if 'Content-Disposition' in r.headers:
50			filename = re.findall(r'filename=([^;]+)',
51			r.headers['Content-Disposition'])[0].strip('"\"')
52			else:
53			filename = os.path.basename(urllib.parse.urlparse(url).path)
54			return filename
55
56
57			def download(url, file_handle, chunk_size=1024):
58			"""Downloads a given URL to a specific file.
59
60			Parameters
61			----------
62			url : str
63			URL to download.
64			file_handle : file
65			Where to save the downloaded URL.
66
67			"""
68			r = requests.get(url, stream=True)
69			total_length = r.headers.get('content-length')
70			if total_length is None:
71			maxval = UnknownLength
72			else:
73			maxval = int(total_length)
74			name = file_handle.name
75			with progress_bar(name=name, maxval=maxval) as bar:
76			for i, chunk in enumerate(r.iter_content(chunk_size)):
77			if total_length:
78			bar.update(i * chunk_size)
79			file_handle.write(chunk)
80
81
82			def ensure_directory_exists(directory):
83			"""Create directory (with parents) if does not exist, raise on failure.
84
85			Parameters
86			----------
87			directory : str
88			The directory to create
89
90			"""
91			if os.path.isdir(directory):
92			return
93			os.makedirs(directory)
94
95
96			def default_downloader(directory, urls, filenames, url_prefix=None,
97			clear=False):
98			"""Downloads or clears files from URLs and filenames.
99
100			Parameters
101			----------
102			directory : str
103			The directory in which downloaded files are saved.
104			urls : list
105			A list of URLs to download.
106			filenames : list
107			A list of file names for the corresponding URLs.
108			url_prefix : str, optional
109			If provided, this is prepended to filenames that
110			lack a corresponding URL.
111			clear : bool, optional
112			If `True`, delete the given filenames from the given
113			directory rather than download them.
114
115			"""
116			# Parse file names from URL if not provided
117			for i, url in enumerate(urls):
118			filename = filenames[i]
119			if not filename:
120			filename = filename_from_url(url)
121			if not filename:
122			raise ValueError("no filename available for URL '{}'".format(url))
123			filenames[i] = filename
124			files = [os.path.join(directory, f) for f in filenames]
125
126			if clear:
127			for f in files:
128			if os.path.isfile(f):
129			os.remove(f)
130			else:
131			print('Downloading ' + ', '.join(filenames) + '\n')
132			ensure_directory_exists(directory)
133
134			for url, f, n in zip(urls, files, filenames):
135			if not url:
136			if url_prefix is None:
137			raise NeedURLPrefix
138			url = url_prefix + n
139			with open(f, 'wb') as file_handle:
140			download(url, file_handle)
141

mila-udem / fuel

Push — master ( 3e1d4c...f31f72 )

default_downloader() F

Complexity

Size

Duplication

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like