Completed
Push — master ( 3e1d4c...f31f72 )
by Bart
27s
created

default_downloader()   F

Complexity

Conditions 12

Size

Total Lines 45

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 12
dl 0
loc 45
rs 2.7855

How to fix   Complexity   

Complexity

Complex classes and functions like default_downloader() often do a lot of different things. To break such a unit down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import os
2
import re
3
import sys
4
from contextlib import contextmanager
5
6
import requests
7
from progressbar import (ProgressBar, Percentage, Bar, ETA, FileTransferSpeed,
8
                         Timer, UnknownLength)
9
from six.moves import zip, urllib
0 ignored issues
show
Bug Best Practice introduced by
This seems to re-define the built-in zip.

It is generally discouraged to redefine built-ins as this makes code very hard to read.

Loading history...
10
from ..exceptions import NeedURLPrefix
11
12
13
@contextmanager
def progress_bar(name, maxval):
    """Context manager that displays a progress bar on standard output.

    The bar is started on entry and yielded to the caller. On exit,
    whether or not the body raised, it is updated to `maxval` and
    finished.

    Parameters
    ----------
    name : str
        Name of the downloaded file; used as the label of the bar.
    maxval : int
        Total size of the download, in bytes. Pass `UnknownLength`
        when the size is not known, in which case an elapsed-time
        display replaces the percentage, bar and ETA widgets.

    """
    label = '{}: '.format(name)
    if maxval is UnknownLength:
        # No total to measure against: show elapsed time and speed only.
        widgets = [label, ' ', Timer(), ' ', FileTransferSpeed()]
    else:
        widgets = [
            label,
            Percentage(),
            ' ',
            Bar(marker='=', left='[', right=']'),
            ' ',
            ETA(),
            ' ',
            FileTransferSpeed(),
        ]
    bar = ProgressBar(widgets=widgets, max_value=maxval, fd=sys.stdout).start()
    try:
        yield bar
    finally:
        bar.update(maxval)
        bar.finish()
37
38
39
def filename_from_url(url, path=None):
    """Parses a URL to determine a file name.

    The server is contacted first: if the response carries a
    ``Content-Disposition`` header with a ``filename=`` parameter, that
    name is used. Otherwise the last component of the URL path is
    returned.

    Parameters
    ----------
    url : str
        URL to parse.
    path : str, optional
        Unused. Kept in the signature only so that existing callers
        that pass it keep working.

    Returns
    -------
    filename : str
        The file name. May be an empty string when neither the header
        nor the URL path provides one.

    """
    r = requests.get(url, stream=True)
    try:
        disposition = r.headers.get('Content-Disposition', '')
    finally:
        # Only the headers are needed. With stream=True the body is never
        # read, so the connection must be released explicitly.
        r.close()

    # A header such as "inline" has no filename parameter; fall through to
    # the URL in that case instead of failing on an empty match list.
    matches = re.findall(r'filename=([^;]+)', disposition)
    if matches:
        filename = matches[0].strip().strip('"')
        if filename:
            return filename
    return os.path.basename(urllib.parse.urlparse(url).path)
55
56
57
def download(url, file_handle, chunk_size=1024):
    """Downloads a given URL to a specific file.

    The response body is streamed to `file_handle` in chunks while a
    progress bar is displayed. The HTTP status code is not inspected.

    Parameters
    ----------
    url : str
        URL to download.
    file_handle : file
        Where to save the downloaded URL. Must be opened for binary
        writing and expose a ``name`` attribute, which labels the
        progress bar.
    chunk_size : int, optional
        Number of bytes requested from the response per iteration.
        Defaults to 1024.

    """
    r = requests.get(url, stream=True)
    try:
        total_length = r.headers.get('content-length')
        if total_length is None:
            maxval = UnknownLength
        else:
            maxval = int(total_length)
        name = file_handle.name
        received = 0
        with progress_bar(name=name, maxval=maxval) as bar:
            for chunk in r.iter_content(chunk_size):
                file_handle.write(chunk)
                # Count what was actually written: chunks are not
                # guaranteed to be exactly `chunk_size` bytes long.
                received += len(chunk)
                if maxval is not UnknownLength:
                    # Decoded (e.g. gzip) bodies can be longer than the
                    # advertised content-length; never exceed the bar's
                    # maximum.
                    bar.update(min(received, maxval))
    finally:
        # Release the streamed connection even if writing fails midway.
        r.close()
80
81
82
def ensure_directory_exists(directory):
    """Create directory (with parents) if does not exist, raise on failure.

    Parameters
    ----------
    directory : str
        The directory to create

    Raises
    ------
    OSError
        If the directory cannot be created and does not already exist,
        e.g. when the path refers to a regular file.

    """
    # Try first and inspect afterwards: checking `isdir` before creating
    # leaves a window in which another process can create the directory
    # and make `makedirs` fail spuriously.
    try:
        os.makedirs(directory)
    except OSError:
        if not os.path.isdir(directory):
            raise
94
95
96
def _fill_missing_filenames(urls, filenames):
    """Fill blank entries of `filenames` in place from their URLs."""
    for i, url in enumerate(urls):
        filename = filenames[i]
        # Only ask the server when there is a URL to ask about.
        if not filename and url:
            filename = filename_from_url(url)
        if not filename:
            raise ValueError("no filename available for URL '{}'".format(url))
        filenames[i] = filename


def _remove_files(paths):
    """Delete every path in `paths` that is an existing regular file."""
    for path in paths:
        if os.path.isfile(path):
            os.remove(path)


def _download_files(urls, files, filenames, url_prefix):
    """Download each URL to its target path, using `url_prefix` if needed."""
    for url, target, name in zip(urls, files, filenames):
        if not url:
            if url_prefix is None:
                raise NeedURLPrefix
            url = url_prefix + name
        with open(target, 'wb') as file_handle:
            download(url, file_handle)


def default_downloader(directory, urls, filenames, url_prefix=None,
                       clear=False):
    """Downloads or clears files from URLs and filenames.

    Parameters
    ----------
    directory : str
        The directory in which downloaded files are saved.
    urls : list
        A list of URLs to download.
    filenames : list
        A list of file names for the corresponding URLs. Blank entries
        are replaced **in place** by the name derived from the
        corresponding URL.
    url_prefix : str, optional
        If provided, this is prepended to filenames that
        lack a corresponding URL.
    clear : bool, optional
        If `True`, delete the given filenames from the given
        directory rather than download them.

    Raises
    ------
    ValueError
        If no file name is given for a URL and none can be derived
        from it.
    NeedURLPrefix
        If a file has no URL and `url_prefix` is not provided.

    """
    # Parse file names from URL if not provided
    _fill_missing_filenames(urls, filenames)
    files = [os.path.join(directory, f) for f in filenames]

    if clear:
        _remove_files(files)
        return

    print('Downloading ' + ', '.join(filenames) + '\n')
    ensure_directory_exists(directory)
    _download_files(urls, files, filenames, url_prefix)
141