1 | import os |
||
2 | import re |
||
3 | import sys |
||
4 | from contextlib import contextmanager |
||
5 | |||
6 | import requests |
||
7 | from progressbar import (ProgressBar, Percentage, Bar, ETA, FileTransferSpeed, |
||
8 | Timer, UnknownLength) |
||
9 | from six.moves import zip, urllib |
||
0 ignored issues
–
show
|
|||
10 | from ..exceptions import NeedURLPrefix |
||
11 | |||
12 | |||
@contextmanager
def progress_bar(name, maxval):
    """Context manager that shows a progress bar while a download runs.

    The bar is started on entry and handed to the caller. On exit,
    whether or not the body raised, it is updated with `maxval` and
    finished.

    Parameters
    ----------
    name : str
        Name of the downloaded file; used as the bar's label.
    maxval : int
        Total size of the download, in bytes, or `UnknownLength` when
        the size is not known.

    """
    label = '{}: '.format(name)
    if maxval is UnknownLength:
        # No total to measure against: show a timer rather than a
        # percentage, bar and ETA.
        layout = [label, ' ', Timer(), ' ', FileTransferSpeed()]
    else:
        layout = [label, Percentage(), ' ',
                  Bar(marker='=', left='[', right=']'), ' ', ETA(), ' ',
                  FileTransferSpeed()]
    progress = ProgressBar(
        widgets=layout, max_value=maxval, fd=sys.stdout).start()
    try:
        yield progress
    finally:
        progress.update(maxval)
        progress.finish()
||
37 | |||
38 | |||
def filename_from_url(url, path=None):
    """Parses a URL to determine a file name.

    A request is made to `url`. If the response carries a
    ``Content-Disposition`` header with a ``filename=`` parameter, that
    name is used (without surrounding whitespace, quotes or directory
    components). Otherwise the name is the last component of the URL's
    path.

    Parameters
    ----------
    url : str
        URL to parse.
    path : str, optional
        Not used by this function; kept so existing callers that pass
        it keep working.

    Returns
    -------
    filename : str
        The file name, which is empty if neither the header nor the
        URL path provides one.

    """
    response = requests.get(url, stream=True)
    try:
        disposition = response.headers.get('Content-Disposition', '')
    finally:
        # Only the headers are needed; the streamed body is never read,
        # so the connection has to be released explicitly.
        response.close()

    # `search` instead of `findall(...)[0]`: a header without a
    # `filename=` parameter must fall through to the URL, not raise.
    match = re.search(r'filename=([^;]+)', disposition)
    if match:
        candidate = match.group(1).strip().strip('"')
        # The header is server-controlled: keep only the final path
        # component so it cannot point outside the target directory.
        candidate = os.path.basename(candidate)
        if candidate:
            return candidate
    return os.path.basename(urllib.parse.urlparse(url).path)
||
55 | |||
56 | |||
def download(url, file_handle, chunk_size=1024):
    """Downloads a given URL to a specific file.

    The response body is streamed in chunks and written to
    `file_handle` while a progress bar labelled with `file_handle.name`
    is displayed.

    Parameters
    ----------
    url : str
        URL to download.
    file_handle : file
        Where to save the downloaded URL. Must be writable in binary
        mode and expose a `name` attribute.
    chunk_size : int, optional
        Number of bytes requested per chunk. Defaults to 1024.

    """
    response = requests.get(url, stream=True)
    try:
        total_length = response.headers.get('content-length')
        if total_length is None:
            maxval = UnknownLength
        else:
            maxval = int(total_length)
        with progress_bar(name=file_handle.name, maxval=maxval) as bar:
            written = 0
            for chunk in response.iter_content(chunk_size):
                file_handle.write(chunk)
                # Count what was actually written: chunks are not
                # guaranteed to be exactly `chunk_size` bytes long.
                written += len(chunk)
                if maxval is not UnknownLength:
                    # Clamp to the announced size; the decoded body can
                    # be larger than Content-Length (e.g. compressed
                    # transfer), and the bar must not be pushed past
                    # its maximum.
                    bar.update(min(written, maxval))
    finally:
        # Release the streamed connection even if writing failed.
        response.close()
||
80 | |||
81 | |||
def ensure_directory_exists(directory):
    """Create directory (with parents) if does not exist, raise on failure.

    Parameters
    ----------
    directory : str
        The directory to create

    Raises
    ------
    OSError
        If the directory could not be created and does not exist
        afterwards (for example, the path is an existing regular file).

    """
    try:
        os.makedirs(directory)
    except OSError:
        # Creating first and checking afterwards avoids the race where
        # another process creates the directory between an `isdir`
        # check and `makedirs`. Only a genuine failure is re-raised.
        if not os.path.isdir(directory):
            raise
||
94 | |||
95 | |||
def default_downloader(directory, urls, filenames, url_prefix=None,
                       clear=False):
    """Downloads or clears files from URLs and filenames.

    Parameters
    ----------
    directory : str
        The directory in which downloaded files are saved.
    urls : list
        A list of URLs to download.
    filenames : list
        A list of file names for the corresponding URLs. An empty
        entry is filled in from its URL. The list passed in is not
        modified.
    url_prefix : str, optional
        If provided, this is prepended to filenames that
        lack a corresponding URL.
    clear : bool, optional
        If `True`, delete the given filenames from the given
        directory rather than download them.

    Raises
    ------
    ValueError
        If an entry has no file name and none can be determined from
        its URL.
    NeedURLPrefix
        If an entry has no URL and `url_prefix` is not given.

    """
    # Work on a copy so the caller's list is left untouched.
    filenames = list(filenames)

    # Parse file names from URL if not provided
    for i, url in enumerate(urls):
        if filenames[i]:
            continue
        # Without a URL there is nothing to query for a name; fail with
        # the same error as for a URL that yields no name.
        parsed = filename_from_url(url) if url else None
        if not parsed:
            raise ValueError("no filename available for URL '{}'".format(url))
        filenames[i] = parsed
    files = [os.path.join(directory, f) for f in filenames]

    if clear:
        for f in files:
            if os.path.isfile(f):
                os.remove(f)
    else:
        print('Downloading ' + ', '.join(filenames) + '\n')
        ensure_directory_exists(directory)

        for url, f, n in zip(urls, files, filenames):
            if not url:
                if url_prefix is None:
                    raise NeedURLPrefix
                url = url_prefix + n
            with open(f, 'wb') as file_handle:
                download(url, file_handle)
||
141 |
It is generally discouraged to redefine built-ins as this makes code very hard to read.