| Conditions | 14 |
| Total Lines | 80 |
| Code Lines | 41 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
Complex methods like ocrd.resolver.Resolver.download_to_directory(), and the classes that contain them, often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | import tempfile |
||
def download_to_directory(self, directory, url, basename=None, if_exists='skip', subdir=None):
    """
    Download (or copy) a file into a directory.

    Early shortcut: if ``url`` is a local file that already *is* the target
    file inside ``directory``, nothing is copied and it is kept in place.

    If ``basename`` is not given, it is derived from the URL via
    ``nth_url_segment(url)`` (presumably the last URL segment), regardless
    of whether ``subdir`` is set.

    Args:
        directory (string): Directory to download files to. Created
            (including parents) if it does not exist yet.
        url (string): URL or local filename to fetch the file from
        basename (string, None): basename part of the filename on disk.
        if_exists (string, "skip"): What to do if target file already exists.
            One of ``skip`` (default), ``overwrite`` or ``raise``. Any value
            other than ``skip`` or ``raise`` results in overwriting.
        subdir (string, None): Subdirectory to create within the directory. Think fileGrp.

    Returns:
        Local filename, __relative__ to directory

    Raises:
        ValueError: if ``url`` or ``directory`` is empty or ``None``.
        FileNotFoundError: if ``url`` is a local filename that does not exist.
        FileExistsError: if the target exists and ``if_exists == 'raise'``.
        Exception: if the HTTP request does not return status 200.
    """
    log = getLogger('ocrd.resolver.download_to_directory') # pylint: disable=redefined-outer-name
    log.debug("directory=|%s| url=|%s| basename=|%s| if_exists=|%s| subdir=|%s|", directory, url, basename, if_exists, subdir)

    # ValueError is a subclass of Exception, so callers catching Exception keep working
    if not url:
        raise ValueError("'url' must be a string")
    if not directory:
        raise ValueError("'directory' must be a string") # actually Path would also work

    directory = Path(directory)
    directory.mkdir(parents=True, exist_ok=True)
    # Resolve to an absolute path so the src/dst comparison below is meaningful
    directory = str(directory.resolve())

    subdir_path = Path(subdir if subdir else '')
    basename_path = Path(basename if basename else nth_url_segment(url))
    # 'ret' is the path relative to 'directory' that is handed back to the caller
    ret = str(Path(subdir_path, basename_path))
    dst_path = Path(directory, ret)

    src_path = None
    if is_local_filename(url):
        # Tracked separately so the error log can report the path that
        # failed to resolve (src_path is still None if resolve() raises)
        local_filename = None
        try:
            # XXX this raises FNFE in Python 3.5 if src_path doesn't exist but not 3.6+
            local_filename = get_local_filename(url)
            src_path = Path(local_filename).resolve()
        except FileNotFoundError:
            log.error("Failed to resolve URL locally: %s --> '%s' which does not exist", url, local_filename)
            raise
        if not src_path.exists():
            raise FileNotFoundError("File path passed as 'url' to download_to_directory does not exist: %s" % url)
        if src_path == dst_path:
            log.debug("Stop early, src_path and dst_path are the same: '%s' (url: '%s')", src_path, url)
            return ret

    # Respect 'if_exists' arg
    if dst_path.exists():
        if if_exists == 'skip':
            return ret
        if if_exists == 'raise':
            raise FileExistsError("File already exists and if_exists == 'raise': %s" % (dst_path))
        # any other value: fall through and overwrite the existing file

    # Create dst_path parent dir
    dst_path.parent.mkdir(parents=True, exist_ok=True)

    # Copy files or download remote assets
    if src_path is not None:
        log.debug("Copying file '%s' to '%s'", src_path, dst_path)
        dst_path.write_bytes(src_path.read_bytes())
    else:
        log.debug("Downloading URL '%s' to '%s'", url, dst_path)
        # NOTE(review): no timeout is passed, so a stalled server may block
        # this call indefinitely -- consider adding one.
        response = requests.get(url)
        if response.status_code != 200:
            raise Exception("HTTP request failed: %s (HTTP %d)" % (url, response.status_code))
        dst_path.write_bytes(response.content)

    return ret
||
| 104 | |||
| 183 |