Passed
Pull Request — master (#559)
by Konstantin
02:18
created

OcrdResourceManager.download()   D

Complexity

Conditions 13

Size

Total Lines 33
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 28
dl 0
loc 33
rs 4.2
c 0
b 0
f 0
cc 13
nop 8

How to fix   Complexity    Many Parameters   

Complexity

Complex classes like ocrd.resource_manager.OcrdResourceManager.download() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

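There are several approaches to avoid long parameter lists, such as introducing a parameter object, passing a whole object instead of its individual fields, or replacing a parameter with a method call.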

1
from pathlib import Path
2
import re
3
from shutil import copyfileobj, copytree
4
from tempfile import TemporaryFile
5
from tarfile import open as open_tarfile
6
7
import requests
8
from yaml import safe_load
9
10
from .constants import RESOURCE_LIST_FILENAME
11
12
from ocrd_validators import OcrdResourceListValidator
13
from ocrd_utils import getLogger
14
from ocrd_utils.constants import HOME, XDG_CACHE_HOME
15
from ocrd_utils.os import list_resource_candidates, list_all_resources, pushd_popd
16
17
# Builtin resource list; its location comes from RESOURCE_LIST_FILENAME.
builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
# Per-user resource list, located at <HOME>/ocrd/resources.yml.
user_list_filename = Path(HOME) / 'ocrd' / 'resources.yml'
19
20
class OcrdResourceManager():
    """
    Managing processor resources

    Keeps an in-memory registry (``self.database``) that maps an executable
    name to a list of resource descriptions (dicts). The registry is filled
    from the builtin resource list and, if it exists, the user resource list.
    """

    def __init__(self):
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        # Builtin list first, user list second: load_resource_list() prepends,
        # so user-provided entries end up before the builtin ones.
        self.load_resource_list(builtin_list_filename)
        self.load_resource_list(user_list_filename)

    def load_resource_list(self, list_filename):
        """
        Load and validate a YAML resource list and merge it into the registry.

        Does nothing if ``list_filename`` is not an existing file.

        Args:
            list_filename (pathlib.Path): location of the YAML resource list

        Raises:
            ValueError: if the validator reports the loaded list as invalid
        """
        if not list_filename.is_file():
            return
        with open(list_filename, 'r', encoding='utf-8') as f:
            list_loaded = safe_load(f)
        report = OcrdResourceListValidator.validate(list_loaded)
        if not report.is_valid:
            self.log.error('\n'.join(report.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resource_list in list_loaded.items():
            # Prepend, so user provided is sorted before builtin
            self.database[executable] = resource_list + self.database.get(executable, [])

    def list_available(self, executable=None):
        """
        List models available for download by processor

        Args:
            executable (str, optional): restrict the listing to this executable

        Returns:
            list of ``(executable, resource_list)`` tuples. An executable that
            is not in the registry yields an empty resource list.
        """
        if executable:
            return [(executable, self.database.get(executable, []))]
        return list(self.database.items())

    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Args:
            executable (str, optional): restrict the listing to this executable

        Returns:
            list of ``(executable, resource_list)`` tuples. Installed files
            without a registry entry are reported with ``'???'`` placeholders.
        """
        ret = []
        executables = [executable] if executable else list(self.database)
        for this_executable in executables:
            # An executable may have files installed without being registered.
            known = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                resdict = next((x for x in known if x['name'] == res_name), None)
                if resdict is None:
                    # TODO handle gracefully
                    resdict = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret

    def find_resources(self, executable=None, name=None, url=None):
        """
        Find resources in the registry

        A resource matches if its ``url`` equals ``url`` or its ``name``
        equals ``name``; unset criteria are ignored.

        Args:
            executable (str, optional): only search resources of this executable
            name (str, optional): resource name to look for
            url (str, optional): resource URL to look for

        Returns:
            list of ``(executable, resource_dict)`` tuples, empty if
            ``executable`` is given but not in the registry
        """
        if executable and executable not in self.database:
            return []
        ret = []
        for this_executable in [executable] if executable else self.database:
            for resdict in self.database[this_executable]:
                if (url and url == resdict['url']) or (name and name == resdict['name']):
                    ret.append((this_executable, resdict))
        return ret

    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the value to use as a parameter from a resource name.

        Args:
            name (str): resource (file) name
            usage (str): ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` returns it without its last suffix

        Raises:
            ValueError: if ``usage`` is neither of the supported values
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        # Previously fell through and returned None, hiding typos in `usage`.
        raise ValueError("No such usage '%s'" % usage)

    @staticmethod
    def _fetch_to_file(url, dest):
        """Stream the response body of ``url`` into the file ``dest``."""
        with requests.get(url, stream=True) as r:
            # Fail before creating the file, so an HTTP error page is never
            # stored as if it were the resource.
            r.raise_for_status()
            try:
                with open(dest, 'wb') as f:
                    # iter_content() undoes any Content-Encoding, unlike r.raw
                    for chunk in r.iter_content(chunk_size=4096):
                        f.write(chunk)
            except Exception:
                # Do not leave a truncated file that later looks "installed".
                if Path(dest).exists():
                    Path(dest).unlink()
                raise

    def _install_tarball(self, url, fpath, path_in_archive, log):
        """Download the tarball at ``url`` and copy ``path_in_archive`` to ``fpath``."""
        from shutil import rmtree
        # The working directory changes below, so pin the destination first;
        # a relative ``fpath`` would otherwise point into the scratch directory.
        target = fpath.absolute()
        # NOTE(review): pushd_popd(tempdir=True) presumably switches into a
        # fresh temporary directory -- confirm against ocrd_utils.
        with pushd_popd(tempdir=True):
            log.info("Downloading %s", url)
            self._fetch_to_file(url, 'download.tar.xx')
            Path('out').mkdir()
            with pushd_popd('out'):
                log.info("Extracting tarball")
                with open_tarfile('../download.tar.xx', 'r:*') as tar:
                    # SECURITY: extractall() trusts member paths; archives from
                    # untrusted URLs could write outside this directory.
                    tar.extractall()
                log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                # Only reachable with overwrite=True; copytree() refuses to
                # write into an existing destination.
                if target.is_dir() and not target.is_symlink():
                    rmtree(str(target))
                elif target.exists() or target.is_symlink():
                    target.unlink()
                copytree(path_in_archive, str(target))

    # TODO Proper caching (make head request for size, If-Modified etc)
    def download(self, executable, url, overwrite=False, basedir=XDG_CACHE_HOME, name=None, resource_type='file', path_in_archive='.'):
        """
        Download a resource by URL

        Args:
            executable (str): executable the resource belongs to; used as the
                sub-directory of ``basedir``
            url (str): URL to download from
            overwrite (bool): download again even if the target already exists
            basedir (str): directory below which resources are stored
            name (str, optional): file/directory name of the resource; defaults
                to ``url`` stripped of all non-alphanumeric characters
            resource_type (str): ``'file'`` to store the response body as is,
                ``'tarball'`` to extract it and copy ``path_in_archive``
            path_in_archive (str): path inside the extracted tarball to copy

        Returns:
            pathlib.Path: location of the resource, ``basedir/executable/name``

        Raises:
            ValueError: if ``resource_type`` is not supported
            requests.HTTPError: if the server answers with an error status
        """
        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False", url, fpath)
            return fpath
        # TODO
        # 'github-dir' is not implemented yet
        if resource_type not in ('file', 'tarball'):
            # Previously returned a path to a resource that was never fetched.
            raise ValueError("Unsupported resource_type '%s'" % resource_type)
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._fetch_to_file(url, fpath)
        else:
            self._install_tarball(url, fpath, path_in_archive, log)
        return fpath
125