Passed
Pull Request — master (#559)
by Konstantin
02:19
created

OcrdResourceManager.download()   C

Complexity

Conditions 9

Size

Total Lines 39
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 33
dl 0
loc 39
rs 6.6666
c 0
b 0
f 0
cc 9
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more, or different, data.

There are several approaches to avoiding long parameter lists:

1
from pathlib import Path
2
import re
3
from shutil import copyfileobj, copytree
4
from tempfile import TemporaryFile
5
from tarfile import open as open_tarfile
6
7
import requests
8
from yaml import safe_load
9
10
from .constants import RESOURCE_LIST_FILENAME
11
12
from ocrd_validators import OcrdResourceListValidator
13
from ocrd_utils import getLogger
14
from ocrd_utils.constants import HOME, XDG_CACHE_HOME
15
from ocrd_utils.os import list_resource_candidates, list_all_resources, pushd_popd
16
17
# Builtin resource list, located at the path given by ``RESOURCE_LIST_FILENAME``
builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
18
# User-provided resource list: ``ocrd/resources.yml`` below ``HOME``
user_list_filename = Path(HOME, 'ocrd', 'resources.yml')
19
20
class OcrdResourceManager():
21
22
    """
23
    Managing processor resources
24
    """
25
    def __init__(self):
        """
        Set up the logger and an empty registry, then load the resource lists

        The builtin list is loaded first and the user list second.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        for resource_list_path in (builtin_list_filename, user_list_filename):
            self.load_resource_list(resource_list_path)
30
31
    def load_resource_list(self, list_filename):
        """
        Merge the resource list stored in ``list_filename`` into the registry

        Does nothing when ``list_filename`` is not an existing file.

        Args:
            list_filename (Path): YAML file mapping executables to lists of resources

        Raises:
            ValueError: if the loaded list does not pass validation
        """
        if not list_filename.is_file():
            return
        with open(list_filename, 'r', encoding='utf-8') as list_file:
            loaded = safe_load(list_file)
        validation = OcrdResourceListValidator.validate(loaded)
        if not validation.is_valid:
            self.log.error('\n'.join(validation.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resources in loaded.items():
            already_known = self.database.setdefault(executable, [])
            # Prepend, so user provided is sorted before builtin
            self.database[executable] = resources + already_known
44
45
    def list_available(self, executable=None):
        """
        List the resources the registry offers for download, grouped by processor

        Args:
            executable (str): restrict the listing to this executable; all
                registered executables are listed when empty

        Returns:
            list of ``(executable, resources)`` tuples

        Raises:
            KeyError: if ``executable`` is given but not in the registry
        """
        if not executable:
            return list(self.database.items())
        return [(executable, self.database[executable])]
52
53
    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Args:
            executable (str): restrict the listing to this executable; all
                registered executables are listed when empty

        Returns:
            list of ``(executable, resources)`` tuples. Installed resources
            without a registry entry are represented by a placeholder dict
            whose ``url``, ``description`` and ``version_range`` are ``'???'``.
        """
        executables = [executable] if executable else self.database.keys()
        ret = []
        for this_executable in executables:
            # The requested executable may have resources installed without
            # being registered at all, so do not index the registry directly
            registered = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                # First registry entry with a matching name wins
                resdict = next((x for x in registered if x['name'] == res_name), None)
                if resdict is None:
                    # TODO handle gracefully
                    resdict = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret
69
70
    def find_resources(self, executable=None, name=None, url=None):
        """
        Search the registry for resources by ``url`` and/or ``name``

        Args:
            executable (str): only search this executable's resources; all
                registered executables are searched when empty
            name (str): match resources whose ``name`` equals this value
            url (str): match resources whose ``url`` equals this value

        Returns:
            list of ``(executable, resource)`` tuples; empty when ``executable``
            is given but not registered, or when neither ``name`` nor ``url``
            is given
        """
        matches = []
        if executable and executable not in self.database:
            return matches
        candidates = [executable] if executable else self.database.keys()
        for candidate in candidates:
            for resdict in self.database[candidate]:
                # A resource is reported once, whether it matches by url or by name
                if (url and url == resdict['url']) or (name and name == resdict['name']):
                    matches.append((candidate, resdict))
        return matches
84
85
    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the value to use as a parameter from a resource ``name``

        Args:
            name (str): name of the resource
            usage (str): ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` returns ``name`` without directory
                and without its last suffix

        Returns:
            the parameter value

        Raises:
            ValueError: if ``usage`` is not one of the supported values
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        # Fail loudly instead of implicitly returning None for a mistyped usage
        raise ValueError("No such usage '%s'" % usage)
90
91
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content at ``url`` into the file ``filename``

        Args:
            url (str): URL to fetch
            filename (str or Path): file to write, opened in binary write mode
            progress_cb (callable): if set, called with the byte length of
                every chunk before the chunk is written

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        with requests.get(url, stream=True) as r:
            # Check the status before touching the filesystem, so neither an
            # error page nor an empty file ends up stored as the resource
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
100
101
    # TODO Proper caching (make head request for size, If-Modified etc)
102
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        The resource is stored as ``<basedir>/<executable>/<name>``.

        Args:
            executable (str): processor the resource belongs to
            url (str): URL to download from
            overwrite (bool): download even if the target already exists
            basedir (str or Path): directory containing the per-executable directories
            name (str): name of the target; defaults to ``url`` with all
                characters other than ASCII letters and digits removed
            resource_type (str): ``'file'`` to store the download as is,
                ``'tarball'`` to extract it and copy ``path_in_archive``
            path_in_archive (str): directory within the extracted tarball to
                copy to the target (``'tarball'`` only)
            progress_cb (callable): passed on to the download, see ``_download_impl``

        Returns:
            Path of the target

        Raises:
            ValueError: if a download is required and ``resource_type`` is not supported
        """
        # Local import: this module only imports copyfileobj and copytree from shutil
        from shutil import rmtree

        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False", url, fpath)
            return fpath
        # Reject unknown types before creating anything, instead of silently
        # returning a path that was never written
        # TODO
        # elif resource_type == 'github-dir':
        if resource_type not in ('file', 'tarball'):
            raise ValueError("Unsupported resource_type '%s'" % resource_type)
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        else:
            # NOTE(review): presumably pushd_popd(tempdir=True) changes into a
            # fresh temporary directory, so the relative paths below stay
            # inside it - confirm against ocrd_utils
            with pushd_popd(tempdir=True):
                log.info("Downloading %s", url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(security): extractall() trusts the member paths
                        # of the archive; only use with trusted URLs
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                    # copytree() refuses an existing destination, so when
                    # overwriting, remove the old resource first - only now
                    # that download and extraction have succeeded
                    if fpath.is_dir() and not fpath.is_symlink():
                        rmtree(str(fpath))
                    elif fpath.exists() or fpath.is_symlink():
                        fpath.unlink()
                    copytree(path_in_archive, str(fpath))
        return fpath
141