Passed
Pull Request — master (#559)
by Konstantin
02:29
created

OcrdResourceManager.download()   C

Complexity

Conditions 9

Size

Total Lines 39
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 33
dl 0
loc 39
rs 6.6666
c 0
b 0
f 0
cc 9
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
from pathlib import Path
2
import re
3
from shutil import copyfileobj, copytree
4
from tempfile import TemporaryFile
5
from tarfile import open as open_tarfile
6
7
import requests
8
from yaml import safe_load
9
10
from .constants import RESOURCE_LIST_FILENAME
11
12
from ocrd_validators import OcrdResourceListValidator
13
from ocrd_utils import getLogger
14
from ocrd_utils.constants import HOME, XDG_CACHE_HOME
15
from ocrd_utils.os import list_resource_candidates, list_all_resources, pushd_popd
16
17
# Resource list named by RESOURCE_LIST_FILENAME; OcrdResourceManager loads it first.
builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
# Per-user resource list below HOME; loaded second, so its entries are sorted first.
user_list_filename = Path(HOME) / 'ocrd' / 'resources.yml'
19
20
class OcrdResourceManager():
21
22
    """
23
    Managing processor resources
24
    """
25
    def __init__(self):
        """
        Create the logger and an empty registry, then load the builtin
        resource list followed by the user's resource list.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        # Order matters: load_resource_list prepends, so the list loaded
        # last (the user's) ends up in front of the builtin entries.
        for list_filename in (builtin_list_filename, user_list_filename):
            self.load_resource_list(list_filename)
30
31
    def load_resource_list(self, list_filename):
        """
        Merge the YAML resource list at ``list_filename`` into ``self.database``.

        A path that is not an existing file is ignored. The loaded document
        is checked with ``OcrdResourceListValidator``; its errors are logged
        and a ``ValueError`` is raised if the report is not valid. Entries of
        the loaded list are placed before the entries already registered for
        the same executable.
        """
        if not list_filename.is_file():
            return
        with open(list_filename, 'r', encoding='utf-8') as list_file:
            loaded = safe_load(list_file)
        validation = OcrdResourceListValidator.validate(loaded)
        if not validation.is_valid:
            self.log.error('\n'.join(validation.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resources in loaded.items():
            already_known = self.database.setdefault(executable, [])
            # Prepend, so user provided is sorted before builtin
            self.database[executable] = resources + already_known
44
45
    def list_available(self, executable=None):
        """
        List the registry entries as ``(executable, resources)`` tuples.

        Without ``executable`` every registered executable is listed; with
        one, the result holds only that executable's tuple (a ``KeyError``
        is raised if it is not registered).
        """
        if not executable:
            return list(self.database.items())
        return [(executable, self.database[executable])]
52
53
    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Without ``executable`` every registered executable is inspected;
        with one, only that executable is inspected, whether or not it has
        a registry entry.

        Returns a list of ``(executable, resources)`` tuples. Each resource
        is the first registry entry whose ``name`` equals the installed
        file's base name, or a placeholder dict with ``'???'`` fields when
        the registry has no such entry.
        """
        ret = []
        for this_executable in [executable] if executable else self.database.keys():
            # An explicitly requested executable need not be registered;
            # treat it as having no known resources instead of raising KeyError.
            registered = self.database.get(this_executable, [])
            reslist = []
            # list_all_resources is an external helper; its items are handled as paths here
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                resdict = next((x for x in registered if x['name'] == res_name), None)
                if resdict is None:
                    # TODO handle gracefully
                    resdict = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret
69
70
    def find_resources(self, executable=None, name=None, url=None):
        """
        Look up registry entries, returned as ``(executable, resource)`` tuples.

        ``executable`` restricts the search to one executable (an
        unregistered one yields an empty list). If neither ``name`` nor
        ``url`` is given, every entry matches; otherwise an entry matches
        when its ``url`` equals ``url`` or its ``name`` equals ``name``.
        """
        if executable and executable not in self.database.keys():
            return []
        candidates = [executable] if executable else self.database.keys()
        match_all = not name and not url
        return [
            (candidate, entry)
            for candidate in candidates
            for entry in self.database[candidate]
            if match_all
            or (url and url == entry['url'])
            or (name and name == entry['name'])
        ]
86
87
    def parameter_usage(self, name, usage='as-is'):
        """
        Derive a value from the resource ``name`` according to ``usage``.

        Args:
            name: resource name.
            usage: ``'as-is'`` returns ``name`` unchanged;
                ``'without-extension'`` returns ``Path(name).stem``, i.e.
                the final path component without its last suffix.

        Raises:
            ValueError: if ``usage`` is not one of the two supported values
                (previously this silently returned ``None``).
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        raise ValueError("No such usage '%s'" % usage)
92
93
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream ``url`` into ``filename`` in chunks of 4096 bytes.

        Args:
            url: location to fetch with ``requests.get(..., stream=True)``.
            filename: path of the file to write (opened in ``'wb'`` mode).
            progress_cb: optional callable invoked with the byte length of
                each chunk before the chunk is written.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        with requests.get(url, stream=True) as r:
            # Check the status before touching the disk, so an error page is
            # never stored as the resource and a failed request leaves no
            # empty file behind that would later count as "already exists".
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
102
103
    # TODO Proper caching (make head request for size, If-Modified etc)
104
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable: name of the sub-directory of ``basedir`` that
                receives the resource.
            url: location to fetch.
            overwrite: if false and the destination already exists, nothing
                is downloaded and the existing path is returned.
            basedir: root directory below which resources are stored.
            name: name of the destination below ``basedir/executable``;
                defaults to ``url`` with every non-alphanumeric character
                removed.
            resource_type: ``'file'`` stores the download as the destination;
                ``'tarball'`` downloads and unpacks an archive in a temporary
                directory and copies ``path_in_archive`` to the destination.
                Any other value downloads nothing.
            path_in_archive: file or directory inside the unpacked tarball
                to install (tarball only).
            progress_cb: passed on to ``_download_impl``.

        Returns:
            ``Path(basedir, executable, name)``.
        """
        # Local import: the module only imports copyfileobj and copytree from shutil.
        from shutil import copy2, rmtree

        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False", url, fpath)
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # The relative paths below rely on pushd_popd switching the working
            # directory, so pin the destination to an absolute path first;
            # otherwise a relative basedir would point into the temporary directory.
            dest = Path.cwd() / fpath
            with pushd_popd(tempdir=True):
                log.info("Downloading %s", url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(review): extractall() without member filtering trusts
                        # the archive's paths; only use with trusted URLs.
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                    # Only reachable with an existing destination when overwrite is
                    # set; copytree refuses to copy onto an existing directory.
                    if dest.is_symlink() or dest.is_file():
                        dest.unlink()
                    elif dest.is_dir():
                        rmtree(str(dest))
                    if Path(path_in_archive).is_dir():
                        copytree(path_in_archive, str(dest))
                    else:
                        # copytree only handles directories; copy a single member directly
                        copy2(path_in_archive, str(dest))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
143