Passed
Pull Request — master (#559)
by Konstantin
02:53
created

OcrdResourceManager.download()   C

Complexity

Conditions 9

Size

Total Lines 39
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 33
dl 0
loc 39
rs 6.6666
c 0
b 0
f 0
cc 9
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir
4
import re
5
from shutil import copytree
6
from tarfile import open as open_tarfile
7
8
import requests
9
from yaml import safe_load
10
11
from ocrd_validators import OcrdResourceListValidator
12
from ocrd_utils import getLogger
13
from ocrd_utils.constants import HOME, XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME
14
from ocrd_utils.os import list_all_resources, pushd_popd
15
16
from .constants import RESOURCE_LIST_FILENAME
17
18
# Built-in resource list; its location comes from RESOURCE_LIST_FILENAME.
builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
# Per-user resource list located below HOME.
user_list_filename = Path(HOME) / 'ocrd' / 'resources.yml'
20
21
class OcrdResourceManager():

    """
    Managing processor resources

    Keeps a registry (``self.database``) that maps an executable name to the
    list of resource descriptions known for it, lists the resources found on
    disk and downloads resources by URL.
    """

    def __init__(self):
        self.log = getLogger('ocrd.resource_manager')
        # executable name -> list of resource dicts
        self.database = {}
        # The user list is loaded last so that its entries end up in front
        # of the builtin ones (see load_resource_list).
        self.load_resource_list(builtin_list_filename)
        self.load_resource_list(user_list_filename)

    def load_resource_list(self, list_filename):
        """
        Load a YAML resource list and merge it into the registry.

        Does nothing when ``list_filename`` is not an existing file.

        Args:
            list_filename (Path): location of the resource list

        Raises:
            ValueError: if the list does not pass ``OcrdResourceListValidator``
        """
        if not list_filename.is_file():
            return
        with open(list_filename, 'r', encoding='utf-8') as f:
            list_loaded = safe_load(f)
        report = OcrdResourceListValidator.validate(list_loaded)
        if not report.is_valid:
            self.log.error('\n'.join(report.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resource_list in list_loaded.items():
            # Prepend, so user provided is sorted before builtin
            self.database[executable] = resource_list + self.database.get(executable, [])

    def list_available(self, executable=None):
        """
        List models available for download by processor

        Args:
            executable (str): restrict the listing to this executable; all
                registered executables are listed when omitted

        Returns:
            list of ``(executable, [resource dict, ...])`` tuples. An
            executable that is not in the registry yields an empty resource
            list instead of raising ``KeyError``.
        """
        if executable:
            return [(executable, self.database.get(executable, []))]
        return list(self.database.items())

    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Args:
            executable (str): restrict the listing to this executable. When
                omitted, every registered executable is considered plus every
                ``ocrd-*`` entry found in the XDG directories and, if
                ``VIRTUAL_ENV`` is set, in its ``share`` directory.

        Returns:
            list of ``(executable, [resource dict, ...])`` tuples. Resources
            found on disk without a registry entry are reported with ``???``
            placeholders.
        """
        ret = []
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database.keys())
            # resources in the file system
            parent_dirs = [XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME]
            if 'VIRTUAL_ENV' in environ:
                parent_dirs.append(join(environ['VIRTUAL_ENV'], 'share'))
            for parent_dir in parent_dirs:
                # Any of these directories may not have been created yet;
                # listdir() would raise FileNotFoundError for a missing one.
                if not Path(parent_dir).is_dir():
                    continue
                all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        # sorted() makes the order of the result reproducible between runs
        for this_executable in sorted(set(all_executables)):
            registered = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                # First registry entry with a matching name wins
                resdict = next((x for x in registered if x['name'] == res_name), None)
                if resdict is None:
                    # TODO handle gracefully
                    resdict = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret

    def find_resources(self, executable=None, name=None, url=None):
        """
        Find resources in the registry

        Args:
            executable (str): only search the resources of this executable
            name (str): match resources with this ``name``
            url (str): match resources with this ``url``

        Returns:
            list of ``(executable, resource dict)`` tuples. Without ``name``
            and ``url`` every resource matches; with both, a resource matching
            either one is returned. An unknown ``executable`` yields ``[]``.
        """
        if executable and executable not in self.database:
            return []
        executables = [executable] if executable else self.database.keys()
        ret = []
        for this_executable in executables:
            for resdict in self.database[this_executable]:
                if ((not name and not url)
                        or (url and url == resdict['url'])
                        or (name and name == resdict['name'])):
                    ret.append((this_executable, resdict))
        return ret

    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the parameter value to use for a resource from its name.

        Args:
            name (str): resource name
            usage (str): ``as-is`` returns ``name`` unchanged,
                ``without-extension`` returns ``name`` without its last suffix

        Raises:
            ValueError: for any other ``usage`` (previously ``None`` was
                returned silently)
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        raise ValueError("No such usage '%s'" % usage)

    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content of ``url`` into the file ``filename``.

        Args:
            url (str): URL to fetch
            filename (str or Path): file to write, opened in binary mode
            progress_cb (callable): if given, called with the byte length of
                every chunk received

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        with requests.get(url, stream=True) as r:
            # Fail before anything is written, so that an HTTP error page is
            # never stored as the resource and an existing file is not
            # truncated by a failed request.
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)

    # TODO Proper caching (make head request for size, If-Modified etc)
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): executable the resource belongs to; the resource
                is stored in ``basedir/executable``
            url (str): URL to download from
            overwrite (bool): replace an already existing resource
            basedir (str or Path): parent directory of the per-executable directory
            name (str): name of the resource below the destination directory;
                defaults to ``url`` stripped of all non-alphanumeric characters
            resource_type (str): ``file`` stores the download as is,
                ``tarball`` extracts it and copies ``path_in_archive``.
                Any other value downloads nothing.
            path_in_archive (str): path within the extracted tarball to copy
            progress_cb (callable): passed on to the download, called with the
                byte length of every chunk received

        Returns:
            Path: location of the resource
        """
        # Local import: only needed here, keeps the module imports untouched
        from shutil import rmtree
        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False", url, fpath)
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # Make the target absolute before the working directory changes
            # (pushd_popd presumably chdir()s), otherwise a relative basedir
            # would refer to a location inside the temporary directory.
            target = fpath.absolute()
            with pushd_popd(tempdir=True):
                log.info("Downloading %s", url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(security): extractall() trusts the member paths
                        # of the archive; only use with trusted URLs.
                        tar.extractall()
                    # Only reachable with an existing target when overwrite is
                    # set. copytree() refuses to write into an existing
                    # destination, so remove it first - and only now, after
                    # download and extraction have succeeded.
                    if target.is_dir():
                        rmtree(str(target))
                    elif target.exists():
                        target.unlink()
                    log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                    copytree(path_in_archive, str(target))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
155