Passed
Pull Request — master (#559)
by Konstantin
01:49
created

OcrdResourceManager.find_resources()   D

Complexity

Conditions 12

Size

Total Lines 16
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 16
rs 4.8
c 0
b 0
f 0
cc 12
nop 4

How to fix   Complexity   

Complexity

Complex classes like ocrd.resource_manager.OcrdResourceManager.find_resources() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from pathlib import Path
2
import re
3
from shutil import copyfileobj, copytree
4
from tempfile import TemporaryFile
5
from tarfile import open as open_tarfile
6
7
import requests
8
from yaml import safe_load
9
10
from .constants import RESOURCE_LIST_FILENAME
11
12
from ocrd_validators import OcrdResourceListValidator
13
from ocrd_utils import getLogger
14
from ocrd_utils.constants import HOME, XDG_CACHE_HOME
15
from ocrd_utils.os import list_resource_candidates, list_all_resources, pushd_popd
16
17
# Builtin resource list; its location comes from RESOURCE_LIST_FILENAME
builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
# User-provided resource list, looked up at HOME/ocrd/resources.yml
user_list_filename = Path(HOME) / 'ocrd' / 'resources.yml'
19
20
class OcrdResourceManager():
21
22
    """
23
    Managing processor resources
24
    """
25
    def __init__(self):
        """
        Set up the logger and an empty registry, then load the builtin and
        the user resource list (in that order)
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        # Builtin first: load_resource_list prepends what it loads, so the
        # user list loaded afterwards ends up in front of the builtin entries
        for list_filename in (builtin_list_filename, user_list_filename):
            self.load_resource_list(list_filename)
30
31
    def load_resource_list(self, list_filename):
        """
        Merge the resource list stored in ``list_filename`` into the registry

        Does nothing if ``list_filename`` is not an existing file. Raises
        ``ValueError`` (after logging the validation errors) if the loaded
        list does not validate.
        """
        if not list_filename.is_file():
            return
        with open(list_filename, 'r', encoding='utf-8') as list_file:
            loaded = safe_load(list_file)
        report = OcrdResourceListValidator.validate(loaded)
        if not report.is_valid:
            self.log.error('\n'.join(report.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resources in loaded.items():
            # Prepend, so user provided is sorted before builtin
            known = self.database.setdefault(executable, [])
            self.database[executable] = resources + known
44
45
    def list_available(self, executable=None):
        """
        List models available for download by processor

        Returns a list of ``(executable, resources)`` tuples: only the one
        for ``executable`` if it is given, otherwise one tuple for every
        executable in the registry.
        """
        if not executable:
            return list(self.database.items())
        return [(executable, self.database[executable])]
52
53
    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Looks at the resources of ``executable`` if given, otherwise at those
        of every executable in the registry. Each file reported by
        ``list_all_resources`` is matched by its file name against the
        ``name`` of the registry entries; files without a matching entry
        (including all files of an executable the registry does not know)
        are represented by a placeholder entry.

        Returns a list of ``(executable, resources)`` tuples.
        """
        executables = [executable] if executable else list(self.database)
        installed = []
        for this_executable in executables:
            # An executable that is missing from the registry has no known
            # entries; it must not fail with a KeyError
            known = self.database.get(this_executable, [])
            found = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                # First registry entry of that name wins
                entry = next((x for x in known if x['name'] == res_name), None)
                if entry is None:
                    # TODO handle gracefully
                    entry = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                found.append(entry)
            installed.append((this_executable, found))
        return installed
69
70
    def find_resources(self, executable=None, name=None, url=None):
        """
        Find resources in the registry

        Searches the entries of ``executable`` if given (an executable not in
        the registry yields an empty result), otherwise those of all
        executables. Without ``name`` and ``url`` every entry matches,
        otherwise an entry matches if its ``url`` or its ``name`` equals the
        respective argument.

        Returns a list of ``(executable, resource)`` tuples.
        """
        if executable:
            if executable not in self.database:
                return []
            candidates = [executable]
        else:
            candidates = self.database
        match_all = not (name or url)
        return [
            (this_executable, entry)
            for this_executable in candidates
            for entry in self.database[this_executable]
            if match_all
            or (url and url == entry['url'])
            or (name and name == entry['name'])
        ]
86
87
    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the value to use as a parameter from the resource ``name``

        Args:
            name: name of the resource
            usage: ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` returns the final path component of
                ``name`` without its last suffix

        Raises:
            ValueError: if ``usage`` is neither of the supported values
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        # Do not fall through and hand back None for a mistyped usage
        raise ValueError("No such usage '%s'" % usage)
92
93
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content at ``url`` into the file ``filename``

        Args:
            url: URL to fetch with a streaming GET request
            filename: path of the file to (over)write in binary mode
            progress_cb: optional callable, invoked with the length of every
                chunk before that chunk is written

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        with requests.get(url, stream=True) as r:
            # Fail before the target file is created, so neither an error
            # page nor an empty file is left behind as the "resource"
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
102
103
    # TODO Proper caching (make head request for size, If-Modified etc)
104
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable: processor the resource belongs to; used as the
                subdirectory of ``basedir``
            url: URL to download from
            overwrite: download even if the target path already exists;
                otherwise an existing target is returned untouched
            basedir: parent directory of the per-executable directory
            name: file or directory name of the resource; defaults to ``url``
                stripped of everything but ASCII letters and digits
            resource_type: ``'file'`` stores the download under the target
                path, ``'tarball'`` extracts the download and copies
                ``path_in_archive`` to the target path. Any other value
                downloads nothing and still returns the target path.
            path_in_archive: path within the extracted tarball to copy
            progress_cb: optional callable, passed on to ``_download_impl``

        Returns:
            ``Path(basedir, executable, name)``
        """
        # Local import: only needed to replace an existing tarball target
        from shutil import rmtree
        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False" % (url, fpath))
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # Pin the target to the current working directory before it is
            # changed below, otherwise a relative basedir would point into
            # the temporary directory
            target = Path.cwd() / fpath
            with pushd_popd(tempdir=True):
                log.info("Downloading %s" % url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(review): extractall() trusts the member paths
                        # of a downloaded archive; consider validating the
                        # members or using an extraction filter
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
                    # copytree refuses to write to an existing target, so an
                    # overwrite has to remove the previous copy first
                    if target.is_dir() and not target.is_symlink():
                        rmtree(str(target))
                    elif target.exists() or target.is_symlink():
                        target.unlink()
                    copytree(path_in_archive, str(target))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
143