Passed
Pull Request — master (#559)
by Konstantin
02:18
created

ocrd.resource_manager   F

Complexity

Total Complexity 65

Size/Duplication

Total Lines 212
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 65
eloc 169
dl 0
loc 212
rs 3.2
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
B OcrdResourceManager.load_resource_list() 0 16 7
A OcrdResourceManager.__init__() 0 11 4
A OcrdResourceManager.list_available() 0 7 2
A OcrdResourceManager._copy_impl() 0 12 5
D OcrdResourceManager.find_resources() 0 18 13
B OcrdResourceManager.list_installed() 0 28 8
D OcrdResourceManager.download() 0 46 12
B OcrdResourceManager.add_to_user_database() 0 24 6
A OcrdResourceManager.parameter_usage() 0 5 3
A OcrdResourceManager._download_impl() 0 10 5

How to fix   Complexity   

Complexity

Complex classes like the one in ocrd.resource_manager often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir
4
import re
5
from shutil import copytree
6
from datetime import datetime
7
from tarfile import open as open_tarfile
8
from urllib.parse import urlparse, unquote
9
10
import requests
11
from yaml import safe_load, safe_dump
12
13
from ocrd_validators import OcrdResourceListValidator
14
from ocrd_utils import getLogger
15
from ocrd_utils.constants import HOME, XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME, VIRTUAL_ENV
16
from ocrd_utils.os import list_all_resources, pushd_popd
17
18
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
19
20
class OcrdResourceManager():
21
22
    """
23
    Managing processor resources
24
    """
25
    def __init__(self):
        """
        Set up the logger and the in-memory resource registry.

        The bundled resource list (``RESOURCE_LIST_FILENAME``) is loaded
        first, then the user's list at ``$XDG_CONFIG_HOME/ocrd/resources.yml``.
        If the user list does not exist yet it is created, containing only
        ``RESOURCE_USER_LIST_COMMENT``.

        Raises:
            ValueError: if one of the resource lists fails validation
                (raised by ``load_resource_list``).
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        self.user_list = Path(XDG_CONFIG_HOME, 'ocrd', 'resources.yml')
        if not self.user_list.exists():
            # parents=True: XDG_CONFIG_HOME itself may not exist yet;
            # exist_ok=True: nothing to do if the directory is already there.
            self.user_list.parent.mkdir(parents=True, exist_ok=True)
            with open(str(self.user_list), 'w', encoding='utf-8') as f:
                f.write(RESOURCE_USER_LIST_COMMENT)
        self.load_resource_list(self.user_list)
36
37
    def load_resource_list(self, list_filename, database=None):
        """
        Load a YAML resource list and merge it into a registry.

        Args:
            list_filename (Path): resource list to read. If it is not an
                existing file, nothing is loaded.
            database (dict): registry mapping executable name to a list of
                resource dicts; updated in place. Defaults to
                ``self.database`` when ``None``.

        Returns:
            dict: the (updated) registry that was passed in or defaulted to.

        Raises:
            ValueError: if the loaded list does not pass
                ``OcrdResourceListValidator``.
        """
        # Compare against None: an explicitly passed empty dict is a valid
        # registry and must not be replaced by self.database.
        if database is None:
            database = self.database
        if not list_filename.is_file():
            return database
        with open(list_filename, 'r', encoding='utf-8') as f:
            list_loaded = safe_load(f) or {}
        report = OcrdResourceListValidator.validate(list_loaded)
        if not report.is_valid:
            self.log.error('\n'.join(report.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resource_list in list_loaded.items():
            # Prepend, so that lists loaded later (the user's list) sort
            # before lists loaded earlier (the built-in list)
            database[executable] = resource_list + database.get(executable, [])
        return database
53
54
    def list_available(self, executable=None):
55
        """
56
        List models available for download by processor
57
        """
58
        if executable:
59
            return [(executable, self.database[executable])]
60
        return [(x, y) for x, y in self.database.items()]
61
62
    def list_installed(self, executable=None):
        """
        Report the resources found on disk, paired with their registry entry.

        Resources are matched against the registry by file ``name``. A
        resource file without a registry entry gets a stub entry added to the
        user resource list via ``add_to_user_database``.

        Args:
            executable (str): only look at this executable. If empty or
                ``None``, look at every executable in the registry plus every
                ``ocrd-*`` entry inside the ``ocrd-resources`` directories.

        Returns:
            list: ``(executable, resources)`` tuples, ``resources`` being a
            list of resource dicts.
        """
        if executable:
            candidates = [executable]
        else:
            # executables the registry knows about ...
            candidates = list(self.database.keys())
            # ... plus those that have a directory in the file system
            search_dirs = [join(base, 'ocrd-resources') for base in [XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME]]
            if VIRTUAL_ENV:
                search_dirs.append(join(VIRTUAL_ENV, 'share', 'ocrd-resources'))
            for search_dir in search_dirs:
                if not Path(search_dir).exists():
                    continue
                candidates.extend([entry for entry in listdir(search_dir) if entry.startswith('ocrd-')])
        installed = []
        for this_executable in set(candidates):
            found = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                matches = [entry for entry in self.database.get(this_executable, []) if entry['name'] == res_name]
                if matches:
                    # first match wins
                    found.append(matches[0])
                    continue
                self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'" % (this_executable, res_name, res_filename, self.user_list))
                found.append(self.add_to_user_database(this_executable, res_filename))
            installed.append((this_executable, found))
        return installed
90
91
    def add_to_user_database(self, executable, res_filename, url=None):
        """
        Add a stub entry to the user resource.yml

        The user list (``self.user_list``) is re-read from disk. If it has
        no entry named like the file, a stub is appended and the list is
        written back, prefixed with ``RESOURCE_USER_LIST_COMMENT``.

        Args:
            executable (str): executable the resource belongs to.
            res_filename (str or Path): path of the resource file; it must
                exist, because its size is read from the file system.
            url (str): URL to record in the stub; ``'???'`` if not given.

        Returns:
            dict: the newly created stub, or the already existing entry of
            the same name if there is one (the file is left untouched then).
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(self.user_list, 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        if executable not in user_database:
            user_database[executable] = []
        found = self.find_resources(executable=executable, name=res_name, database=user_database)
        if found:
            # Already listed: hand back the existing entry. find_resources
            # returns (executable, resdict) tuples.
            return found[0][1]
        resdict = {
            'name': res_name,
            'url': url if url else '???',
            'description': 'Found at %s on %s' % (res_filename, datetime.now()),
            'version_range': '???',
            'size': res_size
        }
        user_database[executable].append(resdict)
        with open(self.user_list, 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        return resdict
0 ignored issues
show
introduced by
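The variable resdict is not defined when the condition `not self.find_resources(...)` on line 101 evaluates to False, i.e. when the resource is already listed in the user database. In that case `return resdict` on line 114 raises an UnboundLocalError. Are you sure this can never be the case?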
Loading history...
115
116
    def find_resources(self, executable=None, name=None, url=None, database=None):
        """
        Find resources in the registry

        Args:
            executable (str): only search the resources of this executable;
                all executables are searched if empty or ``None``.
            name (str): match resources whose ``name`` equals this value.
            url (str): match resources whose ``url`` equals this value.
            database (dict): registry to search; defaults to
                ``self.database`` when ``None``.

        Returns:
            list: ``(executable, resdict)`` tuples. If neither ``name`` nor
            ``url`` is given, every resource of the searched executables is
            returned; otherwise a resource matches if its ``url`` or its
            ``name`` is equal to the respective argument. An unknown
            ``executable`` yields an empty list.
        """
        # Compare against None: an explicitly passed empty registry must be
        # searched as such, not silently replaced by self.database.
        if database is None:
            database = self.database
        if executable:
            if executable not in database:
                return []
            executables = [executable]
        else:
            executables = list(database)
        match_all = not name and not url
        ret = []
        for this_executable in executables:
            for resdict in database[this_executable]:
                if (
                    match_all
                    or (url and url == resdict['url'])
                    or (name and name == resdict['name'])
                ):
                    ret.append((this_executable, resdict))
        return ret
134
135
    def parameter_usage(self, name, usage='as-is'):
        """
        Turn a resource name into the value to be used as a parameter.

        Args:
            name (str): resource (file) name.
            usage (str): ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` returns it without its last suffix.

        Returns:
            str or None: the converted name; ``None`` for any other ``usage``.
        """
        if usage == 'without-extension':
            return Path(name).stem
        return name if usage == 'as-is' else None
140
141
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content at ``url`` into the file ``filename``.

        Args:
            url (str): URL to fetch with an HTTP GET request.
            filename (str or Path): file to write; opened in ``'wb'`` mode
                only after the server answered with a success status.
            progress_cb (callable): if given, called with the size in bytes
                of every chunk before it is written.

        Raises:
            requests.HTTPError: if the server responds with an error status,
                so that an error page is never stored as the resource.
        """
        log = getLogger('ocrd.resource_manager._download_impl')
        log.info("Downloading %s" % url)
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            # The Content-Length header is deliberately not consulted: it is
            # optional (e.g. absent for chunked responses) and not needed.
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
151
152
    def _copy_impl(self, src_filename, filename, progress_cb=None):
        """
        Copy the local file ``src_filename`` to ``filename`` in 4096-byte reads.

        Args:
            src_filename (str or Path): file to read.
            filename (str or Path): file to write (opened in ``'wb'`` mode).
            progress_cb (callable): if given, called with the size in bytes
                of every chunk after it has been written.
        """
        getLogger('ocrd.resource_manager._copy_impl').info("Copying %s" % src_filename)
        with open(filename, 'wb') as sink, open(src_filename, 'rb') as source:
            # read() returns b'' at end of file, which ends the iteration
            for block in iter(lambda: source.read(4096), b''):
                sink.write(block)
                if progress_cb:
                    progress_cb(len(block))
164
165
    # TODO Proper caching (make head request for size, If-Modified etc)
166
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): executable the resource belongs to; used as the
                name of the sub-directory of ``basedir`` to store it in.
            url (str): ``http://`` or ``https://`` URL to download; anything
                else is treated as a local path to copy from.
            overwrite (bool): replace the destination if it already exists.
                If ``False`` and the destination exists, nothing is fetched.
            basedir (str or Path): parent directory of the per-executable
                resource directories.
            name (str): name of the destination below
                ``basedir/executable``; defaults to the last segment of the
                (unquoted) URL path.
            resource_type (str): ``'file'`` stores the fetched file itself,
                ``'tarball'`` extracts it and stores ``path_in_archive``.
                For any other value nothing is fetched.
            path_in_archive (str): for tarballs, the path inside the
                extracted archive that is copied to the destination.
            progress_cb (callable): passed on to the download/copy
                implementation, which calls it with the size of each chunk.

        Returns:
            Path: the destination path ``basedir/executable/name``.
        """
        # Function-scope import so that this method does not depend on a
        # change to the module-level imports.
        from shutil import rmtree

        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = Path(unquote(urlparse(url).path)).name
        fpath = Path(destdir, name)
        is_url = url.startswith(('https://', 'http://'))
        if fpath.exists() and not overwrite:
            log.info("%s to be %s to %s which already exists and overwrite is False" % (url, 'downloaded' if is_url else 'copied', fpath))
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        fetch = self._download_impl if is_url else self._copy_impl
        if resource_type == 'file':
            fetch(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # pushd_popd switches the working directory (the relative
            # '../download.tar.xx' below relies on that), so relative local
            # paths have to be made absolute beforehand.
            src = url if is_url else str(Path(url).resolve())
            target = fpath.resolve()
            with pushd_popd(tempdir=True):
                fetch(src, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(review): extractall() trusts the member paths
                        # of the archive; only use with trusted tarballs.
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
                    # copytree() refuses an existing destination, so with
                    # overwrite=True the old resource has to go first. It is
                    # removed only now, after fetching and extracting worked.
                    if target.is_dir():
                        rmtree(str(target))
                    elif target.exists():
                        target.unlink()
                    copytree(path_in_archive, str(target))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
212