Passed
Pull Request — master (#559)
by Konstantin
04:41
created

ocrd.resource_manager   F

Complexity

Total Complexity 70

Size/Duplication

Total Lines 222
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 70
eloc 177
dl 0
loc 222
rs 2.8
c 0
b 0
f 0

12 Methods

Rating   Name   Duplication   Size   Complexity  
D OcrdResourceManager.find_resources() 0 18 13
B OcrdResourceManager.load_resource_list() 0 16 7
B OcrdResourceManager.list_installed() 0 27 7
A OcrdResourceManager.__init__() 0 11 4
A OcrdResourceManager.list_available() 0 7 2
A OcrdResourceManager._copy_impl() 0 12 5
A OcrdResourceManager.resource_dir_to_location() 0 5 3
A OcrdResourceManager.location_to_resource_dir() 0 4 3
D OcrdResourceManager.download() 0 46 12
B OcrdResourceManager.add_to_user_database() 0 24 6
A OcrdResourceManager.parameter_usage() 0 5 3
A OcrdResourceManager._download_impl() 0 10 5

How to fix   Complexity   

Complexity

Complex classes like ocrd.resource_manager often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir, getcwd
4
import re
5
from shutil import copytree
6
from datetime import datetime
7
from tarfile import open as open_tarfile
8
from urllib.parse import urlparse, unquote
9
10
import requests
11
from yaml import safe_load, safe_dump
12
13
from ocrd_validators import OcrdResourceListValidator
14
from ocrd_utils import getLogger
15
from ocrd_utils.constants import HOME, XDG_DATA_HOME, XDG_CONFIG_HOME
16
from ocrd_utils.os import list_all_resources, pushd_popd
17
18
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
19
20
class OcrdResourceManager():
    """
    Managing processor resources

    The registry (``self.database``) maps an executable name to a list of
    resource description dicts. It is filled from ``RESOURCE_LIST_FILENAME``
    and from the user list at ``XDG_CONFIG_HOME/ocrd/resources.yml``; entries
    loaded later are placed before entries loaded earlier.
    """

    def __init__(self):
        """
        Load the bundled resource list, then the user resource list.

        The user list file is created (containing only
        ``RESOURCE_USER_LIST_COMMENT``) if it does not exist yet.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        self.user_list = Path(XDG_CONFIG_HOME, 'ocrd', 'resources.yml')
        if not self.user_list.exists():
            # XDG_CONFIG_HOME itself may be missing, so create all ancestors
            self.user_list.parent.mkdir(parents=True, exist_ok=True)
            with open(str(self.user_list), 'w', encoding='utf-8') as f:
                f.write(RESOURCE_USER_LIST_COMMENT)
        self.load_resource_list(self.user_list)

    def load_resource_list(self, list_filename, database=None):
        """
        Merge the YAML resource list ``list_filename`` into ``database``.

        Args:
            list_filename (Path): resource list to read; silently skipped if
                it is not an existing file.
            database (dict): registry to merge into. Defaults to
                ``self.database`` only when omitted (``None``); an explicitly
                passed empty dict is used as given.

        Returns:
            The (mutated) database.

        Raises:
            ValueError: if ``OcrdResourceListValidator`` rejects the list.
        """
        if database is None:
            database = self.database
        if list_filename.is_file():
            with open(list_filename, 'r', encoding='utf-8') as f:
                list_loaded = safe_load(f) or {}
            report = OcrdResourceListValidator.validate(list_loaded)
            if not report.is_valid:
                self.log.error('\n'.join(report.errors))
                raise ValueError("Resource list %s is invalid!" % (list_filename))
            for executable, resource_list in list_loaded.items():
                # Prepend, so user provided is sorted before builtin
                database[executable] = resource_list + database.get(executable, [])
        return database

    def list_available(self, executable=None):
        """
        List models available for download by processor

        Returns a list of ``(executable, resource_list)`` tuples. When
        ``executable`` is given, the list has exactly one tuple; an executable
        without registry entries yields an empty resource list.
        """
        if executable:
            return [(executable, self.database.get(executable, []))]
        return list(self.database.items())

    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Resources found in the file system but missing from the registry get
        a stub entry in the user resource list. Every returned resource dict
        has its ``path`` key set to the file found.

        Returns a list of ``(executable, resource_list)`` tuples, sorted by
        executable name.
        """
        ret = []
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database)
            # resources in the file system
            for base in (XDG_DATA_HOME, '/usr/local/share'):
                parent_dir = join(base, 'ocrd-resources')
                if Path(parent_dir).exists():
                    all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        # sorted() makes the output order deterministic
        for this_executable in sorted(set(all_executables)):
            known = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                resdict = next((x for x in known if x['name'] == res_name), None)
                if resdict is None:
                    self.log.info(
                        "%s resource '%s' (%s) not a known resource, creating stub in %s",
                        this_executable, res_name, res_filename, self.user_list)
                    resdict = self.add_to_user_database(this_executable, res_filename)
                resdict['path'] = res_filename
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret

    def add_to_user_database(self, executable, res_filename, url=None):
        """
        Add a stub entry to the user resource.yml

        If the user list already has a resource of the same name for
        ``executable``, that entry is returned and the file is left untouched.

        Args:
            executable (str): processor the resource belongs to.
            res_filename: path of the resource file; must exist, since its
                size is read via ``stat()``.
            url (str): source URL to record, ``'???'`` if not given.

        Returns:
            The resource dict (newly created or pre-existing).
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(str(self.user_list), 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        user_database.setdefault(executable, [])
        existing = self.find_resources(executable=executable, name=res_name, database=user_database)
        if existing:
            # find_resources yields (executable, resdict) tuples
            return existing[0][1]
        resdict = {
            'name': res_name,
            'url': url if url else '???',
            'description': 'Found at %s on %s' % (self.resource_dir_to_location(res_filename), datetime.now()),
            'version_range': '???',
            'size': res_size
        }
        user_database[executable].append(resdict)
        with open(str(self.user_list), 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        return resdict

    def find_resources(self, executable=None, name=None, url=None, database=None):
        """
        Find resources in the registry

        Args:
            executable (str): restrict the search to this executable;
                all executables are searched if not given.
            name (str): match resources with this ``name``.
            url (str): match resources with this ``url``.
            database (dict): registry to search. Defaults to
                ``self.database`` only when omitted (``None``).

        A resource matches if neither ``name`` nor ``url`` is given, or if it
        equals either of the given values.

        Returns:
            A list of ``(executable, resdict)`` tuples.
        """
        if database is None:
            database = self.database
        if executable:
            if executable not in database:
                return []
            candidates = [executable]
        else:
            candidates = list(database)
        match_all = not name and not url
        ret = []
        for this_executable in candidates:
            for resdict in database[this_executable]:
                if match_all \
                        or (url and url == resdict['url']) \
                        or (name and name == resdict['name']):
                    ret.append((this_executable, resdict))
        return ret

    def location_to_resource_dir(self, location):
        """
        Map a location keyword to a resource directory.

        ``'system'`` and ``'data'`` map to the system-wide and the
        ``XDG_DATA_HOME`` ``ocrd-resources`` directory respectively; any other
        value maps to the current working directory.
        """
        if location == 'system':
            return '/usr/local/share/ocrd-resources'
        if location == 'data':
            return join(XDG_DATA_HOME, 'ocrd-resources')
        return getcwd()

    def resource_dir_to_location(self, resource_path):
        """
        Map a resource path back to its location keyword.

        Returns ``'system'`` or ``'data'`` if the path starts with the
        respective ``ocrd-resources`` directory, else the path as a string.
        """
        resource_path = str(resource_path)
        if resource_path.startswith('/usr/local/share/ocrd-resources'):
            return 'system'
        if resource_path.startswith(join(XDG_DATA_HOME, 'ocrd-resources')):
            return 'data'
        return resource_path

    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the parameter value for resource ``name``.

        Args:
            name (str): resource (file) name.
            usage (str): ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` strips the final suffix.

        Raises:
            ValueError: for any other ``usage`` (instead of silently
                returning ``None``).
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        raise ValueError("No such usage '%s'" % usage)

    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream ``url`` into ``filename`` in 4096 byte chunks.

        ``progress_cb``, if given, is called with the length of every chunk.

        Raises:
            requests.HTTPError: if the server answers with an error status,
                so that an error page is never stored as the resource.
        """
        log = getLogger('ocrd.resource_manager._download_impl')
        log.info("Downloading %s", url)
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            # Open the target only once the request succeeded, so a failed
            # request does not leave an empty file behind
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)

    def _copy_impl(self, src_filename, filename, progress_cb=None):
        """
        Copy the local file ``src_filename`` to ``filename`` in 4096 byte chunks.

        ``progress_cb``, if given, is called with the length of every chunk
        after it has been written.
        """
        log = getLogger('ocrd.resource_manager._copy_impl')
        log.info("Copying %s", src_filename)
        with open(filename, 'wb') as f_out, open(src_filename, 'rb') as f_in:
            # read() returns b'' at end of file, which terminates the iterator
            for chunk in iter(lambda: f_in.read(4096), b''):
                f_out.write(chunk)
                if progress_cb:
                    progress_cb(len(chunk))

    # TODO Proper caching (make head request for size, If-Modified etc)
    def download(
        self,
        executable,
        url,
        basedir,
        overwrite=False,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): processor name; the resource is stored below
                ``basedir/executable``.
            url (str): ``http(s)://`` URL to download, or a local path to copy.
            basedir: directory containing the per-executable directories.
            overwrite (bool): replace an already existing target. If false
                and the target exists, nothing is fetched.
            name (str): target name; defaults to the last component of the
                (unquoted) URL path.
            resource_type (str): ``'file'`` stores the data as is,
                ``'tarball'`` extracts the archive and copies
                ``path_in_archive`` to the target. Other values fetch nothing.
            path_in_archive (str): directory within the tarball to copy.
            progress_cb (callable): called with the number of bytes of every
                chunk transferred.

        Returns:
            Path of the target, ``basedir/executable/name``.
        """
        from shutil import rmtree

        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = Path(unquote(urlparse(url).path)).name
        fpath = Path(destdir, name)
        is_url = url.startswith(('https://', 'http://'))
        if fpath.exists() and not overwrite:
            log.info("%s to be %s to %s which already exists and overwrite is False",
                     url, 'downloaded' if is_url else 'copied', fpath)
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        fetch = self._download_impl if is_url else self._copy_impl
        if resource_type == 'file':
            fetch(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # Anchor the target to the current directory now: pushd_popd
            # presumably changes the working directory, which would redirect
            # a relative target into the temporary directory.
            target = Path.cwd() / fpath
            with pushd_popd(tempdir=True):
                fetch(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(security): extractall() trusts member paths;
                        # a malicious archive may write outside of 'out'.
                        # Only use tarballs from trusted sources.
                        tar.extractall()
                    # copytree refuses an existing destination, so an
                    # overwritten target has to be removed first (only after
                    # download and extraction succeeded)
                    if target.is_symlink() or target.is_file():
                        target.unlink()
                    elif target.is_dir():
                        rmtree(str(target))
                    log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                    copytree(path_in_archive, str(target))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
222