Passed
Pull Request — master (#559)
by Konstantin
01:58
created

ocrd.resource_manager   C

Complexity

Total Complexity 55

Size/Duplication

Total Lines 189
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 55
eloc 149
dl 0
loc 189
rs 6
c 0
b 0
f 0

9 Methods

Rating   Name   Duplication   Size   Complexity  
D OcrdResourceManager.find_resources() 0 18 13
B OcrdResourceManager.load_resource_list() 0 16 7
B OcrdResourceManager.list_installed() 0 27 7
C OcrdResourceManager.download() 0 39 9
B OcrdResourceManager.add_to_user_database() 0 24 5
A OcrdResourceManager.__init__() 0 11 4
A OcrdResourceManager.list_available() 0 7 2
A OcrdResourceManager.parameter_usage() 0 5 3
A OcrdResourceManager._download_impl() 0 9 5

How to fix   Complexity   

Complexity

Complex classes like ocrd.resource_manager often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir
4
import re
5
from shutil import copytree
6
from datetime import datetime
7
from tarfile import open as open_tarfile
8
9
import requests
10
from yaml import safe_load, safe_dump
11
12
from ocrd_validators import OcrdResourceListValidator
13
from ocrd_utils import getLogger
14
from ocrd_utils.constants import HOME, XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME
15
from ocrd_utils.os import list_all_resources, pushd_popd
16
17
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
18
19
class OcrdResourceManager():
20
21
    """
22
    Managing processor resources
23
    """
24
    def __init__(self):
        """
        Set up the logger and the resource database.

        The database is filled from ``RESOURCE_LIST_FILENAME`` first and then
        from the user's ``resources.yml`` below ``XDG_CONFIG_HOME/ocrd``, which
        is created (containing only ``RESOURCE_USER_LIST_COMMENT``) if it does
        not exist yet.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        self.user_list = Path(XDG_CONFIG_HOME, 'ocrd', 'resources.yml')
        if not self.user_list.exists():
            # Create missing ancestors too (XDG_CONFIG_HOME itself may not exist
            # yet) and tolerate the directory being created concurrently.
            self.user_list.parent.mkdir(parents=True, exist_ok=True)
            with open(str(self.user_list), 'w', encoding='utf-8') as f:
                f.write(RESOURCE_USER_LIST_COMMENT)
        # Loaded last: load_resource_list prepends, so user entries end up first
        self.load_resource_list(self.user_list)
35
36
    def load_resource_list(self, list_filename, database=None):
        """
        Load the YAML resource list ``list_filename`` and merge it into ``database``.

        Args:
            list_filename (Path): file mapping executables to lists of
                resources. Ignored if it is not an existing file.
            database (dict): mapping to merge into, defaults to ``self.database``.

        Returns:
            The mapping that was merged into.

        Raises:
            ValueError: if the loaded list is rejected by ``OcrdResourceListValidator``.
        """
        # Compare against None so that a caller-supplied empty dict is filled
        # in rather than silently swapped for self.database.
        if database is None:
            database = self.database
        if list_filename.is_file():
            with open(list_filename, 'r', encoding='utf-8') as f:
                list_loaded = safe_load(f) or {}
            report = OcrdResourceListValidator.validate(list_loaded)
            if not report.is_valid:
                self.log.error('\n'.join(report.errors))
                raise ValueError("Resource list %s is invalid!" % (list_filename))
            for executable, resource_list in list_loaded.items():
                # Prepend, so user provided is sorted before builtin
                database[executable] = resource_list + database.get(executable, [])
        return database
52
53
    def list_available(self, executable=None):
        """
        Return the registered resources as ``(executable, resources)`` tuples.

        If ``executable`` is given, the list holds only the tuple for that
        executable, otherwise one tuple per executable in the database.
        """
        if not executable:
            return list(self.database.items())
        return [(executable, self.database[executable])]
60
61
    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Args:
            executable (str): restrict the listing to this executable. If not
                given, every executable in the database is listed, plus every
                ``ocrd-*`` entry found in the XDG cache/config/data directories
                and, if ``VIRTUAL_ENV`` is set, in its ``share`` directory.

        Returns:
            list of ``(executable, [resdict, ...])`` tuples. Resources found by
            ``list_all_resources`` that have no database entry of the same
            name get a stub entry via ``add_to_user_database``.
        """
        ret = []
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database.keys())
            # resources in the file system
            parent_dirs = [XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME]
            if 'VIRTUAL_ENV' in environ:
                parent_dirs += [join(environ['VIRTUAL_ENV'], 'share')]
            for parent_dir in parent_dirs:
                # Not every location has to exist; listdir would raise on a missing one
                if not Path(parent_dir).is_dir():
                    continue
                all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        for this_executable in set(all_executables):
            known = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                # First database entry with that name wins
                resdict = next((x for x in known if x['name'] == res_name), None)
                if resdict is None:
                    self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'" % (this_executable, res_name, res_filename, self.user_list))
                    resdict = self.add_to_user_database(this_executable, res_filename)
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret
88
89
    def add_to_user_database(self, executable, res_filename):
        """
        Add a stub entry to the user resource.yml

        Args:
            executable (str): executable the resource is registered for
            res_filename (str): path of the resource; it must exist because its
                size is recorded in the stub

        Returns:
            dict: the newly written stub. If the user list already has an entry
            of the same name for ``executable``, that entry is returned and the
            file is left untouched.
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(self.user_list, 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        # Register the executable before searching so the mapping handed to
        # find_resources is never empty.
        if executable not in user_database:
            user_database[executable] = []
        # find_resources returns (executable, resdict) tuples
        existing = self.find_resources(executable=executable, name=res_name, database=user_database)
        if existing:
            # Already registered: hand back the stored entry instead of falling
            # through to an unbound ``resdict``.
            return existing[0][1]
        resdict = {
            'name': res_name,
            'url': '???',
            'description': 'Found at %s on %s' % (res_filename, datetime.now()),
            'version_range': '???',
            'size': res_size
        }
        user_database[executable].append(resdict)
        with open(self.user_list, 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        return resdict
0 ignored issues
show
introduced by
The variable resdict does not seem to be defined when the condition `not self.find_resources(...)` on line 99 is False, i.e. when the resource is already listed in the user database. Are you sure this can never be the case?
Loading history...
113
114
    def find_resources(self, executable=None, name=None, url=None, database=None):
        """
        Find resources in the registry

        Args:
            executable (str): only search the resources of this executable
            name (str): match resources whose ``name`` equals this value
            url (str): match resources whose ``url`` equals this value
            database (dict): registry to search, defaults to ``self.database``

        Returns:
            list of ``(executable, resdict)`` tuples. Without ``name`` and
            ``url`` every resource matches; if both are given, a resource
            matching either of them is included. An ``executable`` missing
            from the registry yields an empty list.
        """
        # Compare against None so an explicitly passed empty registry is
        # searched (yielding nothing) instead of falling back to self.database.
        if database is None:
            database = self.database
        ret = []
        if executable and executable not in database:
            return ret
        candidates = [executable] if executable else list(database)
        for this_executable in candidates:
            for resdict in database[this_executable]:
                matches = (
                    (not name and not url)
                    or (url and url == resdict['url'])
                    or (name and name == resdict['name'])
                )
                if matches:
                    ret.append((this_executable, resdict))
        return ret
132
133
    def parameter_usage(self, name, usage='as-is'):
        """
        Return ``name`` in the form selected by ``usage``.

        ``'as-is'`` returns ``name`` unchanged, ``'without-extension'`` returns
        its file name with the last suffix removed. Any other ``usage``
        yields ``None``.
        """
        if usage == 'without-extension':
            return Path(name).stem
        return name if usage == 'as-is' else None
138
139
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content at ``url`` into the file ``filename``.

        Args:
            url (str): URL to fetch
            filename (str or Path): file to write in binary mode, truncated if
                it already exists
            progress_cb (callable): if given, called with the byte count of
                every chunk before it is written

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        with requests.get(url, stream=True) as r:
            # Fail on 4xx/5xx instead of storing the error page as the resource
            r.raise_for_status()
            # The Content-Length header is optional and its value was never
            # used, so it is no longer parsed (a missing header used to raise).
            # Open the target only after the request succeeded, so a failed
            # request does not leave an empty file behind.
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
148
149
    # TODO Proper caching (make head request for size, If-Modified etc)
150
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): executable the resource is for; used as the
                directory name below ``basedir``
            url (str): URL to download from
            overwrite (bool): replace the resource if it already exists
            basedir (str): parent directory of the per-executable directory
            name (str): name of the resource below ``basedir/executable``;
                defaults to ``url`` stripped of all non-alphanumeric characters
            resource_type (str): ``'file'`` stores the download as is,
                ``'tarball'`` unpacks it and copies ``path_in_archive``.
                Any other value downloads nothing.
            path_in_archive (str): directory inside the tarball to copy
            progress_cb (callable): passed on to ``_download_impl``

        Returns:
            Path: ``basedir/executable/name``
        """
        # Function-scope import: only needed to replace an existing unpacked tarball
        from shutil import rmtree

        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False" % (url, fpath))
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # Resolve the destination before leaving the current directory;
            # pushd_popd presumably changes the working directory (the relative
            # '../download.tar.xx' below relies on that), which would make a
            # relative fpath point into the temporary directory.
            copy_dest = Path.cwd() / fpath
            with pushd_popd(tempdir=True):
                log.info("Downloading %s" % url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    # NOTE(review): extractall trusts the member paths stored in
                    # the archive; only use this with archives from trusted URLs.
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
                    # copytree refuses an existing destination, which is exactly
                    # the overwrite=True case, so clear it first. This is done
                    # only now, so a failed download keeps the old resource.
                    if copy_dest.is_dir() and not copy_dest.is_symlink():
                        rmtree(str(copy_dest))
                    elif copy_dest.exists() or copy_dest.is_symlink():
                        copy_dest.unlink()
                    copytree(path_in_archive, str(copy_dest))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
189