Passed
Pull Request — master (#559)
by Konstantin
02:09
created

OcrdResourceManager.find_resources()   D

Complexity

Conditions 13

Size

Total Lines 18
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 15
dl 0
loc 18
rs 4.2
c 0
b 0
f 0
cc 13
nop 5

How to fix   Complexity   

Complexity

Complex classes like ocrd.resource_manager.OcrdResourceManager.find_resources() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir, getcwd
4
import re
5
from shutil import copytree
6
from datetime import datetime
7
from tarfile import open as open_tarfile
8
from urllib.parse import urlparse, unquote
9
10
import requests
11
from yaml import safe_load, safe_dump
12
13
from ocrd_validators import OcrdResourceListValidator
14
from ocrd_utils import getLogger
15
from ocrd_utils.constants import HOME, XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME, VIRTUAL_ENV
16
from ocrd_utils.os import list_all_resources, pushd_popd
17
18
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
19
from .config import load_config_file
20
21
class OcrdResourceManager():
22
23
    """
24
    Managing processor resources
25
    """
26
    def __init__(self):
        """
        Load the bundled resource list, then the user resource list

        The user list is ``XDG_CONFIG_HOME/ocrd/resources.yml``. If it does
        not exist yet, it is created containing only
        ``RESOURCE_USER_LIST_COMMENT``.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        self.user_list = Path(XDG_CONFIG_HOME, 'ocrd', 'resources.yml')
        if not self.user_list.exists():
            # parents=True: XDG_CONFIG_HOME itself may not exist yet.
            # exist_ok=True: replaces the separate exists() check.
            self.user_list.parent.mkdir(parents=True, exist_ok=True)
            with open(str(self.user_list), 'w', encoding='utf-8') as f:
                f.write(RESOURCE_USER_LIST_COMMENT)
        self.load_resource_list(self.user_list)
37
38
    def load_resource_list(self, list_filename, database=None):
39
        if not database:
40
            database = self.database
41
        if list_filename.is_file():
42
            with open(list_filename, 'r', encoding='utf-8') as f:
43
                list_loaded = safe_load(f) or {}
44
            report = OcrdResourceListValidator.validate(list_loaded)
45
            if not report.is_valid:
46
                self.log.error('\n'.join(report.errors))
47
                raise ValueError("Resource list %s is invalid!" % (list_filename))
48
            for executable, resource_list in list_loaded.items():
49
                if executable not in database:
50
                    database[executable] = []
51
                # Prepend, so user provided is sorted before builtin
52
                database[executable] = list_loaded[executable] + database[executable]
53
        return database
54
55
    def list_available(self, executable=None):
56
        """
57
        List models available for download by processor
58
        """
59
        if executable:
60
            return [(executable, self.database[executable])]
61
        return [(x, y) for x, y in self.database.items()]
62
63
    def list_installed(self, executable=None):
        """
        Report the resources found on disk, matched to the registry by ``name``

        With ``executable`` given, only that executable is inspected.
        Otherwise every executable in the registry is inspected, plus every
        ``ocrd-*`` entry found in the ``ocrd-resources`` directories.

        A resource file with no registry entry of the same name gets a stub
        entry via ``add_to_user_database``. Every returned entry has its
        ``path`` key set to the file that was found.

        Returns a list of ``(executable, resources)`` pairs.
        """
        if executable:
            candidates = [executable]
        else:
            # executables the registry knows about
            candidates = list(self.database.keys())
            # executables that have a directory in the file system
            search_dirs = [
                join(base, 'ocrd-resources')
                for base in (XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME)
            ]
            if VIRTUAL_ENV:
                search_dirs.append(join(VIRTUAL_ENV, 'share', 'ocrd-resources'))
            for search_dir in search_dirs:
                if not Path(search_dir).exists():
                    continue
                candidates.extend(entry for entry in listdir(search_dir) if entry.startswith('ocrd-'))
        installed = []
        # set(): an executable can be both in the registry and on disk
        for this_executable in set(candidates):
            found = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                known = [entry for entry in self.database.get(this_executable, []) if entry['name'] == res_name]
                if known:
                    resdict = known[0]
                else:
                    self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'" % (this_executable, res_name, res_filename, self.user_list))
                    resdict = self.add_to_user_database(this_executable, res_filename)
                resdict['path'] = res_filename
                found.append(resdict)
            installed.append((this_executable, found))
        return installed
92
93
    def add_to_user_database(self, executable, res_filename, url=None):
        """
        Add a stub entry to the user resource.yml

        Args:
            executable (str): executable the resource belongs to
            res_filename: path of the resource file; its base name becomes
                the entry ``name`` and its file size the entry ``size``
            url (str): URL to record, ``'???'`` if not given

        Returns:
            The resource entry for ``res_filename``: the one already present
            in the user list if there is one, otherwise the stub just added.
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(self.user_list, 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        if executable not in user_database:
            user_database[executable] = []
        found = self.find_resources(executable=executable, name=res_name, database=user_database)
        if found:
            # Already listed: hand back the existing entry. Previously this
            # path ended in an UnboundLocalError. Nothing changed, so the
            # file is left untouched.
            return found[0][1]
        resdict = {
            'name': res_name,
            'url': url if url else '???',
            'description': 'Found at %s on %s' % (self.resource_dir_to_location(res_filename), datetime.now()),
            'version_range': '???',
            'size': res_size
        }
        user_database[executable].append(resdict)
        with open(self.user_list, 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        return resdict
0 ignored issues
show
introduced by
The variable resdict does not seem to be defined in case BooleanNotNode on line 103 is False. Are you sure this can never be the case?
Loading history...
117
118
    def find_resources(self, executable=None, name=None, url=None, database=None):
119
        """
120
        Find resources in the registry
121
        """
122
        if not database:
123
            database = self.database
124
        ret = []
125
        if executable and executable not in database.keys():
126
            return ret
127
        for executable in [executable] if executable else database.keys():
128
            for resdict in database[executable]:
129
                if not name and not url:
130
                    ret.append((executable, resdict))
131
                elif url and url == resdict['url']:
132
                    ret.append((executable, resdict))
133
                elif name and name == resdict['name']:
134
                    ret.append((executable, resdict))
135
        return ret
136
137
    def location_to_resource_dir(self, location):
        """
        Translate a location keyword into the matching resource directory

        ``'virtualenv'`` (only if ``VIRTUAL_ENV`` is set), ``'cache'``,
        ``'data'`` and ``'config'`` map to their ``ocrd-resources``
        directory. Anything else maps to the current working directory.
        """
        if location == 'virtualenv' and VIRTUAL_ENV:
            return join(VIRTUAL_ENV, 'share', 'ocrd-resources')
        if location == 'cache':
            return join(XDG_CACHE_HOME, 'ocrd-resources')
        if location == 'data':
            return join(XDG_DATA_HOME, 'ocrd-resources')
        if location == 'config':
            return join(XDG_CONFIG_HOME, 'ocrd-resources')
        return getcwd()
143
144
    def resource_dir_to_location(self, resource_path):
        """
        Translate a resource path into the location keyword it lies under

        Returns ``'virtualenv'``, ``'cache'``, ``'data'`` or ``'config'`` for
        the first ``ocrd-resources`` directory that prefixes the path,
        checked in that order. If none does, returns the path as a string.
        """
        path_str = str(resource_path)
        if VIRTUAL_ENV and path_str.startswith(join(VIRTUAL_ENV, 'share', 'ocrd-resources')):
            return 'virtualenv'
        if path_str.startswith(join(XDG_CACHE_HOME, 'ocrd-resources')):
            return 'cache'
        if path_str.startswith(join(XDG_DATA_HOME, 'ocrd-resources')):
            return 'data'
        if path_str.startswith(join(XDG_CONFIG_HOME, 'ocrd-resources')):
            return 'config'
        return path_str
151
152
    @property
    def default_resource_dir(self):
        """
        Resource directory for the ``resource_location`` of the config file
        """
        location = load_config_file().resource_location
        return self.location_to_resource_dir(location)
156
157
    def parameter_usage(self, name, usage='as-is'):
158
        if usage == 'as-is':
159
            return name
160
        if usage == 'without-extension':
161
            return Path(name).stem
162
163
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content at ``url`` into ``filename``

        Args:
            url (str): HTTP(S) URL to fetch
            filename: path of the file to write, opened in binary mode
            progress_cb (callable): if given, called with the byte length of
                each chunk before it is written

        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        log = getLogger('ocrd.resource_manager._download_impl')
        log.info("Downloading %s" % url)
        with requests.get(url, stream=True) as r:
            # Fail on HTTP errors instead of saving the error page as the
            # resource. Checked before opening the target, so a failed
            # request does not leave an empty file behind.
            r.raise_for_status()
            # The content-length header is deliberately not read: it is
            # absent for chunked responses and the value was never used.
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
173
174
    def _copy_impl(self, src_filename, filename, progress_cb=None):
        """
        Copy ``src_filename`` to ``filename`` in 4096-byte chunks

        If ``progress_cb`` is given, it is called with the byte length of
        each chunk after the chunk has been written.
        """
        log = getLogger('ocrd.resource_manager._copy_impl')
        log.info("Copying %s" % src_filename)
        with open(filename, 'wb') as f_out, open(src_filename, 'rb') as f_in:
            # iter() with a sentinel stops at the first empty read, i.e. EOF
            for chunk in iter(lambda: f_in.read(4096), b''):
                f_out.write(chunk)
                if progress_cb:
                    progress_cb(len(chunk))
186
187
    # TODO Proper caching (make head request for size, If-Modified etc)
188
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): executable the resource is for; the resource
                is stored below ``basedir/executable``
            url (str): ``http://`` or ``https://`` URL to download; anything
                else is treated as a local path to copy from
            overwrite (bool): if false and the target already exists, do
                nothing and return the existing path
            basedir: directory under which per-executable directories live
            name (str): target name, defaults to the last path segment of
                the (unquoted) URL path
            resource_type (str): ``'file'`` stores the content as-is,
                ``'tarball'`` extracts it and copies ``path_in_archive``
            path_in_archive (str): path inside the extracted tarball to copy
                to the target
            progress_cb (callable): passed on to the download/copy helpers

        Returns:
            Path of the resource, ``basedir/executable/name``
        """
        # Local import: rmtree is needed only to replace an existing target
        from shutil import rmtree
        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            url_parsed = urlparse(url)
            name = Path(unquote(url_parsed.path)).name
        fpath = Path(destdir, name)
        is_url = url.startswith(('https://', 'http://'))
        if fpath.exists() and not overwrite:
            log.info("%s to be %s to %s which already exists and overwrite is False" % (url, 'downloaded' if is_url else 'copied', fpath))
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            if is_url:
                self._download_impl(url, fpath, progress_cb)
            else:
                self._copy_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            with pushd_popd(tempdir=True):
                if is_url:
                    self._download_impl(url, 'download.tar.xx', progress_cb)
                else:
                    self._copy_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(review): extractall() trusts the member paths
                        # in the archive; only use with trusted sources.
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
                    if fpath.exists():
                        # Only reachable with overwrite=True. copytree
                        # refuses an existing destination, so remove the
                        # old resource first.
                        if fpath.is_dir() and not fpath.is_symlink():
                            rmtree(str(fpath))
                        else:
                            fpath.unlink()
                    copytree(path_in_archive, str(fpath))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
234