Passed
Pull Request — master (#559)
by Konstantin
02:53
created

ocrd.resource_manager   B

Complexity

Total Complexity 45

Size/Duplication

Total Lines 155
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 45
eloc 118
dl 0
loc 155
rs 8.8
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
D OcrdResourceManager.find_resources() 0 16 12
B OcrdResourceManager.load_resource_list() 0 13 6
B OcrdResourceManager.list_installed() 0 27 7
C OcrdResourceManager.download() 0 39 9
A OcrdResourceManager.__init__() 0 5 1
A OcrdResourceManager.list_available() 0 7 2
A OcrdResourceManager.parameter_usage() 0 5 3
A OcrdResourceManager._download_impl() 0 9 5

How to fix   Complexity   

Complexity

Complex classes like ocrd.resource_manager often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir
4
import re
5
from shutil import copytree
6
from tarfile import open as open_tarfile
7
8
import requests
9
from yaml import safe_load
10
11
from ocrd_validators import OcrdResourceListValidator
12
from ocrd_utils import getLogger
13
from ocrd_utils.constants import HOME, XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME
14
from ocrd_utils.os import list_all_resources, pushd_popd
15
16
from .constants import RESOURCE_LIST_FILENAME
17
18
# Built-in resource list, located via RESOURCE_LIST_FILENAME.
builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
# Per-user resource list, expected at <HOME>/ocrd/resources.yml.
user_list_filename = Path(HOME) / 'ocrd' / 'resources.yml'
20
21
class OcrdResourceManager():
    """
    Managing processor resources

    The registry (``self.database``) maps an executable name to a list of
    resource descriptions (dicts with at least ``name`` and ``url`` keys, as
    read by the methods below). It is filled from the builtin resource list
    first and from the user's resource list second.
    """

    def __init__(self):
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        self.load_resource_list(builtin_list_filename)
        self.load_resource_list(user_list_filename)

    def load_resource_list(self, list_filename):
        """
        Load the YAML resource list ``list_filename`` into the registry.

        Nothing happens if ``list_filename`` is not an existing file.

        Args:
            list_filename (Path): location of the resource list
        Raises:
            ValueError: if the list does not pass ``OcrdResourceListValidator``
        """
        if not list_filename.is_file():
            return
        with open(list_filename, 'r', encoding='utf-8') as f:
            list_loaded = safe_load(f)
        report = OcrdResourceListValidator.validate(list_loaded)
        if not report.is_valid:
            self.log.error('\n'.join(report.errors))
            raise ValueError("Resource list %s is invalid!" % (list_filename))
        for executable, resource_list in list_loaded.items():
            # Prepend, so user provided is sorted before builtin
            self.database[executable] = resource_list + self.database.get(executable, [])

    def list_available(self, executable=None):
        """
        List models available for download by processor

        Args:
            executable (str): restrict the listing to this executable;
                if falsy, all registered executables are listed
        Returns:
            list of ``(executable, resource_list)`` tuples. An executable that
            is not in the registry yields an empty resource list.
        """
        if executable:
            # .get() so an unknown executable behaves like in list_installed
            # and find_resources instead of raising KeyError
            return [(executable, self.database.get(executable, []))]
        return list(self.database.items())

    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Args:
            executable (str): restrict the listing to this executable;
                if falsy, all executables from the registry plus every
                ``ocrd-*`` entry found in the XDG directories (and in
                ``$VIRTUAL_ENV/share`` if set) are considered
        Returns:
            list of ``(executable, resource_list)`` tuples, sorted by
            executable. Installed files without a registry entry are
            represented by a placeholder dict with ``'???'`` values.
        """
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database)
            # resources in the file system
            parent_dirs = [XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME]
            if 'VIRTUAL_ENV' in environ:
                parent_dirs.append(join(environ['VIRTUAL_ENV'], 'share'))
            for parent_dir in parent_dirs:
                # These directories need not exist (e.g. fresh account or a
                # virtualenv without share/); listdir() would raise on them
                if not Path(parent_dir).is_dir():
                    continue
                all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        ret = []
        # sorted() makes the result order deterministic (set order is arbitrary)
        for this_executable in sorted(set(all_executables)):
            registered = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                # first registry entry with that name wins
                resdict = next((x for x in registered if x['name'] == res_name), None)
                if resdict is None:
                    # TODO handle gracefully
                    resdict = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret

    def find_resources(self, executable=None, name=None, url=None):
        """
        Find resources in the registry

        Args:
            executable (str): only search resources of this executable
            name (str): match resources with this ``name``
            url (str): match resources with this ``url``
        Returns:
            list of ``(executable, resdict)`` tuples. Without ``name`` and
            ``url`` every resource matches; if both are given, a resource
            matching either one is returned. An unknown ``executable``
            yields an empty list.
        """
        if executable:
            if executable not in self.database:
                return []
            candidates = [executable]
        else:
            candidates = list(self.database)
        match_all = not name and not url
        ret = []
        for this_executable in candidates:
            for resdict in self.database[this_executable]:
                if (match_all
                        or (url and url == resdict['url'])
                        or (name and name == resdict['name'])):
                    ret.append((this_executable, resdict))
        return ret

    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the parameter value to use for the resource called ``name``.

        Args:
            name (str): resource name
            usage (str): ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` strips the last file extension
        Raises:
            ValueError: for any other ``usage`` (instead of silently
                returning ``None``)
        """
        if usage == 'as-is':
            return name
        if usage == 'without-extension':
            return Path(name).stem
        raise ValueError("No such usage '%s'" % usage)

    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream ``url`` to the local file ``filename``.

        Args:
            url (str): location to fetch with an HTTP GET request
            filename (str|Path): file to write the response body to
            progress_cb (callable): if set, called with the byte length of
                every chunk received
        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        with requests.get(url, stream=True) as r:
            # Fail before touching the file system: otherwise an error page
            # (or an empty file) would be stored and later be mistaken for
            # an already downloaded resource
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)

    # TODO Proper caching (make head request for size, If-Modified etc)
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): executable the resource belongs to; the
                resource is stored below ``<basedir>/<executable>``
            url (str): location of the resource
            overwrite (bool): replace a resource that already exists
            basedir (str): parent directory of the per-executable directory
            name (str): file/directory name of the resource; defaults to
                ``url`` with all non-alphanumeric characters removed
            resource_type (str): ``'file'`` stores the response as is,
                ``'tarball'`` extracts it and copies ``path_in_archive``
            path_in_archive (str): directory inside the tarball to copy
            progress_cb (callable): passed on to the download, see
                ``_download_impl``
        Returns:
            Path of the resource. For any other ``resource_type`` nothing is
            downloaded and the path is returned regardless.
        """
        from shutil import rmtree

        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            name = re.sub('[^A-Za-z0-9]', '', url)
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False", url, fpath)
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # The copy below runs after pushd_popd has (presumably) changed
            # the working directory, so a relative target must be pinned to
            # the current directory beforehand
            target = fpath.absolute()
            with pushd_popd(tempdir=True):
                log.info("Downloading %s", url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        # NOTE(security): extractall() trusts the member paths
                        # of a downloaded archive; a malicious tarball could
                        # write outside of this directory. Consider validating
                        # members or using an extraction filter.
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                    # Only reachable with an existing target if overwrite is
                    # set; copytree() refuses to write into an existing path
                    if target.is_symlink() or target.is_file():
                        target.unlink()
                    elif target.is_dir():
                        rmtree(str(target))
                    copytree(path_in_archive, str(target))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
155