Passed
Pull Request — master (#559)
by Konstantin
02:38
created

OcrdResourceManager.__init__()   A

Complexity

Conditions 4

Size

Total Lines 11
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 11
dl 0
loc 11
rs 9.85
c 0
b 0
f 0
cc 4
nop 1
1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir
4
import re
5
from shutil import copytree
6
from datetime import datetime
7
from tarfile import open as open_tarfile
8
from urllib.parse import urlparse, unquote
9
10
import requests
11
from yaml import safe_load, safe_dump
12
13
from ocrd_validators import OcrdResourceListValidator
14
from ocrd_utils import getLogger
15
from ocrd_utils.constants import HOME, XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME
16
from ocrd_utils.os import list_all_resources, pushd_popd
17
18
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
19
20
class OcrdResourceManager():
21
22
    """
23
    Managing processor resources
24
    """
25
    def __init__(self):
        """
        Set up the logger and the in-memory resource database.

        Loads the resource list at ``RESOURCE_LIST_FILENAME`` first, then the
        user's ``resources.yml`` below ``XDG_CONFIG_HOME/ocrd``. If the user
        list does not exist yet, it is created containing only
        ``RESOURCE_USER_LIST_COMMENT``.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}
        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        self.user_list = Path(XDG_CONFIG_HOME, 'ocrd', 'resources.yml')
        if not self.user_list.exists():
            # parents=True: XDG_CONFIG_HOME itself may not exist yet
            # exist_ok=True: do not fail if the directory is already there
            self.user_list.parent.mkdir(parents=True, exist_ok=True)
            with open(str(self.user_list), 'w', encoding='utf-8') as f:
                f.write(RESOURCE_USER_LIST_COMMENT)
        self.load_resource_list(self.user_list)
36
37
    def load_resource_list(self, list_filename, database=None):
        """
        Merge the resource list stored in ``list_filename`` into ``database``.

        Args:
            list_filename (Path): YAML file to load. If it is not an existing
                regular file, nothing is loaded.
            database (dict): Mapping of executable name to a list of resource
                entries, modified in place. Only ``None`` selects
                ``self.database``; an empty dict passed explicitly is used
                as given.

        Returns:
            dict: The database that was (possibly) updated.

        Raises:
            ValueError: If ``OcrdResourceListValidator`` reports the loaded
                list as invalid (the errors are logged first).
        """
        # Compare against None explicitly: an empty dict is a valid target
        if database is None:
            database = self.database
        if list_filename.is_file():
            with open(list_filename, 'r', encoding='utf-8') as f:
                list_loaded = safe_load(f) or {}
            report = OcrdResourceListValidator.validate(list_loaded)
            if not report.is_valid:
                self.log.error('\n'.join(report.errors))
                raise ValueError("Resource list %s is invalid!" % (list_filename))
            for executable, resource_list in list_loaded.items():
                # Prepend, so user provided is sorted before builtin
                database[executable] = resource_list + database.get(executable, [])
        return database
53
54
    def list_available(self, executable=None):
55
        """
56
        List models available for download by processor
57
        """
58
        if executable:
59
            return [(executable, self.database[executable])]
60
        return [(x, y) for x, y in self.database.items()]
61
62
    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Args:
            executable (str): If given, only this executable is inspected.
                Otherwise every executable in the database is inspected,
                plus every ``ocrd-*`` entry found directly below
                ``XDG_CACHE_HOME``, ``XDG_CONFIG_HOME``, ``XDG_DATA_HOME``
                and, if ``VIRTUAL_ENV`` is set, ``$VIRTUAL_ENV/share``.

        Returns:
            list: ``(executable, resource_list)`` tuples. Files reported by
            ``list_all_resources`` that match no database entry by name are
            passed to ``add_to_user_database`` and its result is listed.
        """
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database.keys())
            # resources in the file system
            parent_dirs = [XDG_CACHE_HOME, XDG_CONFIG_HOME, XDG_DATA_HOME]
            if 'VIRTUAL_ENV' in environ:
                parent_dirs += [join(environ['VIRTUAL_ENV'], 'share')]
            for parent_dir in parent_dirs:
                # Not all of these locations have to exist; listdir() would
                # raise FileNotFoundError on a missing one
                if not Path(parent_dir).is_dir():
                    continue
                all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        ret = []
        # set() so an executable both registered and found on disk is listed once
        for this_executable in set(all_executables):
            known = self.database.get(this_executable, [])
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                resdict = next((x for x in known if x['name'] == res_name), None)
                if resdict is None:
                    self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'" % (this_executable, res_name, res_filename, self.user_list))
                    resdict = self.add_to_user_database(this_executable, res_filename)
                reslist.append(resdict)
            ret.append((this_executable, reslist))
        return ret
89
90
    def add_to_user_database(self, executable, res_filename):
        """
        Add a stub entry to the user resource.yml

        Args:
            executable (str): Executable the resource belongs to.
            res_filename (str): Path of the resource file; it must exist,
                because its size is read with ``stat()``.

        Returns:
            dict: The resource entry for the file's name. If the user list
            already has an entry with that name for ``executable``, that
            entry is returned and the file is left untouched. Otherwise a
            new stub entry is appended and the user list is rewritten.
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(self.user_list, 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        if executable not in user_database:
            user_database[executable] = []
        existing = self.find_resources(executable=executable, name=res_name, database=user_database)
        if existing:
            # find_resources returns (executable, resdict) tuples. Return the
            # entry already on file; previously this path hit an unbound
            # ``resdict`` at the return statement
            return existing[0][1]
        resdict = {
            'name': res_name,
            'url': '???',
            'description': 'Found at %s on %s' % (res_filename, datetime.now()),
            'version_range': '???',
            'size': res_size
        }
        user_database[executable].append(resdict)
        with open(self.user_list, 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        return resdict
0 ignored issues
show
introduced by
The variable `resdict` does not seem to be defined when the `if not self.find_resources(...)` condition on line 100 is False. Are you sure this can never be the case?
Loading history...
114
115
    def find_resources(self, executable=None, name=None, url=None, database=None):
        """
        Find resources in the registry

        Args:
            executable (str): Restrict the search to this executable; all
                executables in the database are searched if omitted.
            name (str): Match entries whose ``name`` equals this value.
            url (str): Match entries whose ``url`` equals this value.
            database (dict): Mapping of executable to resource entries to
                search. Only ``None`` selects ``self.database``; an empty
                dict passed explicitly is searched as given.

        Returns:
            list: ``(executable, resdict)`` tuples. With neither ``name`` nor
            ``url`` every entry matches; otherwise an entry matches if its
            ``url`` or its ``name`` equals the respective argument.
        """
        # Compare against None explicitly: an empty dict is a valid database
        if database is None:
            database = self.database
        if executable and executable not in database:
            return []
        ret = []
        # Separate loop variable so the ``executable`` argument is not shadowed
        for this_executable in ([executable] if executable else list(database)):
            for resdict in database[this_executable]:
                if not name and not url:
                    ret.append((this_executable, resdict))
                elif url and url == resdict['url']:
                    ret.append((this_executable, resdict))
                elif name and name == resdict['name']:
                    ret.append((this_executable, resdict))
        return ret
133
134
    def parameter_usage(self, name, usage='as-is'):
        """
        Derive the parameter value to use for a resource name.

        Args:
            name (str): Resource name.
            usage (str): ``'as-is'`` returns ``name`` unchanged,
                ``'without-extension'`` returns it without its last suffix.

        Returns:
            The derived value, or ``None`` for any other ``usage``.
        """
        if usage == 'without-extension':
            return Path(name).stem
        return name if usage == 'as-is' else None
139
140
    def _download_impl(self, url, filename, progress_cb=None):
        """
        Stream the content at ``url`` into ``filename``.

        Args:
            url (str): URL to fetch with ``requests.get(..., stream=True)``.
            filename: Path of the file to write in binary mode.
            progress_cb (callable): If given, called with the byte length of
                every chunk before that chunk is written.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        with requests.get(url, stream=True) as r:
            # Fail before creating the file, so an HTTP error page is never
            # stored as if it were the resource
            r.raise_for_status()
            # The content-length header is not read: it is optional and was
            # unused, yet int(None) raised TypeError when it was absent
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
149
150
    # TODO Proper caching (make head request for size, If-Modified etc)
151
    def download(
        self,
        executable,
        url,
        overwrite=False,
        basedir=XDG_CACHE_HOME,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
    ):
        """
        Download a resource by URL

        Args:
            executable (str): Name of the directory below ``basedir`` that
                receives the resource.
            url (str): URL to download.
            overwrite (bool): If false and the destination already exists,
                nothing is downloaded.
            basedir (str): Parent directory of the per-executable directory.
            name (str): Destination name below ``basedir/executable``;
                defaults to the percent-decoded last path segment of ``url``.
            resource_type (str): ``'file'`` stores the download directly at
                the destination; ``'tarball'`` downloads and extracts an
                archive, then copies ``path_in_archive`` to the destination.
                For any other value nothing is downloaded.
            path_in_archive (str): Path inside the extracted archive to copy
                (only used for ``'tarball'``).
            progress_cb (callable): Passed on to ``_download_impl``.

        Returns:
            Path: The destination path.
        """
        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir, executable)
        if not name:
            url_parsed = urlparse(url)
            # unquote() decodes percent-escapes in the URL path
            name = Path(unquote(url_parsed.path)).name
        fpath = Path(destdir, name)
        if fpath.exists() and not overwrite:
            log.info("%s to be downloaded to %s which already exists and overwrite is False" % (url, fpath))
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            self._download_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            # NOTE(review): pushd_popd(tempdir=True) presumably changes into a
            # temporary working directory for the block - confirm in ocrd_utils
            with pushd_popd(tempdir=True):
                log.info("Downloading %s" % url)
                self._download_impl(url, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    # 'r:*' lets tarfile detect the compression itself
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
                    copytree(path_in_archive, str(fpath))
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
191