Passed
Pull Request — master (#770)
by Konstantin
02:25
created

OcrdResourceManager.xdg_data_home()   A

Complexity

Conditions 3

Size

Total Lines 8
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 3
nop 1
1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir, getcwd, path
4
import re
5
from shutil import copytree
6
from datetime import datetime
7
from tarfile import open as open_tarfile
8
from urllib.parse import urlparse, unquote
9
10
import requests
11
from yaml import safe_load, safe_dump
12
13
from ocrd_validators import OcrdResourceListValidator
14
from ocrd_utils import getLogger
15
from ocrd_utils.os import list_all_resources, pushd_popd
16
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
17
18
class OcrdResourceManager():
    """
    Managing processor resources

    Keeps an in-memory registry (``self.database``) mapping an executable name
    to a list of resource description dicts. The registry is filled from the
    built-in resource list (``RESOURCE_LIST_FILENAME``) and from the user's
    ``resources.yml`` below the XDG config directory; user entries are sorted
    before built-in ones.
    """

    def __init__(self, userdir=None, xdg_config_home=None, xdg_data_home=None):
        """
        Set up the registry and make sure the user resource list exists.

        Args:
            userdir: home directory override; falls back to ``~`` / ``$HOME``.
            xdg_config_home: override for ``$XDG_CONFIG_HOME``.
            xdg_data_home: override for ``$XDG_DATA_HOME``.

        Raises:
            ValueError: if one of the resource lists fails validation.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}

        self._xdg_data_home = xdg_data_home
        self._xdg_config_home = xdg_config_home
        self._userdir = userdir
        self.user_list = Path(self.xdg_config_home, 'ocrd', 'resources.yml')

        # Built-in list first: load_resource_list prepends, so the user list
        # loaded afterwards ends up in front of the built-in entries.
        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        if not self.user_list.exists():
            # exist_ok avoids a crash if the directory appears concurrently
            self.user_list.parent.mkdir(parents=True, exist_ok=True)
            with open(str(self.user_list), 'w', encoding='utf-8') as f:
                f.write(RESOURCE_USER_LIST_COMMENT)
        self.load_resource_list(self.user_list)

    @property
    def userdir(self):
        """
        Home directory of the user, computed once and cached.

        ``$HOME`` wins over ``os.path.expanduser('~')`` when both differ.
        """
        if not self._userdir:
            self._userdir = path.expanduser('~')
            if 'HOME' in environ and environ['HOME'] != path.expanduser('~'):
                self._userdir = environ['HOME']
        return self._userdir

    @property
    def xdg_data_home(self):
        """
        ``$XDG_DATA_HOME`` if set, else ``<userdir>/.local/share`` (cached).
        """
        if not self._xdg_data_home:
            if 'XDG_DATA_HOME' in environ:
                self._xdg_data_home = environ['XDG_DATA_HOME']
            else:
                self._xdg_data_home = join(self.userdir, '.local', 'share')
        return self._xdg_data_home

    @property
    def xdg_config_home(self):
        """
        ``$XDG_CONFIG_HOME`` if set, else ``<userdir>/.config`` (cached).
        """
        if not self._xdg_config_home:
            if 'XDG_CONFIG_HOME' in environ:
                self._xdg_config_home = environ['XDG_CONFIG_HOME']
            else:
                self._xdg_config_home = join(self.userdir, '.config')
        return self._xdg_config_home

    def load_resource_list(self, list_filename, database=None):
        """
        Load a YAML resource list and merge it into a registry.

        Args:
            list_filename (pathlib.Path): YAML file to read; silently ignored
                when it is not an existing file.
            database (dict): registry to merge into; ``self.database`` when
                omitted. An explicitly passed empty dict is honoured.

        Returns:
            The registry that was merged into.

        Raises:
            ValueError: if the loaded list does not validate.
        """
        # `is None` rather than truthiness: an empty dict passed by the caller
        # must not be replaced by self.database.
        if database is None:
            database = self.database
        if list_filename.is_file():
            with open(list_filename, 'r', encoding='utf-8') as f:
                list_loaded = safe_load(f) or {}
            report = OcrdResourceListValidator.validate(list_loaded)
            if not report.is_valid:
                self.log.error('\n'.join(report.errors))
                raise ValueError("Resource list %s is invalid!" % (list_filename))
            for executable, resource_list in list_loaded.items():
                # Prepend, so user provided is sorted before builtin
                database[executable] = resource_list + database.get(executable, [])
        return database

    def list_available(self, executable=None):
        """
        List models available for download by processor

        Returns ``(executable, resources)`` pairs, restricted to ``executable``
        when given. Raises ``KeyError`` for an executable not in the registry.
        """
        if executable:
            return [(executable, self.database[executable])]
        return self.database.items()

    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Resources reported by ``list_all_resources`` that have no registry
        entry get a stub entry in the user resource list. Every returned
        resource dict has its ``path`` key set to the file that was found.

        Returns:
            list of ``(executable, [resource dict, ...])`` tuples.
        """
        ret = []
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database.keys())
            # resources in the file system
            parent_dirs = [join(x, 'ocrd-resources') for x in [self.xdg_data_home, '/usr/local/share']]
            for parent_dir in parent_dirs:
                if Path(parent_dir).exists():
                    all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        for this_executable in set(all_executables):
            reslist = []
            for res_filename in list_all_resources(this_executable):
                res_name = Path(res_filename).name
                resdict = [x for x in self.database.get(this_executable, []) if x['name'] == res_name]
                if not resdict:
                    self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'", this_executable, res_name, res_filename, self.user_list)
                    resdict = [self.add_to_user_database(this_executable, res_filename)]
                resdict[0]['path'] = res_filename
                reslist.append(resdict[0])
            ret.append((this_executable, reslist))
        return ret

    def add_to_user_database(self, executable, res_filename, url=None):
        """
        Add a stub entry to the user resource.yml

        If the user list already has an entry with the same name for
        ``executable``, that entry is returned and nothing is added. The user
        list is rewritten and reloaded into ``self.database`` either way.

        Returns:
            dict: the (new or existing) resource description.
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(self.user_list, 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        if executable not in user_database:
            user_database[executable] = []
        resources_found = self.find_resources(executable=executable, name=res_name, database=user_database)
        if not resources_found:
            resdict = {
                'name': res_name,
                'url': url if url else '???',
                'description': 'Found at %s on %s' % (self.resource_dir_to_location(res_filename), datetime.now()),
                'version_range': '???',
                'size': res_size
            }
            user_database[executable].append(resdict)
        else:
            # find_resources yields (executable, resdict) tuples; callers of
            # this method expect the dict itself.
            resdict = resources_found[0][1]
        with open(self.user_list, 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        self.load_resource_list(self.user_list)
        return resdict

    def find_resources(self, executable=None, name=None, url=None, database=None):
        """
        Find resources in the registry

        Args:
            executable: restrict the search to this executable.
            name: match on the ``name`` field.
            url: match on the ``url`` field (checked before ``name``).
            database (dict): registry to search; ``self.database`` when
                omitted. An explicitly passed empty dict is honoured.

        Returns:
            list of ``(executable, resource dict)`` tuples; every resource of
            the searched executables when neither ``name`` nor ``url`` is set.
        """
        # `is None` rather than truthiness, see load_resource_list
        if database is None:
            database = self.database
        ret = []
        if executable and executable not in database:
            return ret
        candidates = [executable] if executable else list(database)
        for this_executable in candidates:
            for resdict in database[this_executable]:
                if not name and not url:
                    ret.append((this_executable, resdict))
                elif url and url == resdict['url']:
                    ret.append((this_executable, resdict))
                elif name and name == resdict['name']:
                    ret.append((this_executable, resdict))
        return ret

    @property
    def default_resource_dir(self):
        """
        Resource directory for the ``data`` location.
        """
        return self.location_to_resource_dir('data')

    def location_to_resource_dir(self, location):
        """
        Map a location keyword to a directory.

        ``system`` -> ``/usr/local/share/ocrd-resources``, ``data`` ->
        ``<xdg_data_home>/ocrd-resources``, anything else -> current working
        directory.
        """
        return '/usr/local/share/ocrd-resources' if location == 'system' else \
                join(self.xdg_data_home, 'ocrd-resources') if location == 'data' else \
                getcwd()

    def resource_dir_to_location(self, resource_path):
        """
        Map a path back to ``system``, ``data`` or ``cwd`` by string prefix.

        Returns the path itself (as ``str``) when no location prefix matches.
        """
        resource_path = str(resource_path)
        return 'system' if resource_path.startswith('/usr/local/share/ocrd-resources') else \
               'data' if resource_path.startswith(join(self.xdg_data_home, 'ocrd-resources')) else \
               'cwd' if resource_path.startswith(getcwd()) else \
               resource_path

    def parameter_usage(self, name, usage='as-is'):
        """
        Transform a resource name according to ``usage``.

        ``as-is`` returns ``name`` unchanged, ``without-extension`` strips the
        last file extension.

        Raises:
            ValueError: for any other ``usage``.
        """
        if usage == 'as-is':
            return name
        elif usage == 'without-extension':
            return Path(name).stem
        raise ValueError("No such usage '%s'" % usage)

    def _download_impl(self, url, filename, progress_cb=None, size=None):
        """
        Stream ``url`` into ``filename`` in 4096 byte chunks.

        Args:
            url: HTTP(S) URL to fetch.
            filename: file to write to.
            progress_cb: optional callable, called with the length of every
                chunk received.
            size: unused, accepted for backward compatibility.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        log = getLogger('ocrd.resource_manager._download_impl')
        log.info("Downloading %s to %s" % (url, filename))
        # Request first and check the status before touching the target, so a
        # failed request neither stores an error page as the resource nor
        # leaves an empty file behind that later looks like a finished download.
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)

    def _copy_impl(self, src_filename, filename, progress_cb=None):
        """
        Copy ``src_filename`` to ``filename`` in 4096 byte chunks.

        ``progress_cb``, if given, is called with the length of every chunk
        after it has been written.
        """
        log = getLogger('ocrd.resource_manager._copy_impl')
        log.info("Copying %s" % src_filename)
        with open(filename, 'wb') as f_out, open(src_filename, 'rb') as f_in:
            # iter() with sentinel stops at the first empty read (EOF)
            for chunk in iter(lambda: f_in.read(4096), b''):
                f_out.write(chunk)
                if progress_cb:
                    progress_cb(len(chunk))

    # TODO Proper caching (make head request for size, If-Modified etc)
    def download(
        self,
        executable,
        url,
        basedir,
        overwrite=False,
        no_subdir=False,
        name=None,
        resource_type='file',
        path_in_archive='.',
        progress_cb=None,
        size=None,
    ):
        """
        Download a resource by URL

        Args:
            executable: processor the resource belongs to; used as
                sub-directory of ``basedir`` unless ``no_subdir`` is set.
            url: ``http(s)://`` URL to download, or a local path to copy.
            basedir: directory to store the resource below.
            overwrite: replace an already existing target.
            no_subdir: store directly in ``basedir``.
            name: target name; derived from the URL path when omitted.
            resource_type: ``file`` or ``tarball``; other values do nothing.
            path_in_archive: for tarballs, the path inside the extracted
                archive that is copied to the target.
            progress_cb: optional callable receiving chunk lengths.
            size: size hint forwarded to the tarball download.

        Returns:
            pathlib.Path: location of the resource.
        """
        log = getLogger('ocrd.resource_manager.download')
        destdir = Path(basedir) if no_subdir else Path(basedir, executable)
        if not name:
            url_parsed = urlparse(url)
            name = Path(unquote(url_parsed.path)).name
        fpath = Path(destdir, name)
        is_url = url.startswith('https://') or url.startswith('http://')
        if fpath.exists() and not overwrite:
            log.info("%s to be %s to %s which already exists and overwrite is False" % (url, 'downloaded' if is_url else 'copied', fpath))
            return fpath
        destdir.mkdir(parents=True, exist_ok=True)
        if resource_type == 'file':
            if is_url:
                self._download_impl(url, fpath, progress_cb)
            else:
                self._copy_impl(url, fpath, progress_cb)
        elif resource_type == 'tarball':
            from shutil import rmtree
            # The work below runs with a different working directory (the
            # relative 'download.tar.xx' / '../download.tar.xx' paths rely on
            # that), so relative source and target paths are pinned first.
            src = url if is_url else path.abspath(url)
            dest = path.abspath(str(fpath))
            with pushd_popd(tempdir=True):
                if is_url:
                    self._download_impl(src, 'download.tar.xx', progress_cb, size)
                else:
                    self._copy_impl(src, 'download.tar.xx', progress_cb)
                Path('out').mkdir()
                with pushd_popd('out'):
                    log.info("Extracting tarball")
                    # NOTE(review): extractall() on an archive from an external
                    # source is open to path traversal; consider validating
                    # member names (or an extraction filter) before extracting.
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
                        tar.extractall()
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
                    # Only reachable with an existing target when overwrite is
                    # set; copytree refuses to write into an existing target.
                    if path.isdir(dest) and not path.islink(dest):
                        rmtree(dest)
                    elif path.lexists(dest):
                        Path(dest).unlink()
                    copytree(path_in_archive, dest)
        # TODO
        # elif resource_type == 'github-dir':
        return fpath
263