Passed
Pull Request — master (#797)
by Konstantin
04:57
created

OcrdResourceManager.xdg_config_home()   A

Complexity

Conditions 3

Size

Total Lines 8
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 3
nop 1
1
from pathlib import Path
2
from os.path import join
3
from os import environ, listdir, getcwd, path, unlink
4
from shutil import copytree as copytree_, rmtree
5
from datetime import datetime
6
from tarfile import open as open_tarfile
7
from urllib.parse import urlparse, unquote
8
9
import requests
10
from yaml import safe_load, safe_dump
11
12
from ocrd_validators import OcrdResourceListValidator
13
from ocrd_utils import getLogger
14
from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd
15
from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT
16
17
def copytree(src, dst, *args, overwrite=False, **kwargs):
    """
    Copy the directory tree ``src`` to ``dst`` with :py:func:`shutil.copytree`.

    Args:
        src: Directory to copy.
        dst: Directory to create.
        *args: Passed through to :py:func:`shutil.copytree`.
        overwrite (bool): Remove whatever currently is at ``dst`` before
            copying. A ``dst`` that does not exist yet is not an error.
        **kwargs: Passed through to :py:func:`shutil.copytree`.

    Returns:
        The return value of :py:func:`shutil.copytree`.
    """
    if overwrite:
        target = Path(dst)
        if target.is_dir() and not target.is_symlink():
            rmtree(dst)
        elif target.exists() or target.is_symlink():
            # A plain file or a (possibly dangling) symlink: rmtree() would
            # refuse it, so remove the single directory entry instead.
            target.unlink()
    return copytree_(src, dst, *args, **kwargs)
21
22
class OcrdResourceManager():
23
24
    """
25
    Managing processor resources
26
    """
27
    def __init__(self, userdir=None, xdg_config_home=None, xdg_data_home=None):
        """
        Build the in-memory registry from the bundled and the user resource list.

        Args:
            userdir (str): Overrides the home directory otherwise detected
                by the ``userdir`` property.
            xdg_config_home (str): Overrides the directory otherwise
                detected by the ``xdg_config_home`` property.
            xdg_data_home (str): Overrides the directory otherwise detected
                by the ``xdg_data_home`` property.
        """
        self.log = getLogger('ocrd.resource_manager')
        self.database = {}

        # The overrides must be in place before the first property access below
        self._userdir = userdir
        self._xdg_config_home = xdg_config_home
        self._xdg_data_home = xdg_data_home
        self.user_list = Path(self.xdg_config_home, 'ocrd', 'resources.yml')

        self.load_resource_list(Path(RESOURCE_LIST_FILENAME))
        if not self.user_list.exists():
            config_dir = self.user_list.parent
            if not config_dir.exists():
                config_dir.mkdir(parents=True)
            # Seed the user list with the explanatory comment only
            with open(str(self.user_list), 'w', encoding='utf-8') as user_list_file:
                user_list_file.write(RESOURCE_USER_LIST_COMMENT)
        # Loaded last, so user entries end up in front of the bundled ones
        self.load_resource_list(self.user_list)
43
44
    @property
45
    def userdir(self):
46
        if not self._userdir:
47
            self._userdir = path.expanduser('~')
48
            if 'HOME' in environ and environ['HOME'] != path.expanduser('~'):
49
                self._userdir = environ['HOME']
50
        return self._userdir
51
52
    @property
53
    def xdg_data_home(self):
54
        if not self._xdg_data_home:
55
            if 'XDG_DATA_HOME' in environ:
56
                self._xdg_data_home = environ['XDG_DATA_HOME']
57
            else:
58
                self._xdg_data_home = join(self.userdir, '.local', 'share')
59
        return self._xdg_data_home
60
61
    @property
62
    def xdg_config_home(self):
63
        if not self._xdg_config_home:
64
            if 'XDG_CONFIG_HOME' in environ:
65
                self._xdg_config_home = environ['XDG_CONFIG_HOME']
66
            else:
67
                self._xdg_config_home = join(self.userdir, '.config')
68
        return self._xdg_config_home
69
70
    def load_resource_list(self, list_filename, database=None):
71
        if not database:
72
            database = self.database
73
        if list_filename.is_file():
74
            with open(list_filename, 'r', encoding='utf-8') as f:
75
                list_loaded = safe_load(f) or {}
76
            report = OcrdResourceListValidator.validate(list_loaded)
77
            if not report.is_valid:
78
                self.log.error('\n'.join(report.errors))
79
                raise ValueError("Resource list %s is invalid!" % (list_filename))
80
            for executable, resource_list in list_loaded.items():
81
                if executable not in database:
82
                    database[executable] = []
83
                # Prepend, so user provided is sorted before builtin
84
                database[executable] = list_loaded[executable] + database[executable]
85
        return database
86
87
    def list_available(self, executable=None):
88
        """
89
        List models available for download by processor
90
        """
91
        if executable:
92
            return [(executable, self.database[executable])]
93
        return self.database.items()
94
95
    def list_installed(self, executable=None):
        """
        List installed resources, matching with registry by ``name``

        Resources found on disk without a registry entry of the same name
        get a stub entry in the user resource list.

        Args:
            executable (str): Restrict the listing to this processor. If
                unset, all processors in the registry plus all ``ocrd-*``
                entries of the data and system resource directories are
                listed.

        Returns:
            list: ``(executable, resource_list)`` pairs sorted by executable
            name; every resource dict carries the ``path`` it was found at.
        """
        if executable:
            all_executables = [executable]
        else:
            # resources we know about
            all_executables = list(self.database.keys())
            # resources in the file system
            for share_dir in (self.xdg_data_home, '/usr/local/share'):
                parent_dir = join(share_dir, 'ocrd-resources')
                # is_dir() rather than exists(): listdir() fails on a plain file
                if Path(parent_dir).is_dir():
                    all_executables += [x for x in listdir(parent_dir) if x.startswith('ocrd-')]
        ret = []
        # sorted() makes the result order reproducible (set order is not)
        for this_executable in sorted(set(all_executables)):
            reslist = []
            has_dirs, has_files = get_processor_resource_types(this_executable)
            for res_filename in list_all_resources(this_executable):
                res_path = Path(res_filename)
                # Skip resource kinds this processor does not declare
                if res_path.is_dir() and not has_dirs:
                    continue
                if res_path.is_file() and not has_files:
                    continue
                res_name = res_path.name
                resdict = [x for x in self.database.get(this_executable, []) if x['name'] == res_name]
                if not resdict:
                    self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s", this_executable, res_name, res_filename, self.user_list)
                    resdict = [self.add_to_user_database(this_executable, res_filename)]
                resdict[0]['path'] = res_filename
                reslist.append(resdict[0])
            ret.append((this_executable, reslist))
        return ret
127
128
    def add_to_user_database(self, executable, res_filename, url=None):
        """
        Add a stub entry to the user resource.yml

        An entry is only added if the user list has none of the same name
        for ``executable`` yet. The user list is rewritten and reloaded into
        ``self.database`` in either case.

        Args:
            executable (str): Processor the resource belongs to.
            res_filename: Path of the resource; must exist, since its size
                is read from the file system.
            url (str): URL to record for the resource, ``'???'`` if unset.

        Returns:
            dict: The new or the already existing user list entry.
        """
        res_name = Path(res_filename).name
        res_size = Path(res_filename).stat().st_size
        with open(self.user_list, 'r', encoding='utf-8') as f:
            user_database = safe_load(f) or {}
        user_database.setdefault(executable, [])
        resources_found = self.find_resources(executable=executable, name=res_name, database=user_database)
        if not resources_found:
            resdict = {
                'name': res_name,
                'url': url if url else '???',
                'description': 'Found at %s on %s' % (self.resource_dir_to_location(res_filename), datetime.now()),
                'version_range': '???',
                'size': res_size
            }
            user_database[executable].append(resdict)
        else:
            # find_resources() yields (executable, resdict) pairs; callers
            # expect the resource dict itself
            resdict = resources_found[0][1]
        with open(self.user_list, 'w', encoding='utf-8') as f:
            f.write(RESOURCE_USER_LIST_COMMENT)
            f.write('\n')
            f.write(safe_dump(user_database))
        # NOTE(review): load_resource_list() prepends, so user entries that
        # were loaded before end up in self.database again - confirm intended
        self.load_resource_list(self.user_list)
        return resdict
156
157
    def find_resources(self, executable=None, name=None, url=None, database=None):
158
        """
159
        Find resources in the registry
160
        """
161
        if not database:
162
            database = self.database
163
        ret = []
164
        if executable and executable not in database.keys():
165
            return ret
166
        for executable in [executable] if executable else database.keys():
167
            for resdict in database[executable]:
168
                if not name and not url:
169
                    ret.append((executable, resdict))
170
                elif url and url == resdict['url']:
171
                    ret.append((executable, resdict))
172
                elif name and name == resdict['name']:
173
                    ret.append((executable, resdict))
174
        return ret
175
176
    @property
177
    def default_resource_dir(self):
178
        return self.location_to_resource_dir('data')
179
180
    def location_to_resource_dir(self, location):
181
        return '/usr/local/share/ocrd-resources' if location == 'system' else \
182
                join(self.xdg_data_home, 'ocrd-resources') if location == 'data' else \
183
                getcwd()
184
185
    def resource_dir_to_location(self, resource_path):
186
        resource_path = str(resource_path)
187
        return 'system' if resource_path.startswith('/usr/local/share/ocrd-resources') else \
188
               'data' if resource_path.startswith(join(self.xdg_data_home, 'ocrd-resources')) else \
189
               'cwd' if resource_path.startswith(getcwd()) else \
190
               resource_path
191
192
    def parameter_usage(self, name, usage='as-is'):
193
        if usage == 'as-is':
194
            return name
195
        elif usage == 'without-extension':
196
            return Path(name).stem
197
        raise ValueError("No such usage '%s'" % usage)
198
199
    def _download_impl(self, url, filename, progress_cb=None, size=None, overwrite=False):
        """
        Stream the content at ``url`` into ``filename``.

        Args:
            url (str): HTTP(S) URL to fetch.
            filename: File to write the response body to.
            progress_cb (callable): Called with the byte count of every
                chunk received.
            size (int): Not used; accepted so existing callers keep working.
            overwrite (bool): Remove an existing ``filename`` before writing.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        log = getLogger('ocrd.resource_manager._download_impl')
        log.info("Downloading %s to %s", url, filename)
        with requests.get(url, stream=True) as r:
            # Fail before touching the target file, so an error page is
            # never stored as (or instead of) the resource
            r.raise_for_status()
            if Path(filename).exists() and overwrite:
                unlink(filename)
            with open(filename, 'wb') as f:
                for data in r.iter_content(chunk_size=4096):
                    if progress_cb:
                        progress_cb(len(data))
                    f.write(data)
211
212
    def _copy_impl(self, src_filename, filename, progress_cb=None, overwrite=False):
213
        log = getLogger('ocrd.resource_manager._copy_impl')
214
        log.info("Copying %s" % src_filename)
215
        if Path(filename).exists() and overwrite:
216
            unlink(filename)
217
        with open(filename, 'wb') as f_out, open(src_filename, 'rb') as f_in:
218
            while True:
219
                chunk = f_in.read(4096)
220
                if chunk:
221
                    f_out.write(chunk)
222
                    if progress_cb:
223
                        progress_cb(len(chunk))
224
                else:
225
                    break
226
227
    # TODO Proper caching (make head request for size, If-Modified etc)
228
    def download(
229
        self,
230
        executable,
231
        url,
232
        basedir,
233
        overwrite=False,
234
        no_subdir=False,
235
        name=None,
236
        resource_type='file',
237
        path_in_archive='.',
238
        progress_cb=None,
239
        size=None,
240
    ):
241
        """
242
        Download a resource by URL
243
        """
244
        log = getLogger('ocrd.resource_manager.download')
245
        destdir = Path(basedir) if no_subdir else Path(basedir, executable)
246
        if not name:
247
            url_parsed = urlparse(url)
248
            name = Path(unquote(url_parsed.path)).name
249
        fpath = Path(destdir, name)
250
        is_url = url.startswith('https://') or url.startswith('http://')
251
        if fpath.exists() and not overwrite:
252
            log.info("%s to be %s to %s which already exists and overwrite is False" % (url, 'downloaded' if is_url else 'copied', fpath))
253
            return fpath
254
        destdir.mkdir(parents=True, exist_ok=True)
255
        if resource_type == 'file':
256
            if is_url:
257
                self._download_impl(url, fpath, progress_cb, overwrite=overwrite)
258
            else:
259
                self._copy_impl(url, fpath, progress_cb, overwrite=overwrite)
260
        elif resource_type == 'tarball':
261
            with pushd_popd(tempdir=True):
262
                if is_url:
263
                    self._download_impl(url, 'download.tar.xx', progress_cb, size, overwrite=overwrite)
264
                else:
265
                    self._copy_impl(url, 'download.tar.xx', progress_cb, overwrite=overwrite)
266
                Path('out').mkdir()
267
                with pushd_popd('out'):
268
                    log.info("Extracting tarball")
269
                    with open_tarfile('../download.tar.xx', 'r:*') as tar:
270
                        tar.extractall()
271
                    log.info("Copying '%s' from tarball to %s" % (path_in_archive, fpath))
272
                    copytree(path_in_archive, str(fpath), overwrite=overwrite)
273
        # TODO
274
        # elif resource_type == 'github-dir':
275
        return fpath
276