1
|
|
|
""" |
2
|
|
|
OCR-D CLI: management of processor resources |
3
|
|
|
|
4
|
|
|
.. click:: ocrd.cli.resmgr:resmgr_cli |
5
|
|
|
:prog: ocrd resmgr |
6
|
|
|
:nested: full |
7
|
|
|
""" |
8
|
|
|
import sys |
9
|
|
|
from os import environ |
10
|
|
|
from pathlib import Path |
11
|
|
|
from distutils.spawn import find_executable as which |
12
|
|
|
from yaml import safe_load, safe_dump |
13
|
|
|
|
14
|
|
|
import requests |
15
|
|
|
import click |
16
|
|
|
|
17
|
|
|
from ocrd_utils import ( |
18
|
|
|
initLogging, |
19
|
|
|
directory_size, |
20
|
|
|
getLogger, |
21
|
|
|
RESOURCE_LOCATIONS, |
22
|
|
|
) |
23
|
|
|
from ocrd.constants import RESOURCE_USER_LIST_COMMENT |
24
|
|
|
|
25
|
|
|
from ..resource_manager import OcrdResourceManager |
26
|
|
|
|
27
|
|
|
def print_resources(executable, reslist, resmgr):
    """
    Print a human-readable listing of the resources of one processor.

    Writes the executable name, then one two-line entry per resource
    (name, optional ``@ <location>``, URL in parentheses, and the
    description on the following line), then a terminating blank line.

    Args:
        executable: name of the processor the resources belong to.
        reslist: iterable of resource dicts. ``name`` is required;
            ``path``, ``url`` and ``description`` are optional.
        resmgr: object providing ``resource_dir_to_location(path)``. It is
            only consulted for entries that carry a ``path`` key.
    """
    print(executable)
    for resdict in reslist:
        if 'path' in resdict:
            location = '@ %s' % resmgr.resource_dir_to_location(resdict['path'])
        else:
            location = ''
        print('- %s %s (%s)\n %s' % (
            resdict['name'],
            location,
            # Entries may lack these keys (e.g. hand-edited lists); fall back
            # to placeholders instead of aborting the listing with KeyError.
            resdict.get('url', '???'),
            resdict.get('description', ''),
        ))
    print()
37
|
|
|
|
38
|
|
|
@click.group(name="resmgr")
def resmgr_cli():
    """
    Managing processor resources
    """
    # The docstring above doubles as the click help text for the group.
    # Logging is initialised in the group callback, before the subcommands
    # below obtain their loggers.
    initLogging()
44
|
|
|
|
45
|
|
|
@resmgr_cli.command('list-available')
@click.option('-D', '--no-dynamic', is_flag=True, default=False, help="Whether to skip looking into each processor's --dump-json for module-level resources")
@click.option('-e', '--executable', help='Show only resources for executable beginning with EXEC', metavar='EXEC', default='ocrd-*')
def list_available(executable, no_dynamic):
    """
    List available resources
    """
    # One listing per (executable, resources) pair reported by the manager;
    # dynamic lookup is on unless -D/--no-dynamic was passed.
    manager = OcrdResourceManager()
    available = manager.list_available(executable=executable, dynamic=not no_dynamic)
    for exe_name, resources in available:
        print_resources(exe_name, resources, manager)
55
|
|
|
|
56
|
|
|
@resmgr_cli.command('list-installed')
@click.option('-e', '--executable', help='Show only resources for executable EXEC', metavar='EXEC')
def list_installed(executable=None):
    """
    List installed resources
    """
    # One listing per (executable, resources) pair reported by the manager;
    # without -e/--executable, None is passed through to the manager.
    manager = OcrdResourceManager()
    installed = manager.list_installed(executable)
    for exe_name, resources in installed:
        print_resources(exe_name, resources, manager)
65
|
|
|
|
66
|
|
|
@resmgr_cli.command('download')
@click.option('-n', '--any-url', help='URL of unregistered resource to download/copy from', default='')
@click.option('-D', '--no-dynamic', is_flag=True, default=False, help="Whether to skip looking into each processor's --dump-json for module-level resources")
@click.option('-t', '--resource-type', help='Type of resource', type=click.Choice(['file', 'directory', 'archive']), default='file')
@click.option('-P', '--path-in-archive', help='Path to extract in case of archive type', default='.')
@click.option('-a', '--allow-uninstalled', help="Allow installing resources for uninstalled processors", is_flag=True)
@click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
@click.option('-l', '--location', help='Where to store resources', type=click.Choice(RESOURCE_LOCATIONS), default='data', show_default=True)
@click.argument('executable', required=True)
@click.argument('name', required=False)
def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable, name):
    """
    Download resource NAME for processor EXECUTABLE.

    NAME is the name of the resource made available by downloading or copying.

    If NAME is '*' (asterisk), then download all known registered resources for this processor.

    If ``--any-url=URL`` or ``-n URL`` is given, then URL is accepted regardless of registered resources for ``NAME``.
    (This can be used for unknown resources or for replacing registered resources.)

    If ``--resource-type`` is set to `archive`, then that archive gets unpacked after download,
    and its ``--path-in-archive`` will subsequently be renamed to NAME.
    """
    # NOTE: the docstring above is the click help text; keep it user-facing.
    log = getLogger('ocrd.cli.resmgr')
    resmgr = OcrdResourceManager()
    basedir = resmgr.location_to_resource_dir(location)

    # Normalise the '*' wildcards to None; NAME may only be omitted when
    # EXECUTABLE is the wildcard.
    if executable != '*' and not name:
        log.error("Unless EXECUTABLE ('%s') is the '*' wildcard, NAME is required", executable)
        sys.exit(1)
    elif executable == '*':
        executable = None
    if name == '*':
        name = None

    if executable and not which(executable):
        if not allow_uninstalled:
            log.error("Executable '%s' is not installed. "
                      "To download resources anyway, use the -a/--allow-uninstalled flag", executable)
            sys.exit(1)
        log.info("Executable %s is not installed, but "
                 "downloading resources anyway", executable)

    # list_available() yields (executable, [resdict, ...]) pairs. Always
    # flatten them to (executable, resdict) pairs so the loop below also works
    # for the NAME='*' case, and fall back to the registry's executable name
    # when EXECUTABLE was the '*' wildcard.
    reslist = [
        (executable or this_executable, resdict)
        for this_executable, this_reslist in resmgr.list_available(executable=executable, dynamic=not no_dynamic)
        for resdict in this_reslist
        if not name or resdict['name'] == name
    ]
    if not reslist:
        log.info(f"No resources {name} found in registry for executable {executable}")
        if executable and name:
            # Unknown resource: build a stub entry; '???' marks "no URL known".
            reslist = [(executable, {'url': any_url or '???', 'name': name,
                                     'type': resource_type,
                                     'path_in_archive': path_in_archive})]

    for this_executable, resdict in reslist:
        # Entries without a 'size' are treated as not coming from the registry.
        registered = "registered" if 'size' in resdict else "unregistered"
        if any_url:
            resdict['url'] = any_url
        if resdict['url'] == '???':
            log.warning("Cannot download user resource %s", resdict['name'])
            continue
        if resdict['url'].startswith(('https://', 'http://')):
            log.info("Downloading %s resource '%s' (%s)", registered, resdict['name'], resdict['url'])
            with requests.get(resdict['url'], stream=True) as r:
                # Servers may omit Content-Length (e.g. chunked transfer);
                # use 0 rather than crashing on int(None).
                # NOTE(review): confirm resmgr.download() tolerates size=0.
                resdict['size'] = int(r.headers.get('content-length', 0))
        else:
            log.info("Copying %s resource '%s' (%s)", registered, resdict['name'], resdict['url'])
            urlpath = Path(resdict['url'])
            resdict['url'] = str(urlpath.resolve())
            if urlpath.is_dir():
                resdict['size'] = directory_size(urlpath)
            else:
                resdict['size'] = urlpath.stat().st_size
        with click.progressbar(length=resdict['size']) as bar:
            fpath = resmgr.download(
                this_executable,
                resdict['url'],
                name=resdict['name'],
                resource_type=resdict.get('type', resource_type),
                path_in_archive=resdict.get('path_in_archive', path_in_archive),
                overwrite=overwrite,
                size=resdict['size'],
                # With location 'cwd' the resource is stored without a subdirectory.
                no_subdir=location == 'cwd',
                basedir=basedir,
                progress_cb=bar.update
            )
        if registered == 'unregistered':
            log.info("%s resource '%s' (%s) not a known resource, creating stub in %s'", this_executable, resdict['name'], any_url, resmgr.user_list)
            resmgr.add_to_user_database(this_executable, fpath, url=any_url)
        resmgr.save_user_list()
        log.info("Installed resource %s under %s", resdict['url'], fpath)
        log.info("Use in parameters as '%s'", resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is')))
160
|
|
|
|
161
|
|
|
@resmgr_cli.command('migrate')
@click.argument('migration', type=click.Choice(['2.37.0']))
def migrate(migration):
    """
    Update the configuration after updating core to MIGRATION
    """
    # NOTE: the docstring above is the click help text; keep it user-facing.
    resmgr = OcrdResourceManager(skip_init=True)
    log = getLogger('ocrd.resmgr.migrate')
    if not resmgr.user_list.exists():
        log.info(f'No configuration file found at {resmgr.user_list}, nothing to do')
        # Without this return the read_text() below fails on the missing file.
        return
    if migration == '2.37.0':
        # Keep a verbatim copy of the old user list next to the migrated one.
        backup_file = resmgr.user_list.with_suffix(f'.yml.before-{migration}')
        yaml_in_str = resmgr.user_list.read_text()
        log.info(f'Backing {resmgr.user_list} to {backup_file}')
        backup_file.write_text(yaml_in_str)
        log.info(f'Applying migration {migration} to {resmgr.user_list}')
        # An empty or comment-only file parses to None.
        yaml_in = safe_load(yaml_in_str) or {}
        yaml_out = {}
        for executable, reslist_in in yaml_in.items():
            yaml_out[executable] = []
            # An executable key with no entries parses to None as well.
            for resdict_in in reslist_in or []:
                resdict_out = {}
                for k_in, v_in in resdict_in.items():
                    # The legacy resource types 'github-dir' and 'tarball'
                    # both become 'directory'; everything else is copied.
                    if k_in == 'type' and v_in in ('github-dir', 'tarball'):
                        resdict_out[k_in] = 'directory'
                    else:
                        resdict_out[k_in] = v_in
                yaml_out[executable].append(resdict_out)
        resmgr.user_list.write_text(RESOURCE_USER_LIST_COMMENT +
                                    f'\n# migrated with ocrd resmgr migrate {migration}\n' +
                                    safe_dump(yaml_out))
        log.info(f'Applied migration {migration} to {resmgr.user_list}')
196
|
|
|
|