|
1
|
|
|
""" |
|
2
|
|
|
OCR-D CLI: management of processor resources |
|
3
|
|
|
|
|
4
|
|
|
.. click:: ocrd.cli.resmgr:resmgr_cli |
|
5
|
|
|
:prog: ocrd resmgr |
|
6
|
|
|
:nested: full |
|
7
|
|
|
""" |
|
8
|
|
|
import sys |
|
9
|
|
|
from os import environ |
|
10
|
|
|
from pathlib import Path |
|
11
|
|
|
from distutils.spawn import find_executable as which |
|
12
|
|
|
from yaml import safe_load, safe_dump |
|
13
|
|
|
|
|
14
|
|
|
import requests |
|
15
|
|
|
import click |
|
16
|
|
|
|
|
17
|
|
|
from ocrd_utils import ( |
|
18
|
|
|
initLogging, |
|
19
|
|
|
directory_size, |
|
20
|
|
|
getLogger, |
|
21
|
|
|
RESOURCE_LOCATIONS, |
|
22
|
|
|
) |
|
23
|
|
|
from ocrd.constants import RESOURCE_USER_LIST_COMMENT |
|
24
|
|
|
|
|
25
|
|
|
from ..resource_manager import OcrdResourceManager |
|
26
|
|
|
|
|
27
|
|
|
def print_resources(executable, reslist, resmgr):
    """
    Print a human-readable listing of the resources of one executable.

    The executable name is printed first, then one two-line entry per
    resource (name, optional location, URL, then the description on its own
    line), and finally an empty line as separator.

    Args:
        executable: name printed as the heading of the listing.
        reslist: iterable of resource dicts; each must provide the keys
            ``name``, ``url`` and ``description`` and may provide ``path``.
        resmgr: object providing ``resource_dir_to_location(path)``, used to
            render the ``path`` of a resource when it has one.
    """
    print('%s' % executable)
    for entry in reslist:
        res_name = entry['name']
        # Only resources carrying a 'path' get an '@ <location>' part;
        # for all others that slot stays empty.
        if 'path' in entry:
            where = '@ %s' % resmgr.resource_dir_to_location(entry['path'])
        else:
            where = ''
        fields = (res_name, where, entry['url'], entry['description'])
        print('- %s %s (%s)\n %s' % fields)
    print()
|
37
|
|
|
|
|
38
|
|
|
@click.group(name="resmgr")
def resmgr_cli():
    """
    Managing processor resources
    """
    # The docstring above doubles as the click help text of the group, so it
    # is kept short. The group callback itself only sets up OCR-D logging.
    initLogging()
|
44
|
|
|
|
|
45
|
|
|
@resmgr_cli.command('list-available')
@click.option(
    '-D', '--no-dynamic',
    is_flag=True,
    default=False,
    help="Whether to skip looking into each processor's --dump-json for module-level resources",
)
@click.option(
    '-e', '--executable',
    help='Show only resources for executable beginning with EXEC',
    metavar='EXEC',
    default='ocrd-*',
)
def list_available(executable, no_dynamic):
    """
    List available resources
    """
    # The resource manager yields (executable, resource-list) pairs; each
    # pair is rendered as its own listing.
    manager = OcrdResourceManager()
    use_dynamic = not no_dynamic
    available = manager.list_available(executable=executable, dynamic=use_dynamic)
    for exec_name, resources in available:
        print_resources(exec_name, resources, manager)
|
55
|
|
|
|
|
56
|
|
|
@resmgr_cli.command('list-installed')
@click.option(
    '-e', '--executable',
    help='Show only resources for executable EXEC',
    metavar='EXEC',
)
def list_installed(executable=None):
    """
    List installed resources
    """
    # The resource manager yields (executable, resource-list) pairs; each
    # pair is rendered as its own listing.
    manager = OcrdResourceManager()
    installed = manager.list_installed(executable)
    for exec_name, resources in installed:
        print_resources(exec_name, resources, manager)
|
65
|
|
|
|
|
66
|
|
|
@resmgr_cli.command('download')
@click.option('-n', '--any-url', help='URL of unregistered resource to download/copy from', default='')
@click.option('-D', '--no-dynamic', is_flag=True, default=False, help="Whether to skip looking into each processor's --dump-json for module-level resources")
@click.option('-t', '--resource-type', help='Type of resource', type=click.Choice(['file', 'directory', 'archive']), default='file')
@click.option('-P', '--path-in-archive', help='Path to extract in case of archive type', default='.')
@click.option('-a', '--allow-uninstalled', help="Allow installing resources for uninstalled processors", is_flag=True)
@click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
@click.option('-l', '--location', help='Where to store resources', type=click.Choice(RESOURCE_LOCATIONS), default='data', show_default=True)
@click.argument('executable', required=True)
@click.argument('name', required=False)
def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable, name):
    """
    Download resource NAME for processor EXECUTABLE.

    NAME is the name of the resource made available by downloading or copying.

    If NAME is '*' (asterisk), then download all known registered resources for this processor.

    If ``--any-url=URL`` or ``-n URL`` is given, then URL is accepted regardless of registered resources for ``NAME``.
    (This can be used for unknown resources or for replacing registered resources.)

    If ``--resource-type`` is set to `archive`, then that archive gets unpacked after download,
    and its ``--path-in-archive`` will subsequently be renamed to NAME.
    """
    # NOTE: the docstring above is the click help text and is therefore left
    # untouched; implementation notes live in comments.
    #
    # Exits with status 1 when NAME is missing for a concrete EXECUTABLE, when
    # --any-url is neither an http(s) URL nor an existing local path, or when
    # EXECUTABLE is not installed and --allow-uninstalled was not given.
    log = getLogger('ocrd.cli.resmgr')
    resmgr = OcrdResourceManager()
    basedir = resmgr.location_to_resource_dir(location)

    if executable != '*' and not name:
        log.error("Unless EXECUTABLE ('%s') is the '*' wildcard, NAME is required", executable)
        sys.exit(1)
    # From here on, None stands for the '*' wildcard.
    if executable == '*':
        executable = None
    if name == '*':
        name = None

    # Anything that is not an http(s) URL is treated as a local path further
    # down (and stat()ed), so reject a non-existing path up front with a
    # readable message instead of an exception in the middle of the loop.
    if any_url and not any_url.startswith(('https://', 'http://')) and not Path(any_url).exists():
        log.error("'%s' is neither an http(s) URL nor an existing local path", any_url)
        sys.exit(1)

    if executable and not which(executable):
        if not allow_uninstalled:
            log.error("Executable '%s' is not installed. "
                      "To download resources anyway, use the -a/--allow-uninstalled flag", executable)
            sys.exit(1)
        log.info("Executable %s is not installed, but "
                 "downloading resources anyway", executable)

    # list_available() yields (executable, [resdict, ...]) pairs. Always
    # flatten them into (executable, resdict) pairs, so the loop below works
    # with and without a NAME filter. An explicitly requested executable takes
    # precedence; with the '*' wildcard each resource keeps the executable it
    # is registered for.
    available = resmgr.list_available(executable=executable, dynamic=not no_dynamic)
    reslist = [
        (executable or listed_executable, resdict)
        for listed_executable, resdicts in available
        for resdict in resdicts
        if not name or resdict['name'] == name
    ]
    if not reslist:
        log.info(f"No resources {name} found in registry for executable {executable}")
        if executable and name:
            # Stub entry for a resource unknown to the registry; '???' marks
            # a resource without any URL to fetch it from.
            reslist = [(executable, {'url': any_url or '???', 'name': name,
                                     'type': resource_type,
                                     'path_in_archive': path_in_archive})]

    for res_executable, resdict in reslist:
        # An entry that already carries a 'size' is taken to be registered.
        registered = "registered" if 'size' in resdict else "unregistered"
        if any_url:
            resdict['url'] = any_url
        if resdict['url'] == '???':
            log.warning("Cannot download user resource %s", resdict['name'])
            continue
        if resdict['url'].startswith(('https://', 'http://')):
            log.info("Downloading %s resource '%s' (%s)", registered, resdict['name'], resdict['url'])
            # Only the response headers are needed here, to size the progress bar.
            with requests.get(resdict['url'], stream=True) as r:
                content_length = r.headers.get('content-length')
            if content_length is not None:
                resdict['size'] = int(content_length)
            else:
                # Servers may omit Content-Length (e.g. chunked transfer);
                # fall back to the registered size, or 0 if there is none.
                resdict.setdefault('size', 0)
        else:
            log.info("Copying %s resource '%s' (%s)", registered, resdict['name'], resdict['url'])
            urlpath = Path(resdict['url'])
            resdict['url'] = str(urlpath.resolve())
            if urlpath.is_dir():
                resdict['size'] = directory_size(urlpath)
            else:
                resdict['size'] = urlpath.stat().st_size
        with click.progressbar(length=resdict['size']) as bar:
            fpath = resmgr.download(
                res_executable,
                resdict['url'],
                name=resdict['name'],
                resource_type=resdict.get('type', resource_type),
                path_in_archive=resdict.get('path_in_archive', path_in_archive),
                overwrite=overwrite,
                size=resdict['size'],
                no_subdir=location == 'cwd',
                basedir=basedir,
                progress_cb=lambda delta: bar.update(delta)
            )
        if registered == 'unregistered':
            # Record the URL as given by the user; fall back to the entry's
            # own URL when --any-url was not used.
            source_url = any_url or resdict['url']
            log.info("%s resource '%s' (%s) not a known resource, creating stub in %s",
                     res_executable, resdict['name'], source_url, resmgr.user_list)
            resmgr.add_to_user_database(res_executable, fpath, url=source_url)
        resmgr.save_user_list()
        log.info("Installed resource %s under %s", resdict['url'], fpath)
        log.info("Use in parameters as '%s'", resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is')))
|
160
|
|
|
|
|
161
|
|
|
@resmgr_cli.command('migrate')
@click.argument('migration', type=click.Choice(['2.37.0']))
def migrate(migration):
    """
    Update the configuration after updating core to MIGRATION
    """
    # NOTE: the docstring above is the click help text and is therefore left
    # untouched; implementation notes live in comments.
    resmgr = OcrdResourceManager(skip_init=True)
    log = getLogger('ocrd.resmgr.migrate')
    if not resmgr.user_list.exists():
        log.info(f'No configuration file found at {resmgr.user_list}, nothing to do')
        # Without a user list there is nothing to back up or rewrite.
        return
    if migration == '2.37.0':
        # Keep a verbatim copy of the old file next to it before rewriting.
        backup_file = resmgr.user_list.with_suffix(f'.yml.before-{migration}')
        yaml_in_str = resmgr.user_list.read_text()
        log.info(f'Backing {resmgr.user_list} to {backup_file}')
        backup_file.write_text(yaml_in_str)
        log.info(f'Applying migration {migration} to {resmgr.user_list}')
        # safe_load() returns None for an empty document.
        yaml_in = safe_load(yaml_in_str) or {}
        # Old resource 'type' values and their replacements.
        # NOTE(review): 'tarball' maps to 'directory' (not 'archive'), exactly
        # as before -- confirm this is intended.
        type_renames = {'github-dir': 'directory', 'tarball': 'directory'}
        yaml_out = {}
        for executable, reslist_in in yaml_in.items():
            yaml_out[executable] = []
            # An executable key with a null value is treated as an empty list.
            for resdict_in in reslist_in or []:
                resdict_out = {}
                for key, value in resdict_in.items():
                    if key == 'type' and isinstance(value, str):
                        value = type_renames.get(value, value)
                    resdict_out[key] = value
                yaml_out[executable].append(resdict_out)
        resmgr.user_list.write_text(
            RESOURCE_USER_LIST_COMMENT +
            f'\n# migrated with ocrd resmgr migrate {migration}\n' +
            safe_dump(yaml_out))
        log.info(f'Applied migration {migration} to {resmgr.user_list}')
|
196
|
|
|
|