1
|
|
|
""" |
2
|
|
|
OCR-D CLI: management of processor resources |
3
|
|
|
|
4
|
|
|
.. click:: ocrd.cli.resmgr:resmgr_cli |
5
|
|
|
:prog: ocrd resmgr |
6
|
|
|
:nested: full |
7
|
|
|
""" |
8
|
|
|
import sys |
9
|
|
|
from pathlib import Path |
10
|
|
|
from shutil import which |
11
|
|
|
from yaml import safe_load, safe_dump |
12
|
|
|
|
13
|
|
|
import requests |
14
|
|
|
import click |
15
|
|
|
|
16
|
|
|
from ocrd_utils import ( |
17
|
|
|
directory_size, |
18
|
|
|
getLogger, |
19
|
|
|
get_moduledir, |
20
|
|
|
get_ocrd_tool_json, |
21
|
|
|
resource_filename, |
22
|
|
|
initLogging, |
23
|
|
|
RESOURCE_LOCATIONS, |
24
|
|
|
) |
25
|
|
|
from ocrd.constants import RESOURCE_USER_LIST_COMMENT |
26
|
|
|
|
27
|
|
|
from ..resource_manager import OcrdResourceManager |
28
|
|
|
|
29
|
|
|
|
30
|
|
|
def print_resources(executable, reslist, resmgr):
    """
    Print the resources of one executable to stdout.

    Output is the executable name on its own line, then one entry per
    resource dict in ``reslist`` (name, location and URL, followed by the
    description on a continuation line), then an empty line.

    The location is looked up via ``resmgr.resource_dir_to_location`` for
    entries carrying a ``path`` key and is left empty for all others.
    """
    print(f"{executable}")
    for entry in reslist:
        if 'path' in entry:
            where = resmgr.resource_dir_to_location(entry['path'])
        else:
            # No path recorded for this entry, so no location can be shown.
            where = ''
        print(f"- {entry['name']} @ {where} ({entry['url']})\n {entry['description']}")
    print()
36
|
|
|
|
37
|
|
|
|
38
|
|
|
# Root of the ``ocrd resmgr`` command group; the subcommands in this module
# attach to it through ``@resmgr_cli.command(...)``.
@click.group(name="resmgr")
def resmgr_cli():
    """
    Managing processor resources
    """
    # The docstring above is what click shows as the group's help text.
    # Logging is set up in the group callback, ahead of the subcommand body.
    initLogging()
44
|
|
|
|
45
|
|
|
|
46
|
|
|
@resmgr_cli.command('list-available')
@click.option('-D', '--no-dynamic', is_flag=True, default=False,
              help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
@click.option('-e', '--executable', metavar='EXEC', default='ocrd-*',
              help='Show only resources for executable beginning with EXEC', )
def list_available(executable, no_dynamic):
    """
    List available resources
    """
    # The docstring above is the command's help text, so it stays terse.
    # ``--no-dynamic`` is passed on inverted as the manager's ``dynamic`` flag.
    manager = OcrdResourceManager()
    available = manager.list_available(executable=executable, dynamic=not no_dynamic)
    for exec_name, resources in available:
        print_resources(exec_name, resources, manager)
58
|
|
|
|
59
|
|
|
|
60
|
|
|
@resmgr_cli.command('list-installed')
@click.option('-e', '--executable', help='Show only resources for executable EXEC', metavar='EXEC')
def list_installed(executable=None):
    """
    List installed resources
    """
    # The docstring above is the command's help text, so it stays terse.
    # ``executable`` is None when -e/--executable is not given; it is handed
    # to the resource manager unchanged.
    manager = OcrdResourceManager()
    installed = manager.list_installed(executable)
    for exec_name, resources in installed:
        print_resources(exec_name, resources, manager)
69
|
|
|
|
70
|
|
|
|
71
|
|
|
@resmgr_cli.command('download')
@click.option('-n', '--any-url', default='', help='URL of unregistered resource to download/copy from')
@click.option('-D', '--no-dynamic', default=False, is_flag=True,
              help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
@click.option('-t', '--resource-type', type=click.Choice(['file', 'directory', 'archive']), default='file',
              help='Type of resource',)
@click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type')
@click.option('-a', '--allow-uninstalled', is_flag=True,
              help="Allow installing resources for uninstalled processors",)
@click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
              help="Where to store resources - defaults to first location in processor's 'resource_locations' "
                   "list or finally 'data'")
@click.argument('executable', required=True)
@click.argument('name', required=False)
def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable,
             name):
    """
    Download resource NAME for processor EXECUTABLE.

    NAME is the name of the resource made available by downloading or copying.

    If NAME is '*' (asterisk), then download all known registered resources for this processor.

    If ``--any-url=URL`` or ``-n URL`` is given, then URL is accepted regardless of registered resources for ``NAME``.
    (This can be used for unknown resources or for replacing registered resources.)

    If ``--resource-type`` is set to `archive`, then that archive gets unpacked after download,
    and its ``--path-in-archive`` will subsequently be renamed to NAME.
    """
    # NOTE: the docstring above is the command's --help text; implementation
    # notes therefore live in comments.
    #
    # Exits with status 1 when NAME is missing for a concrete EXECUTABLE, when
    # EXECUTABLE is not on PATH and -a was not given, or when an explicit
    # --location is not among the processor's 'resource_locations'.
    log = getLogger('ocrd.cli.resmgr')
    resmgr = OcrdResourceManager()
    if executable != '*' and not name:
        log.error(f"Unless EXECUTABLE ('{executable}') is the '*' wildcard, NAME is required")
        sys.exit(1)
    elif executable == '*':
        # From here on, None stands for "any executable" ...
        executable = None
    if name == '*':
        # ... and likewise for "any resource name".
        name = None
    if executable and not which(executable):
        if not allow_uninstalled:
            log.error(f"Executable '{executable}' is not installed. "
                      f"To download resources anyway, use the -a/--allow-uninstalled flag")
            sys.exit(1)
        else:
            log.info(f"Executable '{executable}' is not installed, but downloading resources anyway")
    reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
    if not any(r[1] for r in reslist):
        log.info(f"No resources {name} found in registry for executable {executable}")
        if executable and name:
            # Nothing registered: synthesize a single entry from the CLI
            # arguments. '???' marks "no URL given" and is skipped below.
            reslist = [(executable, [{
                'url': any_url or '???',
                'name': name,
                'type': resource_type,
                'path_in_archive': path_in_archive}]
            )]
    for this_executable, this_reslist in reslist:
        for resdict in this_reslist:
            # Entries that come with a 'size' are treated as registered;
            # the synthesized entry above has none.
            if 'size' in resdict:
                registered = "registered"
            else:
                registered = "unregistered"
            if any_url:
                resdict['url'] = any_url
            if resdict['url'] == '???':
                log.warning(f"Cannot download user resource {resdict['name']}")
                continue
            if resdict['url'].startswith('https://') or resdict['url'].startswith('http://'):
                log.info(f"Downloading {registered} resource '{resdict['name']}' ({resdict['url']})")
                if 'size' not in resdict:
                    # Size is only needed for the progress bar; 0 if the
                    # server does not report a content-length.
                    with requests.head(resdict['url']) as r:
                        resdict['size'] = int(r.headers.get('content-length', 0))
            else:
                log.info(f"Copying {registered} resource '{resdict['name']}' ({resdict['url']})")
                urlpath = Path(resdict['url'])
                resdict['url'] = str(urlpath.resolve())
                if Path(urlpath).is_dir():
                    resdict['size'] = directory_size(urlpath)
                else:
                    resdict['size'] = urlpath.stat().st_size
            # Resolve the target location per resource. The --location option
            # itself must not be overwritten here: otherwise the default picked
            # for the first processor would be applied to (and validated
            # against) every following processor.
            allowed_locations = get_ocrd_tool_json(this_executable)['resource_locations']
            if not location:
                this_location = allowed_locations[0]
            elif location not in allowed_locations:
                log.error(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
                          f"refusing to install to invalid location")
                sys.exit(1)
            else:
                this_location = location
            if this_location != 'module':
                basedir = resmgr.location_to_resource_dir(this_location)
            else:
                basedir = get_moduledir(this_executable)
                if not basedir:
                    # No module directory could be determined; fall back to 'data'.
                    basedir = resmgr.location_to_resource_dir('data')

            try:
                with click.progressbar(length=resdict['size']) as bar:
                    fpath = resmgr.download(
                        this_executable,
                        resdict['url'],
                        basedir,
                        name=resdict['name'],
                        resource_type=resdict.get('type', resource_type),
                        path_in_archive=resdict.get('path_in_archive', path_in_archive),
                        overwrite=overwrite,
                        no_subdir=this_location in ['cwd', 'module'],
                        progress_cb=lambda delta: bar.update(delta)
                    )
                if registered == 'unregistered':
                    log.info(f"{this_executable} resource '{name}' ({any_url}) not a known resource, creating stub "
                             f"in {resmgr.user_list}")
                    resmgr.add_to_user_database(this_executable, fpath, url=any_url)
                resmgr.save_user_list()
                log.info(f"Installed resource {resdict['url']} under {fpath}")
            except FileExistsError as exc:
                # An already existing target is reported, not treated as fatal.
                log.info(str(exc))
            log.info(f"Use in parameters as "
                     f"'{resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is'))}'")
189
|
|
|
|
190
|
|
|
|
191
|
|
|
@resmgr_cli.command('migrate')
@click.argument('migration', type=click.Choice(['2.37.0']))
def migrate(migration):
    """
    Update the configuration after updating core to MIGRATION
    """
    # NOTE: the docstring above is the command's --help text; implementation
    # notes therefore live in comments.
    resmgr = OcrdResourceManager(skip_init=True)
    log = getLogger('ocrd.resmgr.migrate')
    if not resmgr.user_list.exists():
        log.info(f'No configuration file found at {resmgr.user_list}, nothing to do')
        # Stop here: reading the missing file below would raise.
        return
    if migration == '2.37.0':
        # Keep a verbatim copy of the old user list next to the original.
        backup_file = resmgr.user_list.with_suffix(f'.yml.before-{migration}')
        yaml_in_str = resmgr.user_list.read_text()
        log.info(f'Backing {resmgr.user_list} to {backup_file}')
        backup_file.write_text(yaml_in_str)
        log.info(f'Applying migration {migration} to {resmgr.user_list}')
        # safe_load yields None for an empty document; treat that as "no entries".
        yaml_in = safe_load(yaml_in_str) or {}
        yaml_out = {}
        for executable, reslist_in in yaml_in.items():
            yaml_out[executable] = []
            # An executable key without a value loads as None.
            for resdict_in in reslist_in or []:
                resdict_out = {}
                for k_in, v_in in resdict_in.items():
                    k_out, v_out = k_in, v_in
                    # Legacy resource types are rewritten; everything else is
                    # copied through unchanged.
                    # NOTE(review): 'tarball' is mapped to 'directory', same as
                    # 'github-dir' - confirm that 'archive' is not intended.
                    if k_in == 'type' and v_in in ('github-dir', 'tarball'):
                        v_out = 'directory'
                    resdict_out[k_out] = v_out
                yaml_out[executable].append(resdict_out)
        resmgr.user_list.write_text(
            RESOURCE_USER_LIST_COMMENT + f'\n# migrated with ocrd resmgr migrate {migration}\n' + safe_dump(yaml_out))
        log.info(f'Applied migration {migration} to {resmgr.user_list}')
225
|
|
|
|