1
|
|
|
import sys |
2
|
|
|
from contextlib import nullcontext |
3
|
|
|
|
4
|
|
|
from ocrd_utils import ( |
5
|
|
|
config, |
6
|
|
|
initLogging, |
7
|
|
|
is_local_filename, |
8
|
|
|
get_local_filename, |
9
|
|
|
getLogger, |
10
|
|
|
parse_json_string_with_comments, |
11
|
|
|
set_json_key_value_overrides, |
12
|
|
|
parse_json_string_or_file, |
13
|
|
|
redirect_stderr_and_stdout_to_file, |
14
|
|
|
) |
15
|
|
|
from ocrd_validators import WorkspaceValidator |
16
|
|
|
from ocrd_network import ProcessingWorker, ProcessorServer, AgentType |
17
|
|
|
|
18
|
|
|
from ..resolver import Resolver |
19
|
|
|
from ..processor.base import ResourceNotFoundError, run_processor |
20
|
|
|
|
21
|
|
|
from .loglevel_option import ocrd_loglevel |
22
|
|
|
from .parameter_option import parameter_option, parameter_override_option |
23
|
|
|
from .ocrd_cli_options import ocrd_cli_options |
24
|
|
|
from .mets_find_options import mets_find_options |
25
|
|
|
|
26
|
|
|
# Network agent subcommands; the --address, --queue and --database options
# are only accepted together with one of these.
SUBCOMMANDS = [
    AgentType.PROCESSING_WORKER,
    AgentType.PROCESSOR_SERVER,
]
27
|
|
|
|
28
|
|
|
|
29
|
|
|
def _run_resource_query_and_exit(query):
    """Run a resource lookup callback, then terminate the process.

    Exits with the default (success) status when ``query`` returns normally.
    If ``query`` raises ``ResourceNotFoundError``, its message is logged as
    critical on the ``ocrd.processor.base`` logger and the process exits
    with status 1.
    """
    try:
        query()
    except ResourceNotFoundError as e:
        getLogger('ocrd.processor.base').critical(e.message)
        sys.exit(1)
    sys.exit()


def _start_cpu_profiling(profile_file):
    """Enable cProfile immediately and report the results at interpreter exit.

    The exit hook stops the profiler, writes the raw stats to ``profile_file``
    when one is given, and always prints the stats sorted by cumulative time.
    """
    # Imported lazily so the profiling modules are only loaded when needed.
    import cProfile
    import pstats
    import io
    import atexit

    print("Profiling...")
    profiler = cProfile.Profile()
    profiler.enable()

    def report_profile():
        profiler.disable()
        print("Profiling completed")
        if profile_file:
            profiler.dump_stats(profile_file)
        stats_stream = io.StringIO()
        pstats.Stats(profiler, stream=stats_stream).sort_stats("cumulative").print_stats()
        print(stats_stream.getvalue())

    atexit.register(report_profile)


def ocrd_cli_wrap_processor(
    processorClass,
    mets=None,
    mets_server_url=None,
    working_dir=None,
    dump_json=False,
    dump_module_dir=False,
    help=False, # pylint: disable=redefined-builtin
    profile=False,
    profile_file=None,
    version=False,
    overwrite=False,
    debug=False,
    resolve_resource=None,
    show_resource=None,
    list_resources=False,
    # ocrd_network params start #
    subcommand=None,
    address=None,
    queue=None,
    log_filename=None,
    database=None,
    # ocrd_network params end #
    **kwargs
):
    """Dispatch the parsed command-line options of a processor.

    The function works in three stages:

    1. Informational requests (no arguments at all, ``help``, ``version``,
       ``dump_json``, ``dump_module_dir``, ``resolve_resource``,
       ``show_resource``, ``list_resources``) are answered by the processor
       instance and end with ``sys.exit``.
    2. If ``subcommand`` is set, control is handed to
       ``check_and_run_network_agent``. Without a subcommand, passing
       ``address``, ``queue`` or ``database`` is an error.
    3. Otherwise a single processing run is set up: parameters are parsed and
       merged with overrides, the workspace is resolved from ``mets`` /
       ``working_dir`` / ``mets_server_url``, the file groups are validated,
       optional profiling is started, and ``run_processor`` is called with
       the remaining ``kwargs``.

    Args:
        processorClass: processor class; instantiated with ``None`` for the
            informational requests and passed on to ``run_processor``.
        mets, mets_server_url, working_dir: passed through
            ``Resolver.resolve_mets_arguments`` to locate the workspace.
        profile: enable CPU profiling (also enabled when ``'CPU'`` is in
            ``config.OCRD_PROFILE``).
        profile_file: file to dump the raw profile to (falls back to
            ``config.OCRD_PROFILE_FILE`` when that is set); a non-empty value
            enables profiling as well.
        overwrite: sets ``config.OCRD_EXISTING_OUTPUT`` to ``'OVERWRITE'`` and
            validates with an empty output file group.
        debug: sets the ``OCRD_MISSING_INPUT``, ``OCRD_MISSING_OUTPUT`` and
            ``OCRD_EXISTING_OUTPUT`` config values to ``'ABORT'``.
        log_filename: when given, stdout and stderr are redirected to this
            file for the duration of the processing run.
        **kwargs: remaining options; ``parameter``, ``parameter_override``,
            ``input_file_grp``, ``output_file_grp`` and ``page_id`` are read
            here, and everything except ``parameter_override`` is forwarded
            to ``run_processor``.

    Raises:
        ValueError: for network options without a subcommand, or when the
            input or output file group is missing or empty.
        Exception: when the file group validation report is not valid.
        SystemExit: for every informational request.
    """
    # FIXME: remove workspace arg entirely
    processor = processorClass(None)
    if not sys.argv[1:]:
        # Called without any arguments: show usage and signal an error.
        processor.show_help(subcommand=subcommand)
        sys.exit(1)
    if help:
        processor.show_help(subcommand=subcommand)
        sys.exit()
    if version:
        processor.show_version()
        sys.exit()
    if dump_json:
        processor.dump_json()
        sys.exit()
    if dump_module_dir:
        processor.dump_module_dir()
        sys.exit()
    if resolve_resource:
        _run_resource_query_and_exit(
            lambda: print(processor.resolve_resource(resolve_resource)))
    if show_resource:
        _run_resource_query_and_exit(
            lambda: processor.show_resource(show_resource))
    if list_resources:
        processor.list_resources()
        sys.exit()
    if subcommand:
        # Used for checking/starting network agents for the WebAPI architecture
        check_and_run_network_agent(processorClass, subcommand, address, database, queue)
    elif address or queue or database:
        raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")

    # from here: single-run processing context
    initLogging()
    if 'parameter' in kwargs:
        # Disambiguate parameter file/literal, and resolve file
        def resolve(name):
            try:
                return processor.resolve_resource(name)
            except ResourceNotFoundError:
                return None
        kwargs['parameter'] = parse_json_string_or_file(
            *kwargs['parameter'], resolve_preset_file=resolve)
    else:
        kwargs['parameter'] = {}
    # Merge parameter overrides and parameters
    if 'parameter_override' in kwargs:
        set_json_key_value_overrides(kwargs['parameter'], *kwargs.pop('parameter_override'))
    # Assert -I / -O (a missing key is reported like an empty value)
    if not kwargs.get('input_file_grp'):
        raise ValueError('-I/--input-file-grp is required')
    if not kwargs.get('output_file_grp'):
        raise ValueError('-O/--output-file-grp is required')
    resolver = Resolver()
    working_dir, mets, _, mets_server_url = resolver.resolve_mets_arguments(
        working_dir, mets, None, mets_server_url)
    workspace = resolver.workspace_from_url(mets, working_dir, mets_server_url=mets_server_url)
    page_id = kwargs.get('page_id')
    if debug:
        config.OCRD_MISSING_INPUT = 'ABORT'
        config.OCRD_MISSING_OUTPUT = 'ABORT'
        config.OCRD_EXISTING_OUTPUT = 'ABORT'
    if overwrite:
        # Applied after the debug settings, so --overwrite wins for existing output.
        config.OCRD_EXISTING_OUTPUT = 'OVERWRITE'
    # With --overwrite an empty output group is validated instead of the real
    # one -- presumably so existing output is not reported as an error.
    output_grp_to_check = '' if overwrite else kwargs['output_file_grp']
    report = WorkspaceValidator.check_file_grp(
        workspace, kwargs['input_file_grp'], output_grp_to_check, page_id)
    if not report.is_valid:
        raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors))
    # Set up profiling behavior from environment variables/flags
    if not profile and 'CPU' in config.OCRD_PROFILE:
        profile = True
    if not profile_file and config.is_set('OCRD_PROFILE_FILE'):
        profile_file = config.OCRD_PROFILE_FILE
    if profile or profile_file:
        _start_cpu_profiling(profile_file)
    log_ctx = redirect_stderr_and_stdout_to_file(log_filename) if log_filename else nullcontext()
    with log_ctx:
        run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
160
|
|
|
|
161
|
|
|
|
162
|
|
|
def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
    """Validate the option combination for a network agent, then run it.

    Args:
        ProcessorClass: processor class; instantiated with ``workspace=None``
            to read its ``ocrd_tool`` and handed to the agent.
        subcommand: agent type to start; must be one of ``SUBCOMMANDS``.
        address: ``HOST:PORT`` to serve on. Required for the processor
            server, rejected for the processing worker.
        database: passed to the agent as ``mongodb_addr``. Required for both
            agent types.
        queue: passed to the worker as ``rabbitmq_addr``. Required for the
            processing worker, rejected for the processor server.

    Raises:
        ValueError: for an unknown subcommand, a missing or disallowed
            option, or an ``address`` that is not of the form ``HOST:PORT``.

    This function does not return normally: once the agent's run call comes
    back, the process exits with status 0.
    """
    if subcommand not in SUBCOMMANDS:
        raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}")

    if not database:
        raise ValueError(f"Option '--database' is required for subcommand {subcommand}")

    if subcommand == AgentType.PROCESSOR_SERVER:
        if not address:
            raise ValueError(f"Option '--address' required for subcommand {subcommand}")
        if queue:
            raise ValueError(f"Option '--queue' invalid for subcommand {subcommand}")
        # TODO: Better validate that inside the ProcessorServer itself
        # Parse up front so a malformed address fails before anything is
        # instantiated, and with a message that names the offending option.
        try:
            host, port = address.split(':')
            port = int(port)
        except ValueError as error:
            raise ValueError(
                f"Option '--address' must have the form HOST:PORT, got: {address!r}"
            ) from error
    if subcommand == AgentType.PROCESSING_WORKER:
        if address:
            raise ValueError(f"Option '--address' invalid for subcommand {subcommand}")
        if not queue:
            raise ValueError(f"Option '--queue' required for subcommand {subcommand}")

    processor = ProcessorClass(workspace=None)
    if subcommand == AgentType.PROCESSING_WORKER:
        processing_worker = ProcessingWorker(
            rabbitmq_addr=queue,
            mongodb_addr=database,
            processor_name=processor.ocrd_tool['executable'],
            ocrd_tool=processor.ocrd_tool,
            processor_class=ProcessorClass,
        )
        # The RMQConsumer is initialized and a connection to the RabbitMQ is performed
        processing_worker.connect_consumer()
        # Start consuming from the queue with name `processor_name`
        processing_worker.start_consuming()
    elif subcommand == AgentType.PROCESSOR_SERVER:
        processor_server = ProcessorServer(
            mongodb_addr=database,
            processor_name=processor.ocrd_tool['executable'],
            processor_class=ProcessorClass,
        )
        processor_server.run_server(host=host, port=port)
    else:
        # Defensive: only reachable if SUBCOMMANDS gains a member that is
        # not handled above.
        raise ValueError(f"Unknown network agent type, must be one of: {SUBCOMMANDS}")
    sys.exit(0)
207
|
|
|
|