1
|
|
|
""" |
2
|
|
|
Helper methods for running and documenting processors |
3
|
|
|
""" |
4
|
|
|
from time import time |
5
|
|
|
import json |
6
|
|
|
import subprocess |
7
|
|
|
|
8
|
|
|
from click import wrap_text |
9
|
|
|
from ocrd_utils import getLogger |
10
|
|
|
|
11
|
|
|
# Public names exported by a star-import of this module.
__all__ = [
    'generate_processor_help',
    'run_cli',
    'run_processor'
]

# Module-level logger used by run_processor and run_cli.
log = getLogger('ocrd.processor')
18
|
|
|
|
19
|
|
|
def _get_workspace(workspace=None, resolver=None, mets_url=None, working_dir=None): |
20
|
|
|
if workspace is None: |
21
|
|
|
if resolver is None: |
22
|
|
|
raise Exception("Need to pass a resolver to create a workspace") |
23
|
|
|
if mets_url is None: |
24
|
|
|
raise Exception("Need to pass mets_url to create a workspace") |
25
|
|
|
workspace = resolver.workspace_from_url(mets_url, dst_dir=working_dir) |
26
|
|
|
return workspace |
27
|
|
|
|
28
|
|
|
def run_processor(
        processorClass,
        ocrd_tool=None,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        log_level=None,         # TODO actually use this!
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        parameter_override=None,
        working_dir=None,
): # pylint: disable=too-many-locals
    """
    Instantiate ``processorClass`` on a workspace, run it and record it in the METS.

    The workspace is taken from ``workspace`` or, if that is ``None``, created
    from ``mets_url`` via ``resolver``. After ``processor.process()`` returns,
    the elapsed time is logged to the ``ocrd.process.profile`` logger, an agent
    entry is added to ``workspace.mets`` and the METS is saved.

    Args:
        processorClass: Callable invoked as ``processorClass(workspace, ...)``
            to build the processor instance.
        ocrd_tool: Passed to the processor constructor; afterwards replaced by
            ``processor.ocrd_tool`` for naming and logging.
        mets_url: METS location used when a workspace has to be created.
        resolver: Resolver used when a workspace has to be created.
        workspace: Existing workspace to run on (optional).
        page_id: Passed to the processor constructor.
        log_level: Accepted but currently not used by this function.
        input_file_grp: Passed to the processor constructor and logged.
        output_file_grp: Passed to the processor constructor and logged.
        parameter: Passed to the processor constructor; when truthy it is
            rendered with ``json.dumps`` for the profiling log line.
        parameter_override: Accepted but currently not used by this function.
        working_dir: Destination directory used when a workspace is created.

    Returns:
        The processor instance after ``process()`` has run.
    """
    workspace = _get_workspace(
        workspace,
        resolver,
        mets_url,
        working_dir
    )
    log.debug("Running processor %s", processorClass)
    processor = processorClass(
        workspace,
        ocrd_tool=ocrd_tool,
        page_id=page_id,
        input_file_grp=input_file_grp,
        output_file_grp=output_file_grp,
        parameter=parameter
    )
    # From here on use the tool description the processor itself reports.
    ocrd_tool = processor.ocrd_tool
    name = '%s v%s' % (ocrd_tool['executable'], processor.version)
    otherrole = ocrd_tool['steps'][0]
    logProfile = getLogger('ocrd.process.profile')
    log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)
    t0 = time()
    processor.process()
    elapsed = time() - t0
    # Pass the values as logger arguments so the message is only formatted
    # when the profiling logger actually emits the record.
    logProfile.info(
        "Executing processor '%s' took %fs [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s']",
        ocrd_tool['executable'],
        elapsed,
        input_file_grp if input_file_grp else '',
        output_file_grp if output_file_grp else '',
        json.dumps(parameter) if parameter else '{}'
    )
    workspace.mets.add_agent(
        name=name,
        _type='OTHER',
        othertype='SOFTWARE',
        role='OTHER',
        otherrole=otherrole
    )
    workspace.save_mets()
    return processor
87
|
|
|
|
88
|
|
|
def run_cli(
        executable,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        overwrite=None,
        log_level=None,
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        working_dir=None,
):
    """
    Create a workspace for mets_url and run MP CLI through it

    The command line is assembled from the arguments that are set and executed
    with ``subprocess.call``.

    Args:
        executable: Program to run; first element of the argument vector.
        mets_url: METS location; used to create the workspace when none is
            given and passed on as ``--mets``. When ``None`` (only possible if
            ``workspace`` is supplied) the ``--mets`` option is omitted.
        resolver: Resolver used when a workspace has to be created.
        workspace: Existing workspace; its ``directory`` is passed as
            ``--working-dir``.
        page_id: Value for ``--page-id`` (optional).
        overwrite: When truthy, ``--overwrite`` is added.
        log_level: Value for ``--log-level`` (optional).
        input_file_grp: Value for ``--input-file-grp`` (optional).
        output_file_grp: Value for ``--output-file-grp`` (optional).
        parameter: Value for ``--parameter``; a string is passed verbatim,
            any other truthy value is serialised with ``json.dumps``.
        working_dir: Destination directory used when a workspace is created.

    Returns:
        The return code of the subprocess, as returned by ``subprocess.call``.
    """
    workspace = _get_workspace(workspace, resolver, mets_url, working_dir)
    args = [executable, '--working-dir', workspace.directory]
    # A caller may hand in a ready-made workspace without a mets_url. Putting
    # None into the argument vector would make both the join below and
    # subprocess.call raise TypeError, so the option is left out instead.
    # NOTE(review): this relies on the executable's own default for --mets.
    if mets_url is not None:
        args += ['--mets', mets_url]
    if log_level:
        args += ['--log-level', log_level]
    if page_id:
        args += ['--page-id', page_id]
    if input_file_grp:
        args += ['--input-file-grp', input_file_grp]
    if output_file_grp:
        args += ['--output-file-grp', output_file_grp]
    if parameter:
        # Command-line arguments must be strings; render structured
        # parameters (e.g. a dict) as a verbatim JSON string.
        if not isinstance(parameter, str):
            parameter = json.dumps(parameter)
        args += ['--parameter', parameter]
    if overwrite:
        args += ['--overwrite']
    log.debug("Running subprocess '%s'", ' '.join(args))
    return subprocess.call(args)
121
|
|
|
|
122
|
|
|
def generate_processor_help(ocrd_tool):
    """
    Build the command-line help text for a processor from its tool description.

    Args:
        ocrd_tool: Mapping with at least ``executable`` and ``description``.
            Optional keys read here are ``parameters`` (mapping of parameter
            name to a mapping with ``type`` and ``description`` and optionally
            ``required``, ``default`` and ``enum``), ``input_file_grp`` and
            ``output_file_grp``.

    Returns:
        The formatted help message as a single string.
    """
    if 'parameters' not in ocrd_tool or not ocrd_tool['parameters']:
        parameter_help = ' NONE\n'
    else:
        def _wrapped(text):
            # Uniform wrapping for every paragraph of the parameter section.
            return wrap_text(text, initial_indent=' '*3,
                             subsequent_indent=' '*4,
                             width=72, preserve_paragraphs=True)

        chunks = []
        for param_name, param in ocrd_tool['parameters'].items():
            type_label = param['type']
            # A required parameter is flagged as such; otherwise show the
            # default value, if one is declared.
            if 'required' in param and param['required']:
                suffix = ' - REQUIRED'
            elif 'default' in param:
                suffix = ' - %s' % json.dumps(param['default'])
            else:
                suffix = ''
            chunks.append(_wrapped('"%s" [%s%s]' % (param_name, type_label, suffix)))
            chunks.append('\n ' + _wrapped(param['description']))
            if 'enum' in param:
                chunks.append('\n ' + _wrapped('Possible values: %s' % json.dumps(param['enum'])))
            chunks.append("\n")
        parameter_help = ''.join(chunks)

    # NOTE(review): the option columns below are separated by single spaces;
    # confirm the intended alignment of this template.
    template = '''
Usage: %s [OPTIONS]

%s

Options:
-I, --input-file-grp USE File group(s) used as input
-O, --output-file-grp USE File group(s) used as output
-g, --page-id ID Physical page ID(s) to process
--overwrite Remove existing output pages/images
(with --page-id, remove only those)
-p, --parameter JSON-PATH Parameters, either verbatim JSON string
or JSON file path
-P, --param-override KEY VAL Override a single JSON object key-value pair,
taking precedence over --parameter
-m, --mets URL-PATH URL or file path of METS to process
-w, --working-dir PATH Working directory of local workspace
-l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE]
Log level
-J, --dump-json Dump tool description as JSON and exit
-h, --help This help message
-V, --version Show version

Parameters:
%s
Default Wiring:
%s -> %s

'''
    values = (
        ocrd_tool['executable'],
        ocrd_tool['description'],
        parameter_help,
        ocrd_tool.get('input_file_grp', 'NONE'),
        ocrd_tool.get('output_file_grp', 'NONE')
    )
    return template % values
176
|
|
|
|
177
|
|
|
|
178
|
|
|
|
179
|
|
|
|