|
1
|
|
|
import json |
|
2
|
|
|
|
|
3
|
|
|
from ocrd.processor.base import run_cli |
|
4
|
|
|
from ocrd.resolver import Resolver |
|
5
|
|
|
from ocrd_utils import getLogger |
|
6
|
|
|
from ocrd_validators import OcrdWfValidator |
|
7
|
|
|
from ocrd_models import OcrdWf, OcrdWfStep |
|
8
|
|
|
|
|
9
|
|
|
def run_tasks(mets, log_level, page_id, task_strs, overwrite=False):
    """Validate a sequence of tasks and run them one after another.

    Each entry of ``task_strs`` is parsed into a workflow step, the resulting
    workflow is validated against the workspace, and then every step's
    executable is invoked through ``run_cli``. After each step the METS is
    reloaded and the step's output file groups are checked for presence.

    Args:
        mets: METS location; handed to ``Resolver.workspace_from_url`` and
            forwarded unchanged to ``run_cli``.
        log_level: Forwarded to ``run_cli`` as ``log_level``.
        page_id: Forwarded to the workflow validator and to ``run_cli``.
        task_strs: Iterable of task strings, each parsed with
            ``OcrdWfStep.parse``.
        overwrite: Forwarded to the workflow validator and to ``run_cli``.

    Raises:
        Exception: If a step's executable returns a non-zero code, or if one
            of its output file groups is missing from the reloaded METS.
            NOTE(review): ``OcrdWfValidator.validate`` presumably raises on an
            invalid workflow as well -- confirm against its implementation.
    """
    resolver = Resolver()
    workspace = resolver.workspace_from_url(mets)
    log = getLogger('ocrd.task_sequence.run_tasks')
    steps = [OcrdWfStep.parse(task_str) for task_str in task_strs]
    wf = OcrdWf(steps=steps)

    # Validate the whole workflow up front, before any step is executed.
    OcrdWfValidator().validate(wf, workspace, page_id=page_id, overwrite=overwrite)

    # Run the tasks
    for task in steps:

        log.info("Start processing task '%s'", task)

        # execute cli
        returncode, out, err = run_cli(
            task.executable,
            mets,
            resolver,
            workspace,
            log_level=log_level,
            page_id=page_id,
            overwrite=overwrite,
            input_file_grp=','.join(task.input_file_grps),
            output_file_grp=','.join(task.output_file_grps),
            parameter=json.dumps(task.parameters)
        )

        # check return code
        if returncode != 0:
            raise Exception("%s exited with non-zero return value %s. STDOUT:\n%s\nSTDERR:\n%s" % (task.executable, returncode, out, err))

        # reload mets so the in-memory workspace reflects what the step wrote
        workspace.reload_mets()

        # check output file groups are in mets
        for output_file_grp in task.output_file_grps:
            if output_file_grp not in workspace.mets.file_groups:
                raise Exception("Invalid state: expected output file group not in mets: %s\nSTDOUT:\n%s\nSTDERR:\n%s" % (output_file_grp, out, err))

        log.info("Finished processing task '%s'", task)
|
50
|
|
|
|