|
1
|
|
|
import io |
|
2
|
|
|
import re |
|
3
|
|
|
|
|
4
|
|
|
from .constants import OCRD_WF_SHEBANG |
|
5
|
|
|
from .ocrd_wf_step import OcrdWfStep |
|
6
|
|
|
|
|
7
|
|
|
class OcrdWf():
    """In-memory representation of an OCRD-WF workflow script.

    A workflow consists of ``assignments`` (a mapping of variable name to
    value, taken from ``NAME=value`` lines) and ``steps`` (whatever
    ``OcrdWfStep.parse`` returns for every other significant line).
    """

    # A line is an assignment when it starts with an alphanumeric identifier
    # (first character a letter) immediately followed by '='.
    _ASSIGNMENT_RE = re.compile(r'^[A-Za-z][A-Za-z0-9]*=')

    def __init__(self, steps=None, assignments=None):
        """Create a workflow.

        Args:
            steps: list of workflow steps; any falsy value yields a new
                empty list.
            assignments: dict of variable assignments; any falsy value
                yields a new empty dict.
        """
        self.steps = steps if steps else []
        self.assignments = assignments if assignments else {}

    @staticmethod
    def parse_file(fname):
        """Read ``fname`` as UTF-8 text and parse its content with :meth:`parse`."""
        with io.open(fname, mode='r', encoding='utf-8') as f:
            return OcrdWf.parse(f.read())

    @staticmethod
    def _join_continuations(lines):
        """Merge lines ending in a backslash with the line(s) that follow.

        The trailing backslash and any whitespace before it are removed, and
        the following line is appended with its leading whitespace collapsed
        to a single space.  A backslash on the very last line has nothing to
        continue onto; it is dropped and the line is kept as-is instead of
        raising ``IndexError``.

        Args:
            lines: list of str, already free of blank and comment lines.

        Returns:
            A new list of logical lines; ``lines`` itself is not modified.
        """
        joined = []
        pending = None  # logical line still waiting for its continuation
        for line in lines:
            if pending is None:
                current = line
            else:
                current = pending + ' ' + line.lstrip()
            if current.endswith('\\'):
                # Drop the backslash and the whitespace in front of it.
                pending = current[:-1].rstrip()
            else:
                joined.append(current)
                pending = None
        if pending is not None:
            # Dangling continuation at end of input.
            joined.append(pending)
        return joined

    @staticmethod
    def _split_assignment(line):
        """Split a ``NAME=value`` line into ``(name, value)``.

        Only the first '=' separates name from value, so the value may itself
        contain '=' characters.

        Returns:
            A ``(name, value)`` tuple, or ``None`` when ``line`` is not an
            assignment.
        """
        if not OcrdWf._ASSIGNMENT_RE.match(line):
            return None
        name, value = line.split('=', 1)
        return name, value

    @staticmethod
    def parse(src):
        """Parse OCRD-WF source text into an :class:`OcrdWf`.

        The text must start with ``OCRD_WF_SHEBANG``.  The first line is then
        discarded, blank lines and lines whose first non-whitespace character
        is '#' are removed, backslash continuations are joined, and each
        remaining line is either stored as an assignment (a later assignment
        to the same name replaces the earlier one) or handed to
        ``OcrdWfStep.parse``.

        Args:
            src: the complete workflow source as a string.

        Returns:
            A new ``OcrdWf`` holding the parsed assignments and steps.

        Raises:
            ValueError: if ``src`` does not begin with ``OCRD_WF_SHEBANG``.
        """
        if not src.startswith(OCRD_WF_SHEBANG):
            raise ValueError("OCRD-WF does not begin with '%s'!" % OCRD_WF_SHEBANG)

        # Skip the shebang line, then drop blank lines and comment lines.
        significant = [
            line for line in src.split("\n")[1:]
            if line.strip() and not line.lstrip().startswith('#')
        ]

        assignments = {}
        steps = []
        for line in OcrdWf._join_continuations(significant):
            assignment = OcrdWf._split_assignment(line)
            if assignment is not None:
                name, value = assignment
                assignments[name] = value
            else:
                steps.append(OcrdWfStep.parse(line))
        return OcrdWf(assignments=assignments, steps=steps)

    def __str__(self):
        """Serialize the workflow: shebang, then assignments, then steps.

        Every entry is written on its own newline-terminated line.
        """
        parts = ['%s\n' % OCRD_WF_SHEBANG]
        parts.extend('%s=%s\n' % (k, v) for k, v in self.assignments.items())
        parts.extend('%s\n' % str(step) for step in self.steps)
        return ''.join(parts)
|
61
|
|
|
|