1
|
|
|
import io |
2
|
|
|
import re |
3
|
|
|
|
4
|
|
|
from .constants import OCRD_WF_SHEBANG |
5
|
|
|
from .ocrd_wf_step import OcrdWfStep |
6
|
|
|
|
7
|
|
|
class OcrdWf():
    """In-memory representation of an OCRD-WF workflow script.

    A workflow consists of variable assignments (``NAME=value``) and a
    sequence of steps, each of which is parsed by ``OcrdWfStep.parse``.
    """

    def __init__(self, steps=None, assignments=None):
        """Create a workflow.

        Args:
            steps: list of step objects; a falsy value yields a new empty list.
            assignments: dict of variable name to value; a falsy value yields
                a new empty dict.
        """
        self.steps = steps if steps else []
        self.assignments = assignments if assignments else {}

    @staticmethod
    def parse_file(fname):
        """Read ``fname`` as UTF-8 text and parse its content with ``parse``."""
        with io.open(fname, mode='r', encoding='utf-8') as f:
            return OcrdWf.parse(f.read())

    @staticmethod
    def _join_continuations(lines):
        """Merge every line ending in a backslash with the line that follows it.

        The trailing backslash (and any whitespace before it) is removed and
        the leading whitespace of the following line is replaced by a single
        space. The input list is not modified.

        Raises:
            ValueError: if the last line ends with a backslash, i.e. there is
                nothing left to continue with.
        """
        joined = []
        pending = None  # text accumulated so far for an unfinished logical line
        for line in lines:
            if pending is None:
                current = line
            else:
                current = pending + re.sub(r"^\s*", " ", line)
            if current.endswith('\\'):
                pending = re.sub(r"\s*\\$", "", current)
            else:
                joined.append(current)
                pending = None
        if pending is not None:
            raise ValueError("OCRD-WF ends with a line continuation but no line follows")
        return joined

    @staticmethod
    def _split_assignment(line):
        """Return ``(name, value)`` if ``line`` is an assignment, else ``None``.

        Only the first ``=`` separates name and value, so the value may itself
        contain ``=`` characters. The value is returned verbatim.
        """
        if not re.match(r'^[A-Za-z][A-Za-z0-9]*=', line):
            return None
        name, _, value = line.partition('=')
        return name, value

    @staticmethod
    def parse(src):
        """Parse the text of an OCRD-WF script into an ``OcrdWf``.

        The first line is discarded after checking that ``src`` starts with
        ``OCRD_WF_SHEBANG``. Blank lines and lines whose first non-whitespace
        character is ``#`` are dropped *before* continuation lines are joined.
        Each remaining logical line is either an assignment or a step.

        Raises:
            ValueError: if ``src`` does not start with ``OCRD_WF_SHEBANG`` or
                ends with a dangling line continuation.
        """
        if not src.startswith(OCRD_WF_SHEBANG):
            raise ValueError("OCRD-WF does not begin with '%s'!" % OCRD_WF_SHEBANG)
        # Skip the shebang line, then drop blank lines and comment lines.
        significant = [
            line for line in src.split("\n")[1:]
            if not re.fullmatch(r'^\s*$', line) and not re.match(r"^\s*#", line)
        ]
        assignments = {}
        steps = []
        for line in OcrdWf._join_continuations(significant):
            assignment = OcrdWf._split_assignment(line)
            if assignment is None:
                steps.append(OcrdWfStep.parse(line))
            else:
                name, value = assignment
                assignments[name] = value
        return OcrdWf(assignments=assignments, steps=steps)

    def __str__(self):
        """Serialize as shebang, then assignments, then steps, one per line."""
        lines = [OCRD_WF_SHEBANG]
        lines.extend('%s=%s' % (k, v) for k, v in self.assignments.items())
        lines.extend(str(step) for step in self.steps)
        return '\n'.join(lines) + '\n'
61
|
|
|
|