Passed
Push — master ( 77064b...da9bd1 )
by Konstantin
01:57
created

ocrd.processor.helpers   A

Complexity

Total Complexity 22

Size/Duplication

Total Lines 175
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 22
eloc 122
dl 0
loc 175
rs 10
c 0
b 0
f 0

4 Functions

Rating   Name   Duplication   Size   Complexity  
B run_cli() 0 33 7
B generate_processor_help() 0 53 7
B run_processor() 0 59 4
A _get_workspace() 0 8 4
1
"""
2
Helper methods for running and documenting processors
3
"""
4
from time import time
5
import json
6
import subprocess
7
8
from click import wrap_text
9
from ocrd_utils import getLogger
10
11
__all__ = [
12
    'generate_processor_help',
13
    'run_cli',
14
    'run_processor'
15
]
16
17
log = getLogger('ocrd.processor')
18
19
def _get_workspace(workspace=None, resolver=None, mets_url=None, working_dir=None):
    """
    Return ``workspace`` if one is given, otherwise create one via ``resolver``.

    Args:
        workspace: An existing workspace; returned unchanged when not ``None``.
        resolver: Object providing ``workspace_from_url(mets_url, dst_dir=...)``.
            Required when ``workspace`` is ``None``.
        mets_url: METS location handed to the resolver.
            Required when ``workspace`` is ``None``.
        working_dir: Forwarded to the resolver as ``dst_dir``.

    Returns:
        The given ``workspace``, or the result of ``resolver.workspace_from_url``.

    Raises:
        Exception: if ``workspace`` is ``None`` and ``resolver`` or ``mets_url``
            is missing.
    """
    if workspace is not None:
        return workspace
    if resolver is None:
        raise Exception("Need to pass a resolver to create a workspace")
    if mets_url is None:
        raise Exception("Need to pass mets_url to create a workspace")
    return resolver.workspace_from_url(mets_url, dst_dir=working_dir)
27
28
def run_processor(
        processorClass,
        ocrd_tool=None,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        log_level=None,         # TODO actually use this!
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        parameter_override=None,
        working_dir=None,
): # pylint: disable=too-many-locals
    """
    Obtain a workspace, instantiate ``processorClass`` on it and run it.

    After ``processor.process()`` returns, the elapsed wall-clock time is
    logged to the ``ocrd.process.profile`` logger, an agent entry describing
    the processor is added to the workspace METS, and the METS is saved.

    Args:
        processorClass: Callable invoked as ``processorClass(workspace,
            ocrd_tool=..., page_id=..., input_file_grp=...,
            output_file_grp=..., parameter=...)``.
        ocrd_tool: Passed to the processor constructor. The tool description
            actually used afterwards is read back from ``processor.ocrd_tool``.
        mets_url: METS location, used only if ``workspace`` is ``None``.
        resolver: Resolver, used only if ``workspace`` is ``None``.
        workspace: Existing workspace to run on. If ``None``, one is created
            from ``resolver`` and ``mets_url``.
        page_id: Passed to the processor constructor.
        log_level: Currently unused in this function.
        input_file_grp: Passed to the processor constructor.
        output_file_grp: Passed to the processor constructor.
        parameter: Passed to the processor constructor, and JSON-serialized
            for the profiling log message.
        parameter_override: Currently unused in this function.
        working_dir: Destination directory, used only if ``workspace`` is
            ``None``.

    Returns:
        The processor instance, after ``process()`` has run.
    """
    workspace = _get_workspace(workspace, resolver, mets_url, working_dir)
    log.debug("Running processor %s", processorClass)
    processor = processorClass(
        workspace,
        ocrd_tool=ocrd_tool,
        page_id=page_id,
        input_file_grp=input_file_grp,
        output_file_grp=output_file_grp,
        parameter=parameter
    )
    # From here on use the tool description the processor itself reports.
    ocrd_tool = processor.ocrd_tool
    name = '%s v%s' % (ocrd_tool['executable'], processor.version)
    otherrole = ocrd_tool['steps'][0]
    logProfile = getLogger('ocrd.process.profile')
    log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)

    t0 = time()
    processor.process()
    elapsed = time() - t0

    # Deferred %-formatting: the message is only rendered if the record is emitted.
    logProfile.info(
        "Executing processor '%s' took %fs [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s']",
        ocrd_tool['executable'],
        elapsed,
        input_file_grp if input_file_grp else '',
        output_file_grp if output_file_grp else '',
        json.dumps(parameter) if parameter else '{}'
    )
    # Record which software processed the workspace, then persist the METS.
    workspace.mets.add_agent(
        name=name,
        _type='OTHER',
        othertype='SOFTWARE',
        role='OTHER',
        otherrole=otherrole
    )
    workspace.save_mets()
    return processor
87
88
def run_cli(
        executable,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        overwrite=None,
        log_level=None,
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        working_dir=None,
):
    """
    Obtain a workspace and run ``executable`` on it as a subprocess.

    The command line is ``executable --working-dir <workspace.directory>``
    followed by one option for every argument that is set.

    Args:
        executable: Program to invoke.
        mets_url: Passed as ``--mets``. Also used to create the workspace
            if ``workspace`` is ``None``.
        resolver: Resolver, used only if ``workspace`` is ``None``.
        workspace: Existing workspace. If ``None``, one is created from
            ``resolver`` and ``mets_url``.
        page_id: Passed as ``--page-id`` when truthy.
        overwrite: Adds the ``--overwrite`` flag when truthy.
        log_level: Passed as ``--log-level`` when truthy.
        input_file_grp: Passed as ``--input-file-grp`` when truthy.
        output_file_grp: Passed as ``--output-file-grp`` when truthy.
        parameter: Passed as ``--parameter`` when truthy.
        working_dir: Destination directory, used only if ``workspace`` is
            ``None``.

    Returns:
        The return code of the subprocess, as given by ``subprocess.call``.
    """
    workspace = _get_workspace(workspace, resolver, mets_url, working_dir)
    args = [executable, '--working-dir', workspace.directory]
    # A caller may pass an existing workspace without mets_url. Putting None
    # into the argument list would make the join below raise TypeError.
    # NOTE(review): in that case no --mets is passed and the executable uses
    # its own default; confirm this is the intended fallback.
    if mets_url is not None:
        args += ['--mets', mets_url]
    optional_values = (
        ('--log-level', log_level),
        ('--page-id', page_id),
        ('--input-file-grp', input_file_grp),
        ('--output-file-grp', output_file_grp),
        ('--parameter', parameter),
    )
    for option, value in optional_values:
        if value:
            args += [option, value]
    if overwrite:
        args += ['--overwrite']
    log.debug("Running subprocess '%s'", ' '.join(args))
    return subprocess.call(args)
121
122
def generate_processor_help(ocrd_tool):
    """
    Build the ``--help`` text for a processor from its tool description.

    Args:
        ocrd_tool (dict): Tool description. ``executable`` and ``description``
            are required. ``parameters``, ``input_file_grp`` and
            ``output_file_grp`` are optional. Each entry of ``parameters``
            must have ``type`` and ``description`` and may have ``required``,
            ``default`` and ``enum``.

    Returns:
        str: The help message: usage line, description, the fixed option
        list, one entry per parameter (or ``NONE``) and the default wiring.
    """
    parameters = ocrd_tool.get('parameters')
    if not parameters:
        parameter_help = '  NONE\n'
    else:
        def wrap(s):
            return wrap_text(s, initial_indent=' '*3,
                             subsequent_indent=' '*4,
                             width=72, preserve_paragraphs=True)

        chunks = []
        for param_name, param in parameters.items():
            # A required parameter shows no default; otherwise show the
            # default if there is one.
            if param.get('required'):
                suffix = ' - REQUIRED'
            elif 'default' in param:
                suffix = ' - %s' % json.dumps(param['default'])
            else:
                suffix = ''
            chunks.append(wrap('"%s" [%s%s]' % (param_name, param['type'], suffix)))
            chunks.append('\n ' + wrap(param['description']))
            if 'enum' in param:
                chunks.append('\n ' + wrap('Possible values: %s' % json.dumps(param['enum'])))
            chunks.append("\n")
        parameter_help = ''.join(chunks)
    return '''
Usage: %s [OPTIONS]

  %s

Options:
  -I, --input-file-grp USE        File group(s) used as input
  -O, --output-file-grp USE       File group(s) used as output
  -g, --page-id ID                Physical page ID(s) to process
  --overwrite                     Remove existing output pages/images
                                  (with --page-id, remove only those)
  -p, --parameter JSON-PATH       Parameters, either verbatim JSON string
                                  or JSON file path
  -P, --param-override KEY VAL    Override a single JSON object key-value pair,
                                  taking precedence over --parameter
  -m, --mets URL-PATH             URL or file path of METS to process
  -w, --working-dir PATH          Working directory of local workspace
  -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE]
                                  Log level
  -J, --dump-json                 Dump tool description as JSON and exit
  -h, --help                      This help message
  -V, --version                   Show version

Parameters:
%s
Default Wiring:
  %s -> %s

''' % (
    ocrd_tool['executable'],
    ocrd_tool['description'],
    parameter_help,
    ocrd_tool.get('input_file_grp', 'NONE'),
    ocrd_tool.get('output_file_grp', 'NONE')
)
176
177
178
179