Passed
Push — master ( 77064b...da9bd1 )
by Konstantin
01:57
created

ocrd.processor.helpers.run_cli()   B

Complexity

Conditions 7

Size

Total Lines 33
Code Lines 29

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 29
dl 0
loc 33
rs 7.784
c 0
b 0
f 0
cc 7
nop 11

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
Helper methods for running and documenting processors
3
"""
4
from time import time
5
import json
6
import subprocess
7
8
from click import wrap_text
9
from ocrd_utils import getLogger
10
11
# Names exported by ``from <this module> import *``.
__all__ = [
    'generate_processor_help',
    'run_cli',
    'run_processor',
]

# Module-level logger used by run_processor() and run_cli().
log = getLogger('ocrd.processor')
18
19
def _get_workspace(workspace=None, resolver=None, mets_url=None, working_dir=None):
20
    if workspace is None:
21
        if resolver is None:
22
            raise Exception("Need to pass a resolver to create a workspace")
23
        if mets_url is None:
24
            raise Exception("Need to pass mets_url to create a workspace")
25
        workspace = resolver.workspace_from_url(mets_url, dst_dir=working_dir)
26
    return workspace
27
28
def run_processor(
        processorClass,
        ocrd_tool=None,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        log_level=None,         # TODO actually use this!
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        parameter_override=None,
        working_dir=None,
): # pylint: disable=too-many-locals
    """
    Create a workspace for mets_url and run processor through it

    The processor is instantiated, its ``process()`` method is called and
    timed, an agent entry describing the run is added to the workspace METS
    and the METS is saved.

    Args:
        processorClass: Callable invoked with the workspace and the keyword
            arguments below to build the processor instance.
        ocrd_tool: Tool description handed to ``processorClass``; afterwards
            replaced by ``processor.ocrd_tool`` for logging and the agent entry.
        mets_url: METS location, used only when no ``workspace`` is passed.
        resolver: Resolver used to create the workspace when none is passed.
        workspace: Existing workspace to use instead of creating one.
        page_id: Forwarded to ``processorClass``.
        log_level: Accepted but currently not used by this function.
        input_file_grp: Forwarded to ``processorClass`` and logged.
        output_file_grp: Forwarded to ``processorClass`` and logged.
        parameter: Forwarded to ``processorClass`` unchanged; JSON-serialized
            for the profiling log message.
        parameter_override: Accepted but currently not used by this function.
        working_dir: Destination directory when the workspace is created.

    Returns:
        The processor instance after ``process()`` has run.
    """
    workspace = _get_workspace(
        workspace,
        resolver,
        mets_url,
        working_dir
    )
    log.debug("Running processor %s", processorClass)
    processor = processorClass(
        workspace,
        ocrd_tool=ocrd_tool,
        page_id=page_id,
        input_file_grp=input_file_grp,
        output_file_grp=output_file_grp,
        parameter=parameter
    )
    # From here on use the tool description the processor itself reports.
    ocrd_tool = processor.ocrd_tool
    name = '%s v%s' % (ocrd_tool['executable'], processor.version)
    otherrole = ocrd_tool['steps'][0]
    logProfile = getLogger('ocrd.process.profile')
    log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)

    started = time()
    processor.process()
    elapsed = time() - started

    # Arguments are passed lazily so the message is only formatted when the
    # profiling logger actually emits it; the rendered text is unchanged.
    logProfile.info(
        "Executing processor '%s' took %fs [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s']",
        ocrd_tool['executable'],
        elapsed,
        input_file_grp if input_file_grp else '',
        output_file_grp if output_file_grp else '',
        json.dumps(parameter) if parameter else '{}'
    )
    # Record this run in the METS before saving it.
    workspace.mets.add_agent(
        name=name,
        _type='OTHER',
        othertype='SOFTWARE',
        role='OTHER',
        otherrole=otherrole
    )
    workspace.save_mets()
    return processor
87
88
def run_cli(
        executable,
        mets_url=None,
        resolver=None,
        workspace=None,
        page_id=None,
        overwrite=None,
        log_level=None,
        input_file_grp=None,
        output_file_grp=None,
        parameter=None,
        working_dir=None,
):
    """
    Create a workspace for mets_url and run a command-line executable through it

    Args:
        executable: Program to run; first element of the command line.
        mets_url: METS location; used to create the workspace when none is
            passed and forwarded as ``--mets`` when given.
        resolver: Resolver used to create the workspace when none is passed.
        workspace: Existing workspace to use instead of creating one.
        page_id: Forwarded as ``--page-id`` when truthy.
        overwrite: Adds the ``--overwrite`` flag when truthy.
        log_level: Forwarded as ``--log-level`` when truthy.
        input_file_grp: Forwarded as ``--input-file-grp`` when truthy.
        output_file_grp: Forwarded as ``--output-file-grp`` when truthy.
        parameter: Forwarded as ``--parameter`` when truthy.
        working_dir: Destination directory when the workspace is created.

    Returns:
        The value returned by ``subprocess.call`` for the assembled command.
    """
    workspace = _get_workspace(workspace, resolver, mets_url, working_dir)
    args = [executable, '--working-dir', workspace.directory]
    # mets_url is None when the caller supplied a ready-made workspace without
    # a URL. Putting None into the argument list would make both the join
    # below and subprocess.call raise TypeError, so the option is left out and
    # the executable applies whatever default it has for --mets.
    if mets_url is not None:
        args += ['--mets', mets_url]
    optional_args = (
        ('--log-level', log_level),
        ('--page-id', page_id),
        ('--input-file-grp', input_file_grp),
        ('--output-file-grp', output_file_grp),
        ('--parameter', parameter),
    )
    for flag, value in optional_args:
        if value:
            args += [flag, value]
    if overwrite:
        args.append('--overwrite')
    log.debug("Running subprocess '%s'", ' '.join(args))
    return subprocess.call(args)
121
122
def generate_processor_help(ocrd_tool):
    """
    Build the ``--help`` text for a processor from its tool description.

    Args:
        ocrd_tool (dict): Tool description. ``executable`` and ``description``
            are required; ``parameters``, ``input_file_grp`` and
            ``output_file_grp`` are optional. Each entry of ``parameters``
            must have ``type`` and ``description`` and may have ``required``,
            ``default`` and ``enum``.

    Returns:
        The usage text as a string, with sections for options, parameters
        and default wiring.
    """
    parameters = ocrd_tool.get('parameters')
    if not parameters:
        # Covers both a missing and an empty 'parameters' entry.
        parameter_help = '  NONE\n'
    else:
        def wrap(text):
            return wrap_text(text, initial_indent=' '*3,
                             subsequent_indent=' '*4,
                             width=72, preserve_paragraphs=True)

        chunks = []
        for param_name, param in parameters.items():
            # A required parameter is marked as such; otherwise its default
            # (if any) is shown as JSON.
            if param.get('required'):
                qualifier = ' - REQUIRED'
            elif 'default' in param:
                qualifier = ' - %s' % json.dumps(param['default'])
            else:
                qualifier = ''
            chunks.append(wrap('"%s" [%s%s]' % (param_name, param['type'], qualifier)))
            chunks.append('\n ' + wrap(param['description']))
            if 'enum' in param:
                chunks.append('\n ' + wrap('Possible values: %s' % json.dumps(param['enum'])))
            chunks.append("\n")
        parameter_help = ''.join(chunks)
    return '''
Usage: %s [OPTIONS]

  %s

Options:
  -I, --input-file-grp USE        File group(s) used as input
  -O, --output-file-grp USE       File group(s) used as output
  -g, --page-id ID                Physical page ID(s) to process
  --overwrite                     Remove existing output pages/images
                                  (with --page-id, remove only those)
  -p, --parameter JSON-PATH       Parameters, either verbatim JSON string
                                  or JSON file path
  -P, --param-override KEY VAL    Override a single JSON object key-value pair,
                                  taking precedence over --parameter
  -m, --mets URL-PATH             URL or file path of METS to process
  -w, --working-dir PATH          Working directory of local workspace
  -l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE]
                                  Log level
  -J, --dump-json                 Dump tool description as JSON and exit
  -h, --help                      This help message
  -V, --version                   Show version

Parameters:
%s
Default Wiring:
  %s -> %s

''' % (
    ocrd_tool['executable'],
    ocrd_tool['description'],
    parameter_help,
    ocrd_tool.get('input_file_grp', 'NONE'),
    ocrd_tool.get('output_file_grp', 'NONE')
)
176
177
178
179