Passed
Pull Request — master (#582)
by Konstantin
01:59
created

ocrd.decorators   B

Complexity

Total Complexity 46

Size/Duplication

Total Lines 242
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 46
eloc 149
dl 0
loc 242
rs 8.72
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
F ocrd_mets_filter_options.__call__() 0 50 14
C ocrd_mets_filter_options.__init__() 0 18 10
A ocrd_mets_filter_options._expand_template() 0 9 4

5 Functions

Rating   Name   Duplication   Size   Complexity  
A ocrd_cli_options() 0 33 2
A _handle_param_option() 0 2 1
A ocrd_loglevel() 0 6 1
A _set_root_logger_version() 0 3 1
D ocrd_cli_wrap_processor() 0 60 13

How to fix   Complexity   

Complexity

Complex classes like ocrd.decorators often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
from os.path import isfile
2
from re import match, sub, IGNORECASE
3
from itertools import product
4
import sys
5
from string import Template
6
7
import click
8
9
from ocrd_utils import (
10
    is_local_filename,
11
    get_local_filename,
12
    setOverrideLogLevel,
13
    parse_json_string_or_file,
14
    set_json_key_value_overrides,
15
)
16
17
from ocrd_utils import getLogger
18
from .resolver import Resolver
19
from .processor.base import run_processor
20
from ocrd_validators import WorkspaceValidator
21
from ocrd_models.ocrd_mets_filter import FIELDS
22
23
def _set_root_logger_version(ctx, param, value):    # pylint: disable=unused-argument
    """
    Click callback for ``--log-level``.

    Forwards the chosen level to ``setOverrideLogLevel`` and hands the value
    back unchanged so click stores it as the option's value. ``ctx`` and
    ``param`` are part of the click callback signature and are not used.
    """
    setOverrideLogLevel(value)
    return value
26
27
# Reusable '-l/--log-level' option; choosing a level triggers the override
# callback defined above.
loglevel_option = click.option(
    '-l', '--log-level',
    help="Log level",
    type=click.Choice(['OFF', 'ERROR', 'WARN', 'INFO', 'DEBUG', 'TRACE']),
    default=None,
    callback=_set_root_logger_version)
30
31
def _handle_param_option(ctx, param, value):    # pylint: disable=unused-argument
    """
    Click callback for ``-p/--parameter``.

    ``value`` is the sequence of all ``-p`` arguments given (the option is
    declared with ``multiple=True``); they are passed on as positional
    arguments to ``parse_json_string_or_file`` and its result is returned.
    """
    return parse_json_string_or_file(*value)
33
34
# Reusable '-p/--parameter' option. It may be repeated; when omitted, a
# single empty JSON object string is handed to the callback.
parameter_option = click.option(
    '-p', '--parameter',
    help="Parameters, either JSON string or path to JSON file",
    multiple=True,
    default=['{}'],
    callback=_handle_param_option)
39
40
# Reusable '-P/--parameter-override KEY VALUE' option. It may be repeated;
# the callback passes the collected (key, value) pairs through unchanged.
parameter_override_option = click.option(
    '-P', '--parameter-override',
    help="Parameter override",
    nargs=2,
    multiple=True,
    callback=lambda ctx, param, kv: kv)
46
47
def ocrd_cli_wrap_processor(
    processorClass,
    ocrd_tool=None,
    mets=None,
    working_dir=None,
    dump_json=False,
    help=False, # pylint: disable=redefined-builtin
    version=False,
    overwrite=False,
    **kwargs
):
    """
    Run ``processorClass`` from parsed command line arguments.

    If any of ``dump_json``, ``help`` or ``version`` is set, logging is
    silenced, the processor class is instantiated without a workspace to
    produce the requested output, and the process exits.

    Otherwise the METS given by ``mets`` is resolved into a workspace
    (relative to ``working_dir``), parameter overrides are merged into
    ``kwargs['parameter']``, the input/output file groups are validated and
    ``run_processor`` is called with the remaining ``kwargs``.

    Exits with status 1 (after showing the processor help) when ``mets`` is
    empty or is a local filename that does not exist. Raises ``Exception``
    when the file group validation report is not valid. ``kwargs`` must
    contain ``input_file_grp`` and, unless ``overwrite`` is set,
    ``output_file_grp``.
    """
    # Informational modes: let the processor print and leave.
    if dump_json or help or version:
        setOverrideLogLevel('OFF', silent=True)
        processorClass(workspace=None, dump_json=dump_json, show_help=help, show_version=version)
        sys.exit()

    LOG = getLogger('ocrd_cli_wrap_processor')

    # No usable METS: show usage and signal failure.
    if not mets or (is_local_filename(mets) and not isfile(get_local_filename(mets))):
        processorClass(workspace=None, show_help=True)
        sys.exit(1)

    # Merge parameter overrides and parameters
    if 'parameter_override' in kwargs:
        overrides = kwargs['parameter_override']
        set_json_key_value_overrides(kwargs['parameter'], *overrides)

    # TODO OCR-D/core#274
    # Assert -I / -O
    # if not kwargs['input_file_grp']:
    #     raise ValueError('-I/--input-file-grp is required')
    # if not kwargs['output_file_grp']:
    #     raise ValueError('-O/--output-file-grp is required')

    # NOTE(review): a missing local METS file already triggers the usage exit
    # above, so this branch looks unreachable - confirm before removing it.
    if is_local_filename(mets) and not isfile(get_local_filename(mets)):
        msg = "File does not exist: %s" % mets
        LOG.error(msg)
        raise Exception(msg)

    workspace = Resolver().workspace_from_url(mets, working_dir)
    page_id = kwargs.get('page_id')

    # XXX not possible while processors do not adhere to # https://github.com/OCR-D/core/issues/505
    # if overwrite
    #     if 'output_file_grp' not in kwargs or not kwargs['output_file_grp']:
    #         raise Exception("--overwrite requires --output-file-grp")
    #     LOG.info("Removing files because of --overwrite")
    #     for grp in kwargs['output_file_grp'].split(','):
    #         if page_id:
    #             for one_page_id in kwargs['page_id'].split(','):
    #                 LOG.debug("Removing files in output file group %s with page ID %s", grp, one_page_id)
    #                 for file in workspace.mets.find_files(pageId=one_page_id, fileGrp=grp):
    #                     workspace.remove_file(file, force=True, keep_file=False, page_recursive=True)
    #         else:
    #             LOG.debug("Removing all files in output file group %s ", grp)
    #             # TODO: can be reduced to `page_same_group=True` as soon as core#505 has landed (in all processors)
    #             workspace.remove_file_group(grp, recursive=True, force=True, keep_files=False, page_recursive=True, page_same_group=False)
    #     workspace.save_mets()
    # XXX While https://github.com/OCR-D/core/issues/505 is open, set 'overwrite_mode' globally on the workspace
    if overwrite:
        workspace.overwrite_mode = True

    # With --overwrite the output group is passed to the check as ''.
    input_grp = kwargs['input_file_grp']
    output_grp = '' if overwrite else kwargs['output_file_grp']
    report = WorkspaceValidator.check_file_grp(workspace, input_grp, output_grp, page_id)
    if not report.is_valid:
        raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors))

    run_processor(processorClass, ocrd_tool, mets, workspace=workspace, **kwargs)
107
108
def ocrd_loglevel(f):
    """
    Decorator attaching the ``-l/--log-level`` option to the command ``f``.

    Returns ``f`` itself so it can be stacked with other click decorators.
    """
    loglevel_option(f)
    return f
114
115
def ocrd_cli_options(f):
    """
    Implement MP CLI.

    Attaches the common processor command line options to ``f`` and returns
    ``f``.

    Usage::

        from ocrd.decorators import ocrd_cli_options

        @click.command()
        @ocrd_cli_options
        def cli(mets, **kwargs):
            print(mets)
    """
    # Order matters: options are attached to ``f`` one after another.
    option_decorators = (
        click.option('-m', '--mets', help="METS to process", default="mets.xml"),
        click.option('-w', '--working-dir', help="Working Directory"),
        # TODO OCR-D/core#274
        # click.option('-I', '--input-file-grp', help='File group(s) used as input. **required**'),
        # click.option('-O', '--output-file-grp', help='File group(s) used as output. **required**'),
        click.option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT'),
        click.option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT'),
        click.option('-g', '--page-id', help="ID(s) of the pages to process"),
        click.option('--overwrite', help="Overwrite the output file group or a page range (--page-id)", is_flag=True, default=False),
        parameter_option,
        parameter_override_option,
        click.option('-J', '--dump-json', help="Dump tool description as JSON and exit", is_flag=True, default=False),
        loglevel_option,
        click.option('-V', '--version', help="Show version", is_flag=True, default=False),
        click.option('-h', '--help', help="This help message", is_flag=True, default=False),
    )
    for attach in option_decorators:
        attach(f)
    return f
148
149
# Default templates for the generated filter options. Values are expanded
# with string.Template; ${field} and ${operator} are filled in per option,
# ${operation} and ${type} when rendering the 'help' template.
TEMPLATE_DEFAULTS = dict(
    metavar='PAT',
    required=False,
    parameter='${field}_${operator}clude',
    help='${field} ${operation} ${type}',
    help_field='${field}',
    help_operation='to ${operator}clude',
    help_type='(string/regex/comma-separated)',
)
158
class ocrd_mets_filter_options():
    """
    Adds include/exclude filter options

    Used as a decorator: for every combination of a field in ``fields`` and
    an operator in ``operators`` (``'in'`` / ``'ex'``) one ``click.option``
    is attached to the decorated function. Option name, help text, metavar
    and required flag are rendered from ``string.Template`` templates, which
    default to ``TEMPLATE_DEFAULTS`` and can be overridden via keyword
    arguments, either with one value for all options or with a dict keyed
    per option.
    """

    # XXX must be retained for backwards-compatibility:
    # extra flags for the "include" variant of these fields.
    _LEGACY_INCLUDE_FLAGS = {
        'ID':       ['-i', '--file-id'],
        'pageId':   ['-g', '--page-id'],
        'fileGrp':  ['-G', '--file-grp'],
        'mimetype': ['-m', '--mimetype'],
    }

    @staticmethod
    def _template_key(tpl_name, field, operator):
        """Return the key under which a template of kind ``tpl_name`` is stored."""
        if tpl_name == 'help_operation':
            # one template per operator, shared by all fields
            return '%sclude' % operator
        if tpl_name in ('parameter', 'required'):
            # one template per field/operator combination
            return '%s_%sclude' % (field, operator)
        # everything else: one template per field
        return field

    @staticmethod
    def _split_commas(ctx, param, value):    # pylint: disable=unused-argument
        """Click callback: turn a comma-separated value into a list, pass others through."""
        if value and ',' in value:
            return value.split(',')
        return value

    def __init__(self, fields=FIELDS, operators=None, **templates):
        """
        Prepare the templates for all field/operator combinations.

        Arguments:
            fields: names of the fields to create options for
            operators: operator prefixes, ``['ex', 'in']`` if empty or None
            **templates: overrides for entries of ``TEMPLATE_DEFAULTS``;
                a dict value is looked up per key, falling back to the
                default; any other falsy value also falls back to the default
        """
        self.fields = fields
        self.operators = operators or ['ex', 'in']
        merged = dict(TEMPLATE_DEFAULTS)
        merged.update(templates)
        self.templates = {}
        for (tpl_name, tpl), field, operator in product(merged.items(), self.fields, self.operators):
            compiled = self.templates.setdefault(tpl_name, {})
            key = self._template_key(tpl_name, field, operator)
            if key in compiled:
                # already built for an earlier field/operator combination
                continue
            if isinstance(tpl, dict):
                source = tpl[key] if key in tpl else TEMPLATE_DEFAULTS[tpl_name]
            else:
                source = tpl or TEMPLATE_DEFAULTS[tpl_name]
            compiled[key] = Template(str(source))

    def _expand_template(self, tpl_name, field, operator, tpl_vars):
        """
        Substitute ``tpl_vars`` into the template ``tpl_name`` for ``field``/``operator``.

        Returns the expanded string, except for ``'required'`` where the
        result is the boolean "expanded string equals ``'True'``".
        """
        key = self._template_key(tpl_name, field, operator)
        expanded = self.templates[tpl_name][key].safe_substitute(tpl_vars)
        if tpl_name == 'required':
            return expanded == 'True'
        return expanded

    def _render(self, tpl_name, field, operator, extra=None):
        """Expand ``tpl_name`` with ``field``/``operator`` plus optional ``extra`` variables."""
        tpl_vars = {'field': field, 'operator': operator}
        if extra:
            tpl_vars.update(extra)
        return self._expand_template(tpl_name, field, operator, tpl_vars=tpl_vars)

    def __call__(self, f):
        """Attach one click option per field/operator combination to ``f`` and return ``f``."""
        for field, operator in product(self.fields, self.operators):
            # XXX Controls the kwarg name of this field in the decorated command
            decls = [self._render('parameter', field, operator)]

            required = self._render('required', field, operator)
            metavar = self._render('metavar', field, operator)
            help_parts = {
                'field': self._render('help_field', field, operator),
                'operation': self._render('help_operation', field, operator),
                'type': self._render('help_type', field, operator),
            }
            help_text = self._render('help', field, operator, help_parts)

            # XXX No regex search for pageId search currently
            if field == 'pageId' and operator == 'in':
                help_text = sub(r'[,/]?\s*regexp?\b', '', help_text, flags=IGNORECASE)

            if operator == 'in':
                decls.extend(self._LEGACY_INCLUDE_FLAGS.get(field, []))

            # Long flag: --<field> for include, --not-<field> for exclude (lowercased)
            prefix = 'not-' if operator == 'ex' else ''
            decls.append('--%s%s' % (prefix, field.lower()))

            option_kwargs = dict(
                default=None,
                callback=self._split_commas,
                required=required,
                metavar=metavar,
                help=help_text)
            click.option(*decls, **option_kwargs)(f)
        return f
242