Passed
Push — master ( 71c0c1...5d69e4 )
by Konstantin
02:52
created

ocrd.decorators   B

Complexity

Total Complexity 45

Size/Duplication

Total Lines 208
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 45
eloc 174
dl 0
loc 208
rs 8.8
c 0
b 0
f 0

2 Functions

Rating   Name   Duplication   Size   Complexity  
D check_and_run_network_agent() 0 50 12
F ocrd_cli_wrap_processor() 0 130 33

How to fix   Complexity   

Complexity

Complex classes like ocrd.decorators often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
import sys
2
from contextlib import nullcontext
3
4
from ocrd_utils import (
5
    config,
6
    initLogging,
7
    is_local_filename,
8
    get_local_filename,
9
    getLogger,
10
    parse_json_string_with_comments,
11
    set_json_key_value_overrides,
12
    parse_json_string_or_file,
13
    redirect_stderr_and_stdout_to_file,
14
)
15
from ocrd_validators import WorkspaceValidator
16
17
from ..resolver import Resolver
18
from ..processor.base import ResourceNotFoundError, run_processor
19
20
from .loglevel_option import ocrd_loglevel
21
from .parameter_option import parameter_option, parameter_override_option
22
from .ocrd_cli_options import ocrd_cli_options
23
from .mets_find_options import mets_find_options
24
25
26
def _exit_resource_not_found(error):
    """Log a ``ResourceNotFoundError`` as critical and exit with status 1."""
    log = getLogger('ocrd.processor.base')
    log.critical(error.message)
    sys.exit(1)


def _handle_informational_options(
    processor,
    subcommand=None,
    show_help=False,
    version=False,
    dump_json=False,
    dump_module_dir=False,
    resolve_resource=None,
    show_resource=None,
    list_resources=False,
):
    """Serve the first requested help/version/dump/resource option and exit; return if none is set."""
    if not sys.argv[1:]:
        # No command-line arguments at all: show usage and signal failure.
        processor.show_help(subcommand=subcommand)
        sys.exit(1)
    if show_help:
        processor.show_help(subcommand=subcommand)
        sys.exit()
    if version:
        processor.show_version()
        sys.exit()
    if dump_json:
        processor.dump_json()
        sys.exit()
    if dump_module_dir:
        processor.dump_module_dir()
        sys.exit()
    if resolve_resource:
        try:
            print(processor.resolve_resource(resolve_resource))
        except ResourceNotFoundError as e:
            _exit_resource_not_found(e)
        sys.exit()
    if show_resource:
        try:
            processor.show_resource(show_resource)
        except ResourceNotFoundError as e:
            _exit_resource_not_found(e)
        sys.exit()
    if list_resources:
        processor.list_resources()
        sys.exit()


def _prepare_parameters(processor, kwargs):
    """Replace ``kwargs['parameter']`` with the parsed parameters and merge overrides, in place."""
    if 'parameter' in kwargs:
        # Disambiguate parameter file/literal, and resolve file
        def resolve(name):
            try:
                return processor.resolve_resource(name)
            except ResourceNotFoundError:
                # An unresolvable preset name is reported to the parser as None.
                return None
        kwargs['parameter'] = parse_json_string_or_file(*kwargs['parameter'],
                                                        resolve_preset_file=resolve)
    else:
        kwargs['parameter'] = {}
    # Merge parameter overrides and parameters
    if 'parameter_override' in kwargs:
        set_json_key_value_overrides(kwargs['parameter'], *kwargs.pop('parameter_override'))


def _report_profile_at_exit(profiler, profile_file=None):
    """Register an exit handler that stops ``profiler`` and prints (optionally dumps) its stats."""
    import atexit
    import io
    import pstats

    def goexit():
        profiler.disable()
        print("Profiling completed")
        if profile_file:
            profiler.dump_stats(profile_file)
        stream = io.StringIO()
        pstats.Stats(profiler, stream=stream).sort_stats("cumulative").print_stats()
        print(stream.getvalue())
    atexit.register(goexit)


def ocrd_cli_wrap_processor(
    processorClass,
    mets=None,
    mets_server_url=None,
    working_dir=None,
    dump_json=False,
    dump_module_dir=False,
    help=False, # pylint: disable=redefined-builtin
    profile=False,
    profile_file=None,
    version=False,
    overwrite=False,
    debug=False,
    resolve_resource=None,
    show_resource=None,
    list_resources=False,
    # ocrd_network params start #
    subcommand=None,
    address=None,
    queue=None,
    log_filename=None,
    database=None,
    # ocrd_network params end #
    **kwargs
):
    """
    Run ``processorClass`` from parsed command-line options.

    The steps are performed in this order:

    1. Initialize logging and instantiate ``processorClass(None)``.
    2. If no command-line arguments were given, or one of ``help``,
       ``version``, ``dump_json``, ``dump_module_dir``, ``resolve_resource``,
       ``show_resource`` or ``list_resources`` is set, serve that request and
       exit the process.
    3. If any of ``subcommand``, ``address``, ``queue`` or ``database`` is
       set, hand over to ``check_and_run_network_agent``.
    4. Parse ``kwargs['parameter']`` (an empty dict if absent) and merge
       ``kwargs['parameter_override']`` into it.
    5. Resolve the METS arguments, open the workspace and validate the
       input/output file groups.
    6. Optionally enable CPU profiling, then call ``run_processor`` with the
       remaining ``kwargs`` (stdout/stderr are redirected to ``log_filename``
       if that is set).

    Args:
        processorClass: processor class to instantiate and run.
        mets, mets_server_url, working_dir: passed through
            ``Resolver.resolve_mets_arguments`` to locate the workspace.
        dump_json, dump_module_dir, help, version, resolve_resource,
            show_resource, list_resources: informational requests, see step 2.
        profile: enable profiling; also enabled when ``'CPU'`` is in
            ``config.OCRD_PROFILE``.
        profile_file: file to dump profiling stats to; defaults to
            ``config.OCRD_PROFILE_FILE`` when that is set. Setting it
            enables profiling as well.
        overwrite: set ``config.OCRD_EXISTING_OUTPUT`` to ``'OVERWRITE'`` and
            skip the output file group check.
        debug: set the ``OCRD_MISSING_INPUT``, ``OCRD_MISSING_OUTPUT`` and
            ``OCRD_EXISTING_OUTPUT`` config values to ``'ABORT'``
            (``overwrite`` takes precedence for the latter).
        subcommand, address, queue, database: network agent options, see
            step 3.
        log_filename: if set, redirect stdout and stderr to this file while
            the processor runs.
        **kwargs: must contain non-empty ``input_file_grp`` and
            ``output_file_grp``; may contain ``parameter``,
            ``parameter_override`` and ``page_id``. Everything except
            ``parameter_override`` is forwarded to ``run_processor``.

    Raises:
        ValueError: if ``input_file_grp`` or ``output_file_grp`` is missing
            or empty.
        Exception: if the file group validation report is not valid.
        SystemExit: after serving an informational request.
    """
    # init logging handlers so no imported libs can preempt ours
    initLogging()

    # FIXME: remove workspace arg entirely
    processor = processorClass(None)
    _handle_informational_options(
        processor,
        subcommand=subcommand,
        show_help=help,
        version=version,
        dump_json=dump_json,
        dump_module_dir=dump_module_dir,
        resolve_resource=resolve_resource,
        show_resource=show_resource,
        list_resources=list_resources,
    )
    if subcommand or address or queue or database:
        # Used for checking/starting network agents for the WebAPI architecture
        check_and_run_network_agent(processorClass, subcommand, address, database, queue)

    _prepare_parameters(processor, kwargs)
    # Assert -I / -O (a missing key is reported the same way as an empty value)
    if not kwargs.get('input_file_grp'):
        raise ValueError('-I/--input-file-grp is required')
    if not kwargs.get('output_file_grp'):
        raise ValueError('-O/--output-file-grp is required')
    resolver = Resolver()
    working_dir, mets, _, mets_server_url = \
            resolver.resolve_mets_arguments(working_dir, mets, None, mets_server_url)
    workspace = resolver.workspace_from_url(mets, working_dir, mets_server_url=mets_server_url)
    page_id = kwargs.get('page_id')
    if debug:
        config.OCRD_MISSING_INPUT = 'ABORT'
        config.OCRD_MISSING_OUTPUT = 'ABORT'
        config.OCRD_EXISTING_OUTPUT = 'ABORT'
    if overwrite:
        # Applied after the debug defaults, so --overwrite wins for existing output.
        config.OCRD_EXISTING_OUTPUT = 'OVERWRITE'
    # With --overwrite the output file group is not checked (an empty string is passed instead).
    report = WorkspaceValidator.check_file_grp(workspace, kwargs['input_file_grp'], '' if overwrite else kwargs['output_file_grp'], page_id)
    if not report.is_valid:
        raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors))
    # Set up profiling behavior from environment variables/flags
    if not profile and 'CPU' in config.OCRD_PROFILE:
        profile = True
    if not profile_file and config.is_set('OCRD_PROFILE_FILE'):
        profile_file = config.OCRD_PROFILE_FILE
    if profile or profile_file:
        import cProfile
        print("Profiling...")
        pr = cProfile.Profile()
        pr.enable()
        _report_profile_at_exit(pr, profile_file)
    if log_filename:
        log_ctx = redirect_stderr_and_stdout_to_file(log_filename)
    else:
        log_ctx = nullcontext()
    with log_ctx:
        run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
156
157
158
def check_and_run_network_agent(ProcessorClass, subcommand: str, address: str, database: str, queue: str):
    """
    Validate the network agent options, start the requested agent, then exit.

    ``subcommand`` must be one of ``AgentType.PROCESSING_WORKER`` or
    ``AgentType.PROCESSOR_SERVER`` (from ``ocrd_network``). ``database`` is
    required for both. A processing worker additionally requires ``queue``
    and rejects ``address``; a processor server requires ``address`` (in the
    form ``HOST:PORT``) and rejects ``queue``.

    Args:
        ProcessorClass: processor class; instantiated with ``workspace=None``
            to read its ``ocrd_tool`` description, and passed on to the agent.
        subcommand: the network agent type to start.
        address: ``HOST:PORT`` for the processor server to listen on.
        database: passed to the agent as ``mongodb_addr``.
        queue: passed to the processing worker as ``rabbitmq_addr``.

    Raises:
        ValueError: if the combination of options is invalid or ``address``
            is not of the form ``HOST:PORT``.
        SystemExit: with status 0 once the started agent returns.
    """
    from ocrd_network import ProcessingWorker, ProcessorServer, AgentType
    SUBCOMMANDS = [AgentType.PROCESSING_WORKER, AgentType.PROCESSOR_SERVER]

    if not subcommand:
        raise ValueError(f"Subcommand options --address --queue and --database are only valid for subcommands: {SUBCOMMANDS}")
    if subcommand not in SUBCOMMANDS:
        raise ValueError(f"SUBCOMMAND can only be one of {SUBCOMMANDS}")

    if not database:
        raise ValueError(f"Option '--database' required for subcommand {subcommand}")

    if subcommand == AgentType.PROCESSOR_SERVER:
        if not address:
            raise ValueError(f"Option '--address' required for subcommand {subcommand}")
        if queue:
            raise ValueError(f"Option '--queue' invalid for subcommand {subcommand}")
    if subcommand == AgentType.PROCESSING_WORKER:
        if address:
            raise ValueError(f"Option '--address' invalid for subcommand {subcommand}")
        if not queue:
            raise ValueError(f"Option '--queue' required for subcommand {subcommand}")

    processor = ProcessorClass(workspace=None)
    if subcommand == AgentType.PROCESSING_WORKER:
        processing_worker = ProcessingWorker(
            rabbitmq_addr=queue,
            mongodb_addr=database,
            processor_name=processor.ocrd_tool['executable'],
            ocrd_tool=processor.ocrd_tool,
            processor_class=ProcessorClass,
        )
        # The RMQConsumer is initialized and a connection to the RabbitMQ is performed
        processing_worker.connect_consumer()
        # Start consuming from the queue with name `processor_name`
        processing_worker.start_consuming()
    elif subcommand == AgentType.PROCESSOR_SERVER:
        # TODO: Better validate that inside the ProcessorServer itself
        # Parse host and port before constructing the server, so a malformed
        # address fails early with a message naming the offending option.
        try:
            host, port = address.split(':')
            port = int(port)
        except ValueError as error:
            raise ValueError(f"Option '--address' must be of the form HOST:PORT, got: {address}") from error
        processor_server = ProcessorServer(
            mongodb_addr=database,
            processor_name=processor.ocrd_tool['executable'],
            processor_class=ProcessorClass,
        )
        processor_server.run_server(host=host, port=port)
    else:
        # Defensive only: the membership check above already rejects other values.
        raise ValueError(f"Unknown network agent type, must be one of: {SUBCOMMANDS}")
    sys.exit(0)
208