Passed
Push — master ( 77064b...da9bd1 )
by Konstantin
01:57
created

ocrd.processor.base.run_processor()   B

Complexity

Conditions 4

Size

Total Lines 59
Code Lines 48

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 48
dl 0
loc 59
rs 8.7018
c 0
b 0
f 0
cc 4
nop 12

How to fix: Long Method, Many Parameters

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists: Replace Parameter with Method, Introduce Parameter Object, and Preserve Whole Object.

1
"""
2
Processor base class and helper functions
3
"""
4
5
# Public API of this module. ``run_cli``, ``run_processor`` and
# ``generate_processor_help`` are re-exported from ``.helpers``.
__all__ = ['Processor', 'generate_processor_help', 'run_cli', 'run_processor']
6
7
import os
8
import json
9
from ocrd_utils import getLogger, VERSION as OCRD_VERSION, MIMETYPE_PAGE
10
from ocrd_validators import ParameterValidator
11
12
# XXX imports must remain for backwards-compatibility
13
from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import
14
15
# Module-level logger registered under the name 'ocrd.processor'.
log = getLogger('ocrd.processor')
16
17
class Processor():
    """
    A processor runs an algorithm based on the workspace, the mets.xml in the
    workspace (and the input files defined therein) as well as optional
    parameter.

    Subclasses must override :py:meth:`process`; the implementation here
    only raises.
    """

    def __init__(
            self,
            workspace,
            ocrd_tool=None,
            parameter=None,
            # TODO OCR-D/core#274
            # input_file_grp=None,
            # output_file_grp=None,
            input_file_grp="INPUT",
            output_file_grp="OUTPUT",
            page_id=None,
            show_help=False,
            show_version=False,
            dump_json=False,
            version=None
    ):
        """
        Set up the processor, or perform one of the informational actions
        and return early.

        The three flags are checked in the order ``dump_json``,
        ``show_help``, ``show_version``. The first one that is set prints
        its output and returns immediately, leaving every attribute that
        is assigned further down unset on the instance.

        Args:
            workspace: Object exposing ``directory`` and ``mets``. If
                truthy, the current working directory is changed to
                ``workspace.directory``.
            ocrd_tool: Tool description; dumped as JSON for ``dump_json``
                and passed to ``ParameterValidator``.
            parameter: Parameter mapping to validate; ``None`` is treated
                as an empty dict.
            input_file_grp: Stored as ``self.input_file_grp``.
            output_file_grp: Stored as ``self.output_file_grp``.
            page_id: Stored as ``self.page_id``; an empty list is
                normalized to ``None``.
            show_help: Print the generated help text and return.
            show_version: Print the version line and return.
            dump_json: Print ``ocrd_tool`` as JSON and return.
            version: Stored as ``self.version`` (used by ``show_version``).

        Raises:
            Exception: If the parameter validation report is not valid.
        """
        if parameter is None:
            parameter = {}

        # Informational modes: print and bail out before touching the workspace.
        if dump_json:
            print(json.dumps(ocrd_tool, indent=True))
            return
        self.ocrd_tool = ocrd_tool
        if show_help:
            self.show_help()
            return
        self.version = version
        if show_version:
            self.show_version()
            return

        self.workspace = workspace
        # FIXME HACK would be better to use pushd_popd(self.workspace.directory)
        # but there is no way to do that in process here since it's an
        # overridden method. chdir is almost always an anti-pattern.
        if self.workspace:
            os.chdir(self.workspace.directory)

        self.input_file_grp = input_file_grp
        self.output_file_grp = output_file_grp
        # Treat "no pages selected" uniformly, whether given as None or [].
        self.page_id = None if page_id == [] or page_id is None else page_id

        report = ParameterValidator(ocrd_tool).validate(parameter)
        if not report.is_valid:
            raise Exception("Invalid parameters %s" % report.errors)
        self.parameter = parameter

    def show_help(self):
        """
        Print the help text generated from ``self.ocrd_tool``.
        """
        print(generate_processor_help(self.ocrd_tool))

    def show_version(self):
        """
        Print the processor version alongside the ocrd/core version.
        """
        print("Version %s, ocrd/core %s" % (self.version, OCRD_VERSION))

    def verify(self):
        """
        Verify that the input fulfills the processor's requirements.

        This base implementation performs no checks and always returns
        ``True``.
        """
        return True

    def process(self):
        """
        Process the workspace

        Raises:
            NotImplementedError: Always; subclasses have to override this
                method. ``NotImplementedError`` is a subclass of
                ``Exception``, so callers catching ``Exception`` still work.
        """
        raise NotImplementedError("Must be implemented")

    @property
    def input_files(self):
        """
        List the input files.

        - If there's a PAGE-XML for the page, take it (and forget about all
          other files for that page)
        - Else fall back to the image files of the input file group
        - If ``self.page_id`` is set and that fallback yields more than one
          image, raise an error (complaining that only PAGE-XML warrants
          having multiple images for a single page)

        (https://github.com/cisocrgroup/ocrd_cis/pull/57#issuecomment-656336593)

        Raises:
            ValueError: If ``self.page_id`` is set, no PAGE-XML was found and
                more than one image matched.
        """
        page_files = self.workspace.mets.find_files(
            fileGrp=self.input_file_grp, pageId=self.page_id, mimetype=MIMETYPE_PAGE)
        if page_files:
            return page_files

        # No PAGE-XML available: look for images ("//" prefix as in the
        # original query string).
        image_files = self.workspace.mets.find_files(
            fileGrp=self.input_file_grp, pageId=self.page_id, mimetype="//image/.*")
        if self.page_id and len(image_files) > 1:
            # page_id is known to be truthy here, so the page hint is always set.
            page_hint = "for page '%s'" % self.page_id
            raise ValueError("No PAGE-XML %s in fileGrp '%s' but multiple images." % (
                page_hint,
                self.input_file_grp
                ))
        return image_files
112