Passed
Push — master ( 77064b...da9bd1 )
by Konstantin
01:57
created

ocrd.processor.base.Processor.show_help()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""
Processor base class and helper functions
"""
4
5
# Public API of this module. Every entry must match a name that is defined or
# imported here, otherwise `from <module> import *` raises AttributeError.
__all__ = ['Processor', 'generate_processor_help', 'run_cli', 'run_processor']
6
7
import os
8
import json
9
from ocrd_utils import getLogger, VERSION as OCRD_VERSION, MIMETYPE_PAGE
10
from ocrd_validators import ParameterValidator
11
12
# XXX imports must remain for backwards-compatibility
13
from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import
14
15
# Module-level logger, obtained from ocrd_utils.getLogger under the name
# 'ocrd.processor'.
log = getLogger('ocrd.processor')
16
17
class Processor():
    """
    A processor runs an algorithm based on the workspace, the mets.xml in the
    workspace (and the input files defined therein) as well as optional
    parameter.
    """

    def __init__(
            self,
            workspace,
            ocrd_tool=None,
            parameter=None,
            # TODO OCR-D/core#274
            # input_file_grp=None,
            # output_file_grp=None,
            input_file_grp="INPUT",
            output_file_grp="OUTPUT",
            page_id=None,
            show_help=False,
            show_version=False,
            dump_json=False,
            version=None
    ):
        """
        Set up the processor, or print information and stop early.

        The three flags below are checked in this order; each one prints and
        returns immediately, so attributes assigned further down are NOT set
        on the instance in that case:

        1. ``dump_json``: print ``ocrd_tool`` as JSON (no attribute is set).
        2. ``show_help``: set ``self.ocrd_tool``, then call ``show_help()``.
        3. ``show_version``: additionally set ``self.version``, then call
           ``show_version()``.

        Args:
            workspace: workspace to operate on. If truthy, the current
                working directory is changed to ``workspace.directory``.
            ocrd_tool: tool description; dumped as JSON, rendered as help,
                and handed to ``ParameterValidator``.
            parameter: parameters to validate against ``ocrd_tool``;
                ``None`` is treated as an empty dict.
            input_file_grp: name of the input file group.
            output_file_grp: name of the output file group.
            page_id: page selection; ``None`` and ``[]`` both become ``None``.
            show_help: print the help text and return early.
            show_version: print the version line and return early.
            dump_json: print ``ocrd_tool`` as JSON and return early.
            version: processor version reported by ``show_version()``.

        Raises:
            Exception: if the parameter validation report is not valid.
        """
        if parameter is None:
            parameter = {}
        if dump_json:
            print(json.dumps(ocrd_tool, indent=True))
            return
        self.ocrd_tool = ocrd_tool
        if show_help:
            self.show_help()
            return
        self.version = version
        if show_version:
            self.show_version()
            return
        self.workspace = workspace
        # FIXME HACK would be better to use pushd_popd(self.workspace.directory)
        # but there is no way to do that in process here since it's an
        # overridden method. chdir is almost always an anti-pattern.
        if self.workspace:
            os.chdir(self.workspace.directory)
        self.input_file_grp = input_file_grp
        self.output_file_grp = output_file_grp
        # Normalize "no selection" so that both None and [] end up as None.
        self.page_id = None if page_id == [] or page_id is None else page_id
        validator = ParameterValidator(ocrd_tool)
        report = validator.validate(parameter)
        if not report.is_valid:
            raise Exception("Invalid parameters %s" % report.errors)
        self.parameter = parameter

    def show_help(self):
        """
        Print the help text generated from ``self.ocrd_tool``.
        """
        print(generate_processor_help(self.ocrd_tool))

    def show_version(self):
        """
        Print the processor version together with the ocrd/core version.
        """
        print("Version %s, ocrd/core %s" % (self.version, OCRD_VERSION))

    def verify(self):
        """
        Verify that the input fulfills the processor's requirements.

        This base implementation performs no checks and always returns True.
        """
        return True

    def process(self):
        """
        Process the workspace

        Must be overridden by subclasses; the base implementation always
        raises.

        Raises:
            NotImplementedError: always. It is a subclass of ``Exception``,
                so callers catching ``Exception`` keep working.
        """
        raise NotImplementedError("Must be implemented")

    @property
    def input_files(self):
        """
        List the input files.

        - If there's a PAGE-XML for the page, take it (and forget about all
          other files for that page)
        - Else if there's only one image, take it (and forget about all other
          files for that page)
        - Otherwise raise an error (complaining that only PAGE-XML warrants
          having multiple images for a single page)

        Note that the multiple-images check only applies when ``page_id`` is
        set; without a page selection all matching images are returned.

        (https://github.com/cisocrgroup/ocrd_cis/pull/57#issuecomment-656336593)

        Raises:
            ValueError: if ``page_id`` is set, no PAGE-XML file matched and
                more than one image file matched.
        """
        ret = self.workspace.mets.find_files(
            fileGrp=self.input_file_grp, pageId=self.page_id, mimetype=MIMETYPE_PAGE)
        if ret:
            return ret
        # NOTE(review): the leading "//" presumably tells find_files to treat
        # the value as a regular expression - confirm against the METS API.
        ret = self.workspace.mets.find_files(
            fileGrp=self.input_file_grp, pageId=self.page_id, mimetype="//image/.*")
        if self.page_id and len(ret) > 1:
            raise ValueError("No PAGE-XML %s in fileGrp '%s' but multiple images." % (
                "for page '%s'" % self.page_id if self.page_id else '',
                self.input_file_grp
                ))
        return ret
112