Passed
Pull Request — master (#1240)
by Konstantin
02:47
created

ocrd.cli.bashlib.bashlib_input_files()   A

Complexity

Conditions 3

Size

Total Lines 49
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 28
dl 0
loc 49
rs 9.208
c 0
b 0
f 0
cc 3
nop 1
1
"""
2
OCR-D CLI: bash library
3
4
.. click:: ocrd.cli.bashlib:bashlib_cli
5
    :prog: ocrd bashlib
6
    :nested: full
7
8
"""
9
from __future__ import print_function
10
import sys
11
from os.path import isfile
12
import click
13
14
from ocrd.constants import BASHLIB_FILENAME
15
import ocrd.constants
16
import ocrd_utils.constants
17
from ocrd_utils.constants import DEFAULT_METS_BASENAME
18
import ocrd_models.constants
19
import ocrd_validators.constants
20
from ocrd.decorators import (
21
    parameter_option,
22
    parameter_override_option,
23
    ocrd_loglevel,
24
    ocrd_cli_wrap_processor
25
)
26
from ocrd_utils import (
27
    is_local_filename,
28
    get_local_filename,
29
    initLogging,
30
    getLogger,
31
    make_file_id,
32
    config
33
)
34
from ocrd.resolver import Resolver
35
from ocrd.processor import Processor
36
37
# ----------------------------------------------------------------------
38
# ocrd bashlib
39
# ----------------------------------------------------------------------
40
41
# Root command group of ``ocrd bashlib``; the subcommands in this module
# register themselves on it via ``@bashlib_cli.command``.
# (The docstring doubles as the CLI help text.)
@click.group(name='bashlib')
def bashlib_cli():
    """
    Work with bash library
    """
46
47
# ----------------------------------------------------------------------
48
# ocrd bashlib filename
49
# ----------------------------------------------------------------------
50
51
@bashlib_cli.command(name='filename')
def bashlib_filename():
    """
    Dump the bash library filename for sourcing by shell scripts

    For functions exported by bashlib, see `<../../README.md>`_
    """
    # Write the library path to stdout, followed by a newline, so that a
    # shell script can capture it and source the file.
    library_path = BASHLIB_FILENAME
    print(library_path)
59
60
@bashlib_cli.command('constants')
@click.argument('name')
def bashlib_constants(name):
    """
    Query constants from ocrd, ocrd_utils, ocrd_models and ocrd_validators
    """
    # Collect every name exported via ``__all__`` by the constants modules.
    # On a name clash the module listed later overrides the earlier one.
    sources = (
        ocrd.constants,
        ocrd_utils.constants,
        ocrd_models.constants,
        ocrd_validators.constants,
    )
    all_constants = {key: vars(src)[key] for src in sources for key in src.__all__}

    # Special names: list all known constant names instead of one value.
    if name in ('*', 'KEYS', '__all__'):
        print(sorted(all_constants))
        sys.exit(0)

    # Unknown constant: report on stderr and exit with a non-zero status.
    if name not in all_constants:
        print("ERROR: name '%s' is not a known constant" % name, file=sys.stderr)
        sys.exit(1)

    val = all_constants[name]
    if isinstance(val, dict):
        # make this bash-friendly (show initialization for associative array):
        # all ``[key]=value`` pairs go on one line, separated by spaces
        for key, item in val.items():
            print("[%s]=%s" % (key, item), end=' ')
    else:
        print(val)
83
84
@bashlib_cli.command('input-files')
@click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME)
@click.option('-w', '--working-dir', help="Working Directory")
@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None)
@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None)
# repeat some other processor options for convenience (will be ignored here)
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n"
              "(with '--page-id', remove only those).\n"
              "Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE")
@click.option('--debug', is_flag=True, default=False, help="Abort on any errors with full stack trace.\n"
              "Short-hand for OCRD_MISSING_OUTPUT=ABORT")
@parameter_option
@parameter_override_option
@ocrd_loglevel
def bashlib_input_files(**kwargs):
    """
    List input files for processing

    Instantiate a processor and workspace from the given processing options.
    Then loop through the input files of the input fileGrp, and for each one,
    print its `url`, `local_filename`, `ID`, `mimetype` and `pageId`, as well
    as its recommended `outputFileId` (from ``make_file_id``).

    (The printing format is one associative array initializer per line.)
    """
    def bash_single_quoted(text):
        # Inside bash single quotes nothing is special except the quote itself,
        # so an embedded quote is written as: close string, escaped quote, reopen.
        # Text without single quotes is emitted exactly as '<text>'.
        return "'" + text.replace("'", "'\\''") + "'"

    class BashlibProcessor(Processor):
        # Minimal stand-in tool description and version for this ad-hoc processor.
        @property
        def ocrd_tool(self):
            return {'executable': '', 'steps': ['']}

        @property
        def version(self):
            return '1.0'

        # go half way of the normal run_processor / process_workspace call tree
        # by just delegating to process_workspace, overriding process_page_file
        # to ensure all input files exist locally (without persisting them in the METS)
        # and print what needs to be acted on in bash-friendly way
        def process_page_file(self, *input_files):
            # Fail right away (IndexError) when called without any input file.
            first_file = input_files[0]
            for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
                # make this bash-friendly (show initialization for associative array);
                # with several input files the values are space-separated, and the
                # single quotes preserve that list as one value inside the alist
                value = ' '.join(str(getattr(res, field)) for res in input_files)
                print(f"[{field}]={bash_single_quoted(value)}", end=' ')
            # The output file ID is derived from the first input file only.
            output_file_id = make_file_id(first_file, kwargs['output_file_grp'])
            print(f"[outputFileId]={bash_single_quoted(str(output_file_id))}")

    ocrd_cli_wrap_processor(BashlibProcessor, **kwargs)
133