Passed
Pull Request — master (#759)
by Konstantin
02:11
created

ocrd.cli.bashlib.bashlib_input_files()   B

Complexity

Conditions 5

Size

Total Lines 40
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 28
dl 0
loc 40
rs 8.7413
c 0
b 0
f 0
cc 5
nop 1
1
"""
2
OCR-D CLI: bash library
3
4
.. click:: ocrd.cli.bashlib:bashlib_cli
5
    :prog: ocrd bashlib
6
    :nested: full
7
8
"""
9
from __future__ import print_function
10
import sys
11
from os.path import isfile
12
import click
13
14
from ocrd.constants import BASHLIB_FILENAME
15
import ocrd.constants
16
import ocrd_utils.constants
17
import ocrd_models.constants
18
import ocrd_validators.constants
19
from ocrd.decorators import (
20
    parameter_option,
21
    parameter_override_option,
22
    ocrd_loglevel
23
)
24
from ocrd_utils import (
25
    is_local_filename,
26
    get_local_filename,
27
    initLogging,
28
    make_file_id
29
)
30
from ocrd.resolver import Resolver
31
from ocrd.processor import Processor
32
33
# ----------------------------------------------------------------------
34
# ocrd bashlib
35
# ----------------------------------------------------------------------
36
37
# Root of the ``ocrd bashlib`` command group; the subcommands defined in this
# module attach themselves to it via ``@bashlib_cli.command(...)``.
@click.group(name='bashlib')
def bashlib_cli():
    """
    Work with bash library
    """
    # Intentionally empty: the group only dispatches to its subcommands.
42
43
# ----------------------------------------------------------------------
44
# ocrd bashlib filename
45
# ----------------------------------------------------------------------
46
47
@bashlib_cli.command(name='filename')
def bashlib_filename():
    """
    Dump the bash library filename for sourcing by shell scripts

    For functions exported by bashlib, see `<../../README.md>`_
    """
    # Emit the BASHLIB_FILENAME constant on stdout (one line), so that a
    # shell script can capture it and `source` the result.
    library_path = BASHLIB_FILENAME
    print(library_path)
55
56
@bashlib_cli.command('constants')
@click.argument('name')
def bashlib_constants(name):
    """
    Query constants from ocrd, ocrd_utils, ocrd_models and ocrd_validators

    Print the value of the constant NAME. If NAME is one of ``*``, ``KEYS``
    or ``__all__``, print the sorted list of all known constant names instead.

    Exit with status 1 (and a message on stderr) if NAME is not a known constant.
    """
    # Collect everything the constants modules export via ``__all__``.
    # If two modules export the same name, the module later in the list wins.
    all_constants = {}
    for src in [ocrd.constants, ocrd_utils.constants, ocrd_models.constants, ocrd_validators.constants]:
        for k in src.__all__:
            all_constants[k] = src.__dict__[k]
    if name in ['*', 'KEYS', '__all__']:
        print(sorted(all_constants.keys()))
        # Listing requested: stop here. Without this return, the lookup below
        # would reject the pseudo-name and exit with an error after a
        # successful listing.
        return
    if name not in all_constants:
        print("ERROR: name '%s' is not a known constant" % name, file=sys.stderr)
        sys.exit(1)
    val = all_constants[name]
    if isinstance(val, dict):
        # make this bash-friendly (show initialization for associative array);
        # all entries go on one line, separated by spaces, without a final newline
        for key in val:
            print("[%s]=%s" % (key, val[key]), end=' ')
    else:
        print(val)
78
79
@bashlib_cli.command('input-files')
@click.option('-m', '--mets', help="METS to process", default="mets.xml")
@click.option('-w', '--working-dir', help="Working Directory")
@click.option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT')
@click.option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT')
# Further processor options are accepted for convenience. Of these, only
# ``page_id`` is read in the function body below; the others are not read here.
@click.option('-g', '--page-id', help="ID(s) of the pages to process")
@click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist")
@parameter_option
@parameter_override_option
@ocrd_loglevel
def bashlib_input_files(**kwargs):
    """
    List input files for processing

    Instantiate a processor and workspace from the given processing options.
    Then loop through the input files of the input fileGrp, and for each one,
    print its `url`, `ID`, `mimetype` and `pageId`, as well as its recommended
    `outputFileId` (from ``make_file_id``).

    (The printing format is one associative array initializer per line.)
    """
    initLogging()
    mets_url = kwargs.pop('mets')
    directory = kwargs.pop('working_dir')
    # Fail early when the METS is a local path that does not point to a file.
    if is_local_filename(mets_url):
        if not isfile(get_local_filename(mets_url)):
            raise Exception("File does not exist: %s" % mets_url)
    workspace = Resolver().workspace_from_url(mets_url, directory)
    processor = Processor(
        workspace,
        ocrd_tool=None,
        page_id=kwargs['page_id'],
        input_file_grp=kwargs['input_file_grp'],
        output_file_grp=kwargs['output_file_grp'],
    )
    # Attributes of each input file that are exported to the shell.
    exported_fields = ('url', 'ID', 'mimetype', 'pageId')
    for ocrd_file in processor.input_files:
        # One line per file: space-separated ``[key]='value'`` entries, i.e.
        # the inside of a bash associative array initializer.
        for attr in exported_fields:
            print("[%s]='%s'" % (attr, getattr(ocrd_file, attr)), end=' ')
        # The last entry ends the line (default newline from print).
        print("[outputFileId]='%s'" % make_file_id(ocrd_file, kwargs['output_file_grp']))
119