| 1 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | OCR-D CLI: bash library | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | .. click:: ocrd.cli.bashlib:bashlib_cli | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |     :prog: ocrd bashlib | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |     :nested: full | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from __future__ import print_function | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | import sys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | from os.path import isfile | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | import click | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | from ocrd.constants import BASHLIB_FILENAME | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | import ocrd.constants | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | import ocrd_utils.constants | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | from ocrd_utils.constants import DEFAULT_METS_BASENAME | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | import ocrd_models.constants | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | import ocrd_validators.constants | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | from ocrd.decorators import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |     parameter_option, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     parameter_override_option, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     ocrd_loglevel, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     ocrd_cli_wrap_processor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  | ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  | from ocrd_utils import ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     is_local_filename, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     get_local_filename, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |     initLogging, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     getLogger, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |     make_file_id, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     config | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  | ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  | from ocrd.resolver import Resolver | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | from ocrd.processor import Processor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  | # ---------------------------------------------------------------------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  | # ocrd bashlib | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  | # ---------------------------------------------------------------------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
# Top-level click command group named 'bashlib'. The subcommands defined
# further down in this module attach themselves to it through
# ``@bashlib_cli.command(...)``. The function body is intentionally empty:
# it only carries the docstring, which click shows as the group's help text.
@click.group('bashlib')
def bashlib_cli():
    """
    Work with bash library
    """
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  | # ---------------------------------------------------------------------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  | # ocrd bashlib filename | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  | # ---------------------------------------------------------------------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
# Registered as the 'filename' subcommand of the ``bashlib_cli`` group.
# Takes no arguments or options.
@bashlib_cli.command('filename')
def bashlib_filename():
    """
    Dump the bash library filename for sourcing by shell scripts

    For functions exported by bashlib, see `<../../README.md>`_
    """
    # BASHLIB_FILENAME is imported from ocrd.constants at module level;
    # it is written to stdout followed by a newline.
    print(BASHLIB_FILENAME)
            
                                                                                                            
                                                                
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  | @bashlib_cli.command('constants') | 
            
                                                                        
                            
            
                                    
            
            
                | 61 |  |  | @click.argument('name') | 
            
                                                                        
                            
            
                                    
            
            
                | 62 |  |  | def bashlib_constants(name): | 
            
                                                                        
                            
            
                                    
            
            
                | 63 |  |  |     """ | 
            
                                                                        
                            
            
                                    
            
            
                | 64 |  |  |     Query constants from ocrd_utils and ocrd_models | 
            
                                                                        
                            
            
                                    
            
            
                | 65 |  |  |     """ | 
            
                                                                        
                            
            
                                    
            
            
                | 66 |  |  |     all_constants = {} | 
            
                                                                        
                            
            
                                    
            
            
                | 67 |  |  |     for src in [ocrd.constants, ocrd_utils.constants, ocrd_models.constants, ocrd_validators.constants]: | 
            
                                                                        
                            
            
                                    
            
            
                | 68 |  |  |         for k in src.__all__: | 
            
                                                                        
                            
            
                                    
            
            
                | 69 |  |  |             all_constants[k] = src.__dict__[k] | 
            
                                                                        
                            
            
                                    
            
            
                | 70 |  |  |     if name in ['*', 'KEYS', '__all__']: | 
            
                                                                        
                            
            
                                    
            
            
                | 71 |  |  |         print(sorted(all_constants.keys())) | 
            
                                                                        
                            
            
                                    
            
            
                | 72 |  |  |         sys.exit(0) | 
            
                                                                        
                            
            
                                    
            
            
                | 73 |  |  |     if name not in all_constants: | 
            
                                                                        
                            
            
                                    
            
            
                | 74 |  |  |         print("ERROR: name '%s' is not a known constant" % name, file=sys.stderr) | 
            
                                                                        
                            
            
                                    
            
            
                | 75 |  |  |         sys.exit(1) | 
            
                                                                        
                            
            
                                    
            
            
                | 76 |  |  |     val = all_constants[name] | 
            
                                                                        
                            
            
                                    
            
            
                | 77 |  |  |     if isinstance(val, dict): | 
            
                                                                        
                            
            
                                    
            
            
                | 78 |  |  |         # make this bash-friendly (show initialization for associative array) | 
            
                                                                        
                            
            
                                    
            
            
                | 79 |  |  |         for key in val: | 
            
                                                                        
                            
            
                                    
            
            
                | 80 |  |  |             print("[%s]=%s" % (key, val[key]), end=' ') | 
            
                                                                        
                            
            
                                    
            
            
                | 81 |  |  |     else: | 
            
                                                                        
                            
            
                                    
            
            
                | 82 |  |  |         print(val) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  | @bashlib_cli.command('input-files') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  | @click.option('-m', '--mets', help="METS to process", default=DEFAULT_METS_BASENAME) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  | @click.option('-w', '--working-dir', help="Working Directory") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  | @click.option('-I', '--input-file-grp', help='File group(s) used as input.', default=None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  | @click.option('-O', '--output-file-grp', help='File group(s) used as output.', default=None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  | # repeat some other processor options for convenience (will be ignored here) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  | @click.option('-g', '--page-id', help="ID(s) of the pages to process") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  | @click.option('--overwrite', is_flag=True, default=False, help="Remove output pages/images if they already exist\n" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |               "(with '--page-id', remove only those).\n" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |               "Short-hand for OCRD_EXISTING_OUTPUT=OVERWRITE") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  | @click.option('--debug', is_flag=True, default=False, help="Abort on any errors with full stack trace.\n" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |               "Short-hand for OCRD_MISSING_OUTPUT=ABORT") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  | @parameter_option | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  | @parameter_override_option | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  | @ocrd_loglevel | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  | def bashlib_input_files(**kwargs): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |     List input files for processing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |     Instantiate a processor and workspace from the given processing options. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |     Then loop through the input files of the input fileGrp, and for each one, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |     print its `url`, `ID`, `mimetype` and `pageId`, as well as its recommended | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |     `outputFileId` (from ``make_file_id``). | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |     (The printing format is one associative array initializer per line.) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                class BashlibProcessor(Processor):
                    """Processor stand-in that prints its input files for consumption by bash.

                    Instead of processing anything, :py:meth:`process_page_file` prints one
                    line per call, formatted as a bash associative array initializer.
                    """

                    @property
                    def ocrd_tool(self):
                        """Return a placeholder tool description (empty executable, one empty step)."""
                        return {'executable': '', 'steps': ['']}

                    @property
                    def version(self):
                        """Return the fixed version string ``'1.0'``."""
                        return '1.0'

                    # go half way of the normal run_processor / process_workspace call tree
                    # by just delegating to process_workspace, overriding process_page_file
                    # to ensure all input files exist locally (without persisting them in the METS)
                    # and print what needs to be acted on in bash-friendly way
                    def process_page_file(self, *input_files):
                        """Print one bash associative array initializer for ``input_files``.

                        For each of the fields ``url``, ``local_filename``, ``ID``, ``mimetype``
                        and ``pageId``, the attribute values of all input files are joined with
                        single spaces and printed as ``[field]='value'``; the line ends with
                        ``[outputFileId]='...'``, derived from the first input file.

                        Args:
                            *input_files: file objects exposing the attributes listed above.

                        Raises:
                            IndexError: if called without any input file.
                        """
                        def squote(text):
                            # Bash cannot escape anything inside single quotes, so an embedded
                            # quote must close the string, add a backslash-escaped quote and
                            # reopen it; without this a quote in a value corrupts the line.
                            return "'" + text.replace("'", "'\\''") + "'"

                        # Fail before printing anything when there is nothing to describe.
                        first_file = input_files[0]
                        for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
                            # make this bash-friendly (show initialization for associative array);
                            # single quotes preserve a space-separated list as one value
                            value = ' '.join(str(getattr(res, field)) for res in input_files)
                            print(f"[{field}]={squote(value)}", end=' ')
                        # kwargs is not defined in this class; it is read from the enclosing scope
                        output_file_id = make_file_id(first_file, kwargs['output_file_grp'])
                        print(f"[outputFileId]={squote(str(output_file_id))}")
            
                                                                                                            
                                                                
            
                                    
            
            
                | 132 |  |  |     ocrd_cli_wrap_processor(BashlibProcessor, **kwargs) | 
            
                                                        
            
                                    
            
            
                | 133 |  |  |  |