| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                OCR-D CLI: workspace management  | 
            
            
                                                        
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                .. click:: ocrd.cli.workspace:workspace_cli  | 
            
            
                                                        
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                    :prog: ocrd workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                    :nested: full  | 
            
            
                                                        
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                import os  | 
            
            
                                                        
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                from os import getcwd  | 
            
            
                                                        
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                from os.path import relpath, exists, join, isabs  | 
            
            
                                                        
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                from pathlib import Path  | 
            
            
                                                        
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                from json import loads, dumps  | 
            
            
                                                        
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                import sys  | 
            
            
                                                        
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                from glob import glob   # XXX pathlib.Path.glob does not support absolute globs  | 
            
            
                                                        
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                import re  | 
            
            
                                                        
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                import time  | 
            
            
                                                        
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                import numpy as np  | 
            
            
                                                        
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                import click  | 
            
            
                                                        
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager  | 
            
            
                                                        
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd.mets_server import OcrdMetsServer  | 
            
            
                                                        
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME, safe_filename, parse_json_string_or_file, partition_list, DEFAULT_METS_BASENAME  | 
            
            
                                                        
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd.decorators import mets_find_options  | 
            
            
                                                        
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                from . import command_with_replaced_help  | 
            
            
                                                        
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_models.constants import METS_PAGE_DIV_ATTRIBUTE  | 
            
            
                                                        
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                class WorkspaceCtx:
                    """
                    State object handed to the ``ocrd workspace`` subcommands.

                    Attributes set by the constructor:

                    - ``log``: logger named ``ocrd.cli.workspace``
                    - ``resolver``: a fresh ``Resolver`` instance
                    - ``directory``, ``mets_url``, ``mets_basename``, ``mets_server_url``:
                      the four values returned by ``Resolver.resolve_mets_arguments``
                      for the arguments given here
                    - ``automatic_backup``: stored unchanged from the argument
                    """

                    def __init__(self, directory, mets_url, mets_basename=DEFAULT_METS_BASENAME, mets_server_url=None, automatic_backup=False):
                        self.log = getLogger('ocrd.cli.workspace')
                        # Any truthy basename (including the default one) triggers the
                        # deprecation notice; it is logged, not raised.
                        if mets_basename:
                            self.log.warning(DeprecationWarning('--mets-basename is deprecated. Use --mets/--directory instead.'))
                        self.resolver = Resolver()
                        resolved = self.resolver.resolve_mets_arguments(directory, mets_url, mets_basename, mets_server_url)
                        # The resolver result is unpacked positionally into four attributes.
                        self.directory, self.mets_url, self.mets_basename, self.mets_server_url = resolved
                        self.automatic_backup = automatic_backup
            
            
                                                        
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                pass_workspace = click.make_pass_decorator(WorkspaceCtx)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                # NOTE: the function docstring below doubles as the group's --help text,
                # so parameter documentation is kept in comments rather than added there.
                #
                # Parameters (all supplied by the click options declared here):
                #   ctx             -- click context; its ``obj`` is set to a WorkspaceCtx
                #   directory       -- -d/--directory, also read from envvar WORKSPACE_DIR
                #   mets            -- -m/--mets, forwarded as ``mets_url``
                #   mets_basename   -- -M/--mets-basename (deprecated option)
                #   mets_server_url -- -U/--mets-server-url
                #   backup          -- --backup flag, forwarded as ``automatic_backup``
                @click.group("workspace")
                @click.option('-d', '--directory', envvar='WORKSPACE_DIR', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR',
                              # Fixed: the help text previously ended with a stray '"' character.
                              help='Changes the workspace folder location [default: METS_URL directory or .]')
                @click.option('-M', '--mets-basename', default=None, help='METS file basename. Deprecated, use --mets/--directory')
                @click.option('-m', '--mets', default=None, help='The path/URL of the METS file [default: WORKSPACE_DIR/mets.xml]', metavar="METS_URL")
                @click.option('-U', '--mets-server-url', 'mets_server_url', help="TCP host of METS server")
                @click.option('--backup', default=False, help="Backup mets.xml whenever it is saved.", is_flag=True)
                @click.pass_context
                def workspace_cli(ctx, directory, mets, mets_basename, mets_server_url, backup):
                    """
                    Managing workspaces

                    A workspace comprises a METS file and a directory as point of reference.

                    Operates on the file system directly or via a METS server
                    (already running via some prior `server start` subcommand).
                    """
                    # Logging is initialised before WorkspaceCtx is built, because its
                    # constructor obtains a logger.
                    initLogging()
                    # Stored on the click context so that subcommands decorated with
                    # ``pass_workspace`` receive this object.
                    ctx.obj = WorkspaceCtx(
                        directory,
                        mets_url=mets,
                        mets_basename=mets_basename,
                        mets_server_url=mets_server_url,
                        automatic_backup=backup
                    )
            
            
                                                        
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace validate  | 
            
            
                                                        
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('validate', cls=command_with_replaced_help(
                    (r' \[METS_URL\]', ''))) # XXX deprecated argument
                @pass_workspace
                @click.option('-a', '--download', is_flag=True, help="Download all files")
                @click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(
                    ['imagefilename', 'dimension', 'pixel_density', 'page', 'url', 'page_xsd', 'mets_fileid_page_pcgtsid',
                     'mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'mets_xsd']))
                @click.option('--page-textequiv-consistency', '--page-strictness', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
                @click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency", type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
                @click.argument('mets_url', default=None, required=False)
                def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency, page_coordinate_consistency):
                    """
                    Validate a workspace

                    METS_URL can be a URL, an absolute path or a path relative to $PWD.
                    If not given, use --mets accordingly.

                    Check that the METS and its referenced file contents
                    abide by the OCR-D specifications.
                    """
                    # NOTE: the docstring above is kept verbatim on purpose; it presumably
                    # doubles as the command's --help text (the command class is built from
                    # a help-replacement pattern for ' [METS_URL]') -- confirm before editing.
                    #
                    # Parameters:
                    #   ctx      -- object injected by @pass_workspace; this function reads
                    #               its ``mets_url``, ``resolver`` and ``directory`` attributes.
                    #   mets_url -- deprecated positional argument; when given it takes
                    #               precedence over ``ctx.mets_url``.
                    #   download, skip, page_textequiv_consistency,
                    #   page_coordinate_consistency -- forwarded unchanged to
                    #               WorkspaceValidator.validate (textequiv consistency is
                    #               passed under the keyword ``page_strictness``).
                    LOG = getLogger('ocrd.cli.workspace.validate')
                    if mets_url:
                        # The hint must name *this* subcommand ('validate'), not 'init'.
                        LOG.warning(DeprecationWarning("Use 'ocrd workspace --mets METS validate' instead of argument 'METS_URL' ('%s')" % mets_url))
                    else:
                        mets_url = ctx.mets_url
                    report = WorkspaceValidator.validate(
                        ctx.resolver,
                        mets_url,
                        src_dir=ctx.directory,
                        skip=skip,
                        download=download,
                        page_strictness=page_textequiv_consistency,
                        page_coordinate_consistency=page_coordinate_consistency
                    )
                    # The XML report is always printed; an invalid workspace additionally
                    # terminates the process with exit status 128.
                    print(report.to_xml())
                    if not report.is_valid:
                        sys.exit(128)
            
            
                                                        
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace clone  | 
            
            
                                                        
            
                                    
            
            
                | 
                    116
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    117
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    118
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('clone', cls=command_with_replaced_help(
                    (r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
                @click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
                @click.option('-a', '--download', is_flag=True, help="Download all files and change location in METS file after cloning")
                @click.argument('mets_url')
                @mets_find_options
                # XXX deprecated
                @click.argument('workspace_dir', default=None, required=False)
                @pass_workspace
                def workspace_clone(ctx, clobber_mets, download, file_grp, file_id, page_id, mimetype, include_fileGrp, exclude_fileGrp, mets_url, workspace_dir):
                    """
                    Create a workspace from METS_URL and return the directory

                    METS_URL can be a URL, an absolute path or a path relative to $PWD.
                    If METS_URL is not provided, use --mets accordingly.
                    METS_URL can also be an OAI-PMH GetRecord URL wrapping a METS file.
                    """
                    # NOTE: docstring kept verbatim; it presumably serves as the --help text.
                    logger = getLogger('ocrd.cli.workspace.clone')

                    # Deprecated positional WORKSPACE_DIR: warn, then let it override the
                    # directory stored on the context object.
                    if workspace_dir:
                        deprecation = DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir)
                        logger.warning(deprecation)
                        ctx.directory = workspace_dir

                    clone_from_url = ctx.resolver.workspace_from_url
                    # Keyword arguments forwarded to the resolver; file_grp is accepted by
                    # this command but not forwarded, as before.
                    clone_kwargs = {
                        'dst_dir': ctx.directory,
                        'mets_basename': ctx.mets_basename,
                        'clobber_mets': clobber_mets,
                        'download': download,
                        'ID': file_id,
                        'pageId': page_id,
                        'mimetype': mimetype,
                        'include_fileGrp': include_fileGrp,
                        'exclude_fileGrp': exclude_fileGrp,
                    }
                    cloned = clone_from_url(mets_url, **clone_kwargs)

                    # Persist the METS, then report where the workspace was created.
                    cloned.save_mets()
                    print(cloned.directory)
            
            
                                                        
            
                                    
            
            
                | 
                    154
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    155
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    156
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace init  | 
            
            
                                                        
            
                                    
            
            
                | 
                    157
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    158
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    159
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('init', cls=command_with_replaced_help(
                    (r' \[DIRECTORY\]', '')))  # XXX deprecated argument
                @click.option('-f', '--clobber-mets', help="Clobber mets.xml if it exists", is_flag=True, default=False)
                # XXX deprecated
                @click.argument('directory', default=None, required=False)
                @pass_workspace
                def workspace_init(ctx, clobber_mets, directory):
                    """
                    Create a workspace with an empty METS file in --directory.

                    """
                    # NOTE(review): the docstring is kept verbatim; it is presumably
                    # rendered as the CLI help text for this command - confirm before rewording.
                    logger = getLogger('ocrd.cli.workspace.init')

                    # The positional DIRECTORY argument is deprecated: when given, warn and
                    # let it override the directory already stored on the context object.
                    if directory:
                        deprecation = DeprecationWarning(
                            "Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory)
                        logger.warning(deprecation)
                        ctx.directory = directory

                    # Collect the resolver arguments first, then create the workspace.
                    init_kwargs = {
                        'directory': ctx.directory,
                        'mets_basename': ctx.mets_basename,
                        'clobber_mets': clobber_mets,
                    }
                    new_workspace = ctx.resolver.workspace_from_nothing(**init_kwargs)

                    # Persist the METS file and report the workspace directory on stdout.
                    new_workspace.save_mets()
                    print(new_workspace.directory)
            
            
                                                        
            
                                    
            
            
                | 
                    181
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    182
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    183
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace add  | 
            
            
                                                        
            
                                    
            
            
                | 
                    184
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    185
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    186
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('add') | 
            
            
                                                        
            
                                    
            
            
                | 
                    187
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-G', '--file-grp', help="fileGrp USE", required=True, metavar='FILE_GRP') | 
            
            
                                                        
            
                                    
            
            
                | 
                    188
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-i', '--file-id', help="ID for the file", required=True, metavar='FILE_ID') | 
            
            
                                                        
            
                                    
            
            
                | 
                    189
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-m', '--mimetype', help="Media type of the file. Guessed from extension if not provided", required=False, metavar='TYPE') | 
            
            
                                                        
            
                                    
            
            
                | 
                    190
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-g', '--page-id', help="ID of the physical page", metavar='PAGE_ID') | 
            
            
                                                        
            
                                    
            
            
                | 
                    191
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-C', '--check-file-exists', help="Whether to ensure FNAME exists", is_flag=True, default=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    192
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('--ignore', help="Do not check whether file exists.", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    193
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('--force', help="If file with ID already exists, replace it. No effect if --ignore is set.", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    194
                 | 
                                    
                                                     | 
                
                 | 
                @click.argument('fname', required=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    195
                 | 
                                    
                                                     | 
                
                 | 
                @pass_workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    196
                 | 
                                    
                                                     | 
                
                 | 
                def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_file_exists, force, fname):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    197
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    198
                 | 
                                    
                                                     | 
                
                 | 
                    Add a file or http(s) URL FNAME to METS in a workspace.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    199
                 | 
                                    
                                                     | 
                
                 | 
                    If FNAME is not an http(s) URL and is not a workspace-local existing file, try to copy to workspace.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    200
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    201
                 | 
                                    
                                                     | 
                
                 | 
                    workspace = Workspace(  | 
            
            
                                                        
            
                                    
            
            
                | 
                    202
                 | 
                                    
                                                     | 
                
                 | 
                        ctx.resolver,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    203
                 | 
                                    
                                                     | 
                
                 | 
                        directory=ctx.directory,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    204
                 | 
                                    
                                                     | 
                
                 | 
                        mets_basename=ctx.mets_basename,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    205
                 | 
                                    
                                                     | 
                
                 | 
                        automatic_backup=ctx.automatic_backup,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    206
                 | 
                                    
                                                     | 
                
                 | 
                        mets_server_url=ctx.mets_server_url,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    207
                 | 
                                    
                                                     | 
                
                 | 
                    )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    208
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    209
                 | 
                                    
                                                     | 
                
                 | 
                    log = getLogger('ocrd.cli.workspace.add') | 
            
            
                                                        
            
                                    
            
            
                | 
                    210
                 | 
                                    
                                                     | 
                
                 | 
                    if not mimetype:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    211
                 | 
                                    
                                                     | 
                
                 | 
                        try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    212
                 | 
                                    
                                                     | 
                
                 | 
                            mimetype = EXT_TO_MIME[Path(fname).suffix]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    213
                 | 
                                    
                                                     | 
                
                 | 
                            log.info("Guessed mimetype to be %s" % mimetype) | 
            
            
                                                        
            
                                    
            
            
                | 
                    214
                 | 
                                    
                                                     | 
                
                 | 
                        except KeyError:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    215
                 | 
                                    
                                                     | 
                
                 | 
                            log.error("Cannot guess mimetype from extension '%s' for '%s'. Set --mimetype explicitly" % (Path(fname).suffix, fname)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    216
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    217
                 | 
                                    
                                                     | 
                
                 | 
                    log.debug("Adding '%s'", fname) | 
            
            
                                                        
            
                                    
            
            
                | 
                    218
                 | 
                                    
                                                     | 
                
                 | 
                    local_filename = None  | 
            
            
                                                        
            
                                    
            
            
                | 
                    219
                 | 
                                    
                                                     | 
                
                 | 
                    if not (fname.startswith('http://') or fname.startswith('https://')): | 
            
            
                                                        
            
                                    
            
            
                | 
                    220
                 | 
                                    
                                                     | 
                
                 | 
                        if not fname.startswith(ctx.directory):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    221
                 | 
                                    
                                                     | 
                
                 | 
                            if not isabs(fname) and exists(join(ctx.directory, fname)):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    222
                 | 
                                    
                                                     | 
                
                 | 
                                fname = join(ctx.directory, fname)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    223
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    224
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug("File '%s' is not in workspace, copying", fname) | 
            
            
                                                        
            
                                    
            
            
                | 
                    225
                 | 
                                    
                                                     | 
                
                 | 
                                try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    226
                 | 
                                    
                                                     | 
                
                 | 
                                    fname = ctx.resolver.download_to_directory(ctx.directory, fname, subdir=file_grp)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    227
                 | 
                                    
                                                     | 
                
                 | 
                                except FileNotFoundError:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    228
                 | 
                                    
                                                     | 
                
                 | 
                                    if check_file_exists:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    229
                 | 
                                    
                                                     | 
                
                 | 
                                        log.error("File '%s' does not exist, halt execution!" % fname) | 
            
            
                                                        
            
                                    
            
            
                | 
                    230
                 | 
                                    
                                                     | 
                
                 | 
                                        sys.exit(1)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    231
                 | 
                                    
                                                     | 
                
                 | 
                        if check_file_exists and not exists(fname):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    232
                 | 
                                    
                                                     | 
                
                 | 
                            log.error("File '%s' does not exist, halt execution!" % fname) | 
            
            
                                                        
            
                                    
            
            
                | 
                    233
                 | 
                                    
                                                     | 
                
                 | 
                            sys.exit(1)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    234
                 | 
                                    
                                                     | 
                
                 | 
                        if fname.startswith(ctx.directory):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    235
                 | 
                                    
                                                     | 
                
                 | 
                            fname = relpath(fname, ctx.directory)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    236
                 | 
                                    
                                                     | 
                
                 | 
                        local_filename = fname  | 
            
            
                                                        
            
                                    
            
            
                | 
                    237
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    238
                 | 
                                    
                                                     | 
                
                 | 
                    if not page_id:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    239
                 | 
                                    
                                                     | 
                
                 | 
                        log.warning("You did not provide '--page-id/-g', so the file you added is not linked to a specific page.") | 
            
            
                                                        
            
                                    
            
            
                | 
                    240
                 | 
                                    
                                                     | 
                
                 | 
                    kwargs = { | 
            
            
                                                        
            
                                    
            
            
                | 
                    241
                 | 
                                    
                                                     | 
                
                 | 
                        'file_id': file_id,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    242
                 | 
                                    
                                                     | 
                
                 | 
                        'mimetype': mimetype,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    243
                 | 
                                    
                                                     | 
                
                 | 
                        'page_id': page_id,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    244
                 | 
                                    
                                                     | 
                
                 | 
                        'force': force,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    245
                 | 
                                    
                                                     | 
                
                 | 
                        'ignore': ignore,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    246
                 | 
                                    
                                                     | 
                
                 | 
                        'local_filename': local_filename,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    247
                 | 
                                    
                                                     | 
                
                 | 
                        'url': fname  | 
            
            
                                                        
            
                                    
            
            
                | 
                    248
                 | 
                                    
                                                     | 
                
                 | 
                    }  | 
            
            
                                                        
            
                                    
            
            
                | 
                    249
                 | 
                                    
                                                     | 
                
                 | 
                    workspace.add_file(file_grp, **kwargs)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    250
                 | 
                                    
                                                     | 
                
                 | 
                    workspace.save_mets()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    251
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    252
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    253
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace bulk-add  | 
            
            
                                                        
            
                                    
            
            
                | 
                    254
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    255
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    256
                 | 
                                    
                                                     | 
                
                 | 
                # pylint: disable=broad-except  | 
            
            
                                                        
            
                                    
            
            
                | 
                    257
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('bulk-add') | 
            
            
                                                        
            
                                    
            
            
                | 
                    258
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-r', '--regex', help="Regular expression matching the FILE_GLOB filesystem paths to define named captures usable in the other parameters", required=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    259
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-m', '--mimetype', help="Media type of the file. If not provided, guess from filename", required=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    260
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-g', '--page-id', help="physical page ID of the file", required=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    261
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-i', '--file-id', help="ID of the file. If not provided, derive from fileGrp and filename", required=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    262
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-u', '--url', help="Remote URL of the file", required=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    263
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-l', '--local-filename', help="Local filesystem path in the workspace directory (copied from source file if different)", required=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    264
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-G', '--file-grp', help="File group USE of the file", required=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    265
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-n', '--dry-run', help="Don't actually do anything to the METS or filesystem, just preview", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    266
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-S', '--source-path', 'src_path_option', help="File path to copy from (if different from FILE_GLOB values)", required=False) | 
            
            
                                                        
            
                                    
            
            
                | 
                    267
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-I', '--ignore', help="Disable checking for existing file entries (faster)", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    268
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-f', '--force', help="Replace existing file entries with the same ID (no effect when --ignore is set, too)", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    269
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-s', '--skip', help="Skip files not matching --regex (instead of failing)", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    270
                 | 
                                    
                                                     | 
                
                 | 
                @click.argument('file_glob', nargs=-1, required=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    271
                 | 
                                    
                                                     | 
                
                 | 
                @pass_workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    272
                 | 
                                    
                                                     | 
                
                 | 
                def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, local_filename, file_grp, dry_run, file_glob, src_path_option, ignore, force, skip):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    273
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    274
                 | 
                                    
                                                     | 
                
                 | 
                    Add files in bulk to an OCR-D workspace.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    275
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    276
                 | 
                                    
                                                     | 
                
                 | 
                    FILE_GLOB can either be a shell glob expression to match file names,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    277
                 | 
                                    
                                                     | 
                
                 | 
                    or a list of expressions or '-', in which case expressions are read from STDIN.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    278
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    279
                 | 
                                    
                                                     | 
                
                 | 
                    After globbing, --regex is matched against each expression resulting from FILE_GLOB, and can  | 
            
            
                                                        
            
                                    
            
            
                | 
                    280
                 | 
                                    
                                                     | 
                
                 | 
                    define named groups reusable in the --page-id, --file-id, --mimetype, --url, --source-path and  | 
            
            
                                                        
            
                                    
            
            
                | 
                    281
                 | 
                                    
                                                     | 
                
                 | 
                    --file-grp options, e.g. by referencing the group name 'grp' from the regex as '{{ grp }}'. | 
            
            
                                                        
            
                                    
            
            
                | 
                    282
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    283
                 | 
                                    
                                                     | 
                
                 | 
                    If the FILE_GLOB expressions do not denote the file names themselves  | 
            
            
                                                        
            
                                    
            
            
                | 
                    284
                 | 
                                    
                                                     | 
                
                 | 
                    (but arbitrary strings for --regex matching), then use --source-path to set  | 
            
            
                                                        
            
                                    
            
            
                | 
                    285
                 | 
                                    
                                                     | 
                
                 | 
                    the actual file paths to use. (This could involve fixed strings or group references.)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    286
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    287
                 | 
                                    
                                                     | 
                
                 | 
                    \b  | 
            
            
                                                        
            
                                    
            
            
                | 
                    288
                 | 
                                    
                                                     | 
                
                 | 
                    Examples:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    289
                 | 
                                    
                                                     | 
                
                 | 
                        ocrd workspace bulk-add \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    290
                 | 
                                    
                                                     | 
                
                 | 
                                --regex '(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\\.[^.]+' \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    291
                 | 
                                    
                                                     | 
                
                 | 
                                --page-id 'PHYS_{{ pageid }}' \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    292
                 | 
                                    
                                                     | 
                
                 | 
                                --file-grp "{{ fileGrp }}" \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    293
                 | 
                                    
                                                     | 
                
                 | 
                                path/to/files/*/*.*  | 
            
            
                                                        
            
                                    
            
            
                | 
                    294
                 | 
                                    
                                                     | 
                
                 | 
                        \b  | 
            
            
                                                        
            
                                    
            
            
                | 
                    295
                 | 
                                    
                                                     | 
                
                 | 
                        echo "path/to/src/file.xml SEG/page_p0001.xml" \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    296
                 | 
                                    
                                                     | 
                
                 | 
                        | ocrd workspace bulk-add \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    297
                 | 
                                    
                                                     | 
                
                 | 
                                --regex '(?P<src>.*?) (?P<fileGrp>.+?)/page_(?P<pageid>.*)\\.(?P<ext>[^\\.]*)' \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    298
                 | 
                                    
                                                     | 
                
                 | 
                                --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    299
                 | 
                                    
                                                     | 
                
                 | 
                                --page-id 'PHYS_{{ pageid }}' \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    300
                 | 
                                    
                                                     | 
                
                 | 
                                --file-grp "{{ fileGrp }}" \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    301
                 | 
                                    
                                                     | 
                
                 | 
                                --local-filename '{{ fileGrp }}/FILE_{{ pageid }}.{{ ext }}' \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    302
                 | 
                                    
                                                     | 
                
                 | 
                                -  | 
            
            
                                                        
            
                                    
            
            
                | 
                    303
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    304
                 | 
                                    
                                                     | 
                
                 | 
                        \b  | 
            
            
                                                        
            
                                    
            
            
                | 
                    305
                 | 
                                    
                                                     | 
                
                 | 
                        { echo PHYS_0001 BIN FILE_0001_BIN.IMG-wolf BIN/FILE_0001_BIN.IMG-wolf.png; \\ | 
            
            
                                                        
            
                                    
            
            
                | 
                    306
                 | 
                                    
                                                     | 
                
                 | 
                          echo PHYS_0001 BIN FILE_0001_BIN BIN/FILE_0001_BIN.xml; \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    307
                 | 
                                    
                                                     | 
                
                 | 
                          echo PHYS_0002 BIN FILE_0002_BIN.IMG-wolf BIN/FILE_0002_BIN.IMG-wolf.png; \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    308
                 | 
                                    
                                                     | 
                
                 | 
                          echo PHYS_0002 BIN FILE_0002_BIN BIN/FILE_0002_BIN.xml; \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    309
                 | 
                                    
                                                     | 
                
                 | 
                        } | ocrd workspace bulk-add -r '(?P<pageid>.*) (?P<filegrp>.*) (?P<fileid>.*) (?P<local_filename>.*)' \\  | 
            
            
                                                        
            
                                    
            
            
                | 
                    310
                 | 
                                    
                                                     | 
                
                 | 
                          -G '{{ filegrp }}' -g '{{ pageid }}' -i '{{ fileid }}' -S '{{ local_filename }}' - | 
            
            
                                                        
            
                                    
            
            
                | 
                    311
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    312
                 | 
                                    
                                                     | 
                
                 | 
                    log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name | 
            
            
                                                        
            
                                    
            
            
                | 
                    313
                 | 
                                    
                                                     | 
                
                 | 
                    workspace = Workspace(  | 
            
            
                                                        
            
                                    
            
            
                | 
                    314
                 | 
                                    
                                                     | 
                
                 | 
                        ctx.resolver,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    315
                 | 
                                    
                                                     | 
                
                 | 
                        directory=ctx.directory,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    316
                 | 
                                    
                                                     | 
                
                 | 
                        mets_basename=ctx.mets_basename,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    317
                 | 
                                    
                                                     | 
                
                 | 
                        automatic_backup=ctx.automatic_backup,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    318
                 | 
                                    
                                                     | 
                
                 | 
                        mets_server_url=ctx.mets_server_url,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    319
                 | 
                                    
                                                     | 
                
                 | 
                    )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    320
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    321
                 | 
                                    
                                                     | 
                
                 | 
                    try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    322
                 | 
                                    
                                                     | 
                
                 | 
                        pat = re.compile(regex)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    323
                 | 
                                    
                                                     | 
                
                 | 
                    except Exception as e:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    324
                 | 
                                    
                                                     | 
                
                 | 
                        log.error("Invalid regex: %s" % e) | 
            
            
                                                        
            
                                    
            
            
                | 
                    325
                 | 
                                    
                                                     | 
                
                 | 
                        sys.exit(1)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    326
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    327
                 | 
                                    
                                                     | 
                
                 | 
                    file_paths = []  | 
            
            
                                                        
            
                                    
            
            
                | 
                    328
                 | 
                                    
                                                     | 
                
                 | 
                    from_stdin = file_glob == ('-',) | 
            
            
                                                        
            
                                    
            
            
                | 
                    329
                 | 
                                    
                                                     | 
                
                 | 
                    if from_stdin:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    330
                 | 
                                    
                                                     | 
                
                 | 
                        file_paths += [Path(x.strip('\n')) for x in sys.stdin.readlines()] | 
            
            
                                                        
            
                                    
            
            
                | 
                    331
                 | 
                                    
                                                     | 
                
                 | 
                    else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    332
                 | 
                                    
                                                     | 
                
                 | 
                        for fglob in file_glob:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    333
                 | 
                                    
                                                     | 
                
                 | 
                            expanded = glob(fglob)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    334
                 | 
                                    
                                                     | 
                
                 | 
                            if not expanded:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    335
                 | 
                                    
                                                     | 
                
                 | 
                                file_paths += [Path(fglob)]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    336
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    337
                 | 
                                    
                                                     | 
                
                 | 
                                file_paths += [Path(x) for x in expanded]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    338
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    339
                 | 
                                    
                                                     | 
                
                 | 
                    for i, file_path in enumerate(file_paths):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    340
                 | 
                                    
                                                     | 
                
                 | 
                        log.info("[%4d/%d] %s" % (i + 1, len(file_paths), file_path)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    341
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    342
                 | 
                                    
                                                     | 
                
                 | 
                        # match regex  | 
            
            
                                                        
            
                                    
            
            
                | 
                    343
                 | 
                                    
                                                     | 
                
                 | 
                        m = pat.match(str(file_path))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    344
                 | 
                                    
                                                     | 
                
                 | 
                        if not m:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    345
                 | 
                                    
                                                     | 
                
                 | 
                            if skip:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    346
                 | 
                                    
                                                     | 
                
                 | 
                                continue  | 
            
            
                                                        
            
                                    
            
            
                | 
                    347
                 | 
                                    
                                                     | 
                
                 | 
                            log.error("File '%s' not matched by regex: '%s'" % (file_path, regex)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    348
                 | 
                                    
                                                     | 
                
                 | 
                            sys.exit(1)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    349
                 | 
                                    
                                                     | 
                
                 | 
                        group_dict = m.groupdict()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    350
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    351
                 | 
                                    
                                                     | 
                
                 | 
                        # set up file info  | 
            
            
                                                        
            
                                    
            
            
                | 
                    352
                 | 
                                    
                                                     | 
                
                 | 
                        file_dict = {'local_filename': local_filename, 'url': url, 'mimetype': mimetype, 'file_id': file_id, 'page_id': page_id, 'file_grp': file_grp} | 
            
            
                                                        
            
                                    
            
            
                | 
                    353
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    354
                 | 
                                    
                                                     | 
                
                 | 
                        # Flag to track whether 'local_filename' should be 'src'  | 
            
            
                                                        
            
                                    
            
            
                | 
                    355
                 | 
                                    
                                                     | 
                
                 | 
                        local_filename_is_src = False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    356
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    357
                 | 
                                    
                                                     | 
                
                 | 
                        # expand templates  | 
            
            
                                                        
            
                                    
            
            
                | 
                    358
                 | 
                                    
                                                     | 
                
                 | 
                        for param_name in file_dict:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    359
                 | 
                                    
                                                     | 
                
                 | 
                            if not file_dict[param_name]:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    360
                 | 
                                    
                                                     | 
                
                 | 
                                if param_name == 'local_filename':  | 
            
            
                                                        
            
                                    
            
            
                | 
                    361
                 | 
                                    
                                                     | 
                
                 | 
                                    local_filename_is_src = True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    362
                 | 
                                    
                                                     | 
                
                 | 
                                    continue  | 
            
            
                                                        
            
                                    
            
            
                | 
                    363
                 | 
                                    
                                                     | 
                
                 | 
                                elif param_name in ['mimetype', 'file_id']:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    364
                 | 
                                    
                                                     | 
                
                 | 
                                    # auto-filled below once the other  | 
            
            
                                                        
            
                                    
            
            
                | 
                    365
                 | 
                                    
                                                     | 
                
                 | 
                                    # replacements have happened  | 
            
            
                                                        
            
                                    
            
            
                | 
                    366
                 | 
                                    
                                                     | 
                
                 | 
                                    continue  | 
            
            
                                                        
            
                                    
            
            
                | 
                    367
                 | 
                                    
                                                     | 
                
                 | 
                                elif param_name == 'url':  | 
            
            
                                                        
            
                                    
            
            
                | 
                    368
                 | 
                                    
                                                     | 
                
                 | 
                                    # Remote URL is not required  | 
            
            
                                                        
            
                                    
            
            
                | 
                    369
                 | 
                                    
                                                     | 
                
                 | 
                                    continue  | 
            
            
                                                        
            
                                    
            
            
                | 
                    370
                 | 
                                    
                                                     | 
                
                 | 
                                raise ValueError(f"OcrdFile attribute '{param_name}' unset ({file_dict})") | 
            
            
                                                        
            
                                    
            
            
                | 
                    371
                 | 
                                    
                                                     | 
                
                 | 
                            for group_name in group_dict:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    372
                 | 
                                    
                                                     | 
                
                 | 
                                file_dict[param_name] = file_dict[param_name].replace('{{ %s }}' % group_name, group_dict[group_name]) | 
            
            
                                                        
            
                                    
            
            
                | 
                    373
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    374
                 | 
                                    
                                                     | 
                
                 | 
                        # Where to copy from  | 
            
            
                                                        
            
                                    
            
            
                | 
                    375
                 | 
                                    
                                                     | 
                
                 | 
                        if src_path_option:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    376
                 | 
                                    
                                                     | 
                
                 | 
                            src_path = src_path_option  | 
            
            
                                                        
            
                                    
            
            
                | 
                    377
                 | 
                                    
                                                     | 
                
                 | 
                            for group_name in group_dict:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    378
                 | 
                                    
                                                     | 
                
                 | 
                                src_path = src_path.replace('{{ %s }}' % group_name, group_dict[group_name]) | 
            
            
                                                        
            
                                    
            
            
                | 
                    379
                 | 
                                    
                                                     | 
                
                 | 
                            srcpath = Path(src_path)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    380
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    381
                 | 
                                    
                                                     | 
                
                 | 
                            srcpath = file_path  | 
            
            
                                                        
            
                                    
            
            
                | 
                    382
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    383
                 | 
                                    
                                                     | 
                
                 | 
                        # derive --file-id from filename if --file-id not explicitly set  | 
            
            
                                                        
            
                                    
            
            
                | 
                    384
                 | 
                                    
                                                     | 
                
                 | 
                        if not file_id:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    385
                 | 
                                    
                                                     | 
                
                 | 
                            id_field = srcpath.stem if file_path != srcpath else file_path.stem  | 
            
            
                                                        
            
                                    
            
            
                | 
                    386
                 | 
                                    
                                                     | 
                
                 | 
                            file_dict['file_id'] = safe_filename('%s_%s' % (file_dict['file_grp'], id_field)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    387
                 | 
                                    
                                                     | 
                
                 | 
                        if not mimetype:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    388
                 | 
                                    
                                                     | 
                
                 | 
                            try:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    389
                 | 
                                    
                                                     | 
                
                 | 
                                file_dict['mimetype'] = EXT_TO_MIME[srcpath.suffix]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    390
                 | 
                                    
                                                     | 
                
                 | 
                            except KeyError:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    391
                 | 
                                    
                                                     | 
                
                 | 
                                log.error("Cannot guess MIME type from extension '%s' for '%s'. Set --mimetype explicitly" % (srcpath.suffix, srcpath)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    392
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    393
                 | 
                                    
                                                     | 
                
                 | 
                        # copy files if src != url  | 
            
            
                                                        
            
                                    
            
            
                | 
                    394
                 | 
                                    
                                                     | 
                
                 | 
                        if local_filename_is_src:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    395
                 | 
                                    
                                                     | 
                
                 | 
                            file_dict['local_filename'] = srcpath  | 
            
            
                                                        
            
                                    
            
            
                | 
                    396
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    397
                 | 
                                    
                                                     | 
                
                 | 
                            destpath = Path(workspace.directory, file_dict['local_filename'])  | 
            
            
                                                        
            
                                    
            
            
                | 
                    398
                 | 
                                    
                                                     | 
                
                 | 
                            if srcpath != destpath and not destpath.exists():  | 
            
            
                                                        
            
                                    
            
            
                | 
                    399
                 | 
                                    
                                                     | 
                
                 | 
                                log.info("cp '%s' '%s'", srcpath, destpath) | 
            
            
                                                        
            
                                    
            
            
                | 
                    400
                 | 
                                    
                                                     | 
                
                 | 
                                if not dry_run:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    401
                 | 
                                    
                                                     | 
                
                 | 
                                    if not destpath.parent.is_dir():  | 
            
            
                                                        
            
                                    
            
            
                | 
                    402
                 | 
                                    
                                                     | 
                
                 | 
                                        destpath.parent.mkdir()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    403
                 | 
                                    
                                                     | 
                
                 | 
                                    destpath.write_bytes(srcpath.read_bytes())  | 
            
            
                                                        
            
                                    
            
            
                | 
                    404
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    405
                 | 
                                    
                                                     | 
                
                 | 
                        # Add to workspace (or not)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    406
                 | 
                                    
                                                     | 
                
                 | 
                        fileGrp = file_dict.pop('file_grp') | 
            
            
                                                        
            
                                    
            
            
                | 
                    407
                 | 
                                    
                                                     | 
                
                 | 
                        if dry_run:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    408
                 | 
                                    
                                                     | 
                
                 | 
                            log.info('workspace.add_file(%s)' % file_dict) | 
            
            
                                                        
            
                                    
            
            
                | 
                    409
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    410
                 | 
                                    
                                                     | 
                
                 | 
                            workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    411
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    412
                 | 
                                    
                                                     | 
                
                 | 
                    # save changes to disk  | 
            
            
                                                        
            
                                    
            
            
                | 
                    413
                 | 
                                    
                                                     | 
                
                 | 
                    workspace.save_mets()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    414
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    415
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    416
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    417
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace find  | 
            
            
                                                        
            
                                    
            
            
                | 
                    418
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    419
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    420
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('find') | 
            
            
                                                        
            
                                    
            
            
                | 
                    421
                 | 
                                    
                                                     | 
                
                 | 
                @mets_find_options  | 
            
            
                                                        
            
                                    
            
            
                | 
                    422
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-k', '--output-field', help="Output field. Repeat for multiple fields, will be joined with tab", | 
            
            
                                                        
            
                                    
            
            
                | 
                    423
                 | 
                                    
                                                     | 
                
                 | 
                              default=['local_filename'],  | 
            
            
                                                        
            
                                    
            
            
                | 
                    424
                 | 
                                    
                                                     | 
                
                 | 
                              show_default=True,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    425
                 | 
                                    
                                                     | 
                
                 | 
                              multiple=True,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    426
                 | 
                                    
                                                     | 
                
                 | 
                              type=click.Choice([  | 
            
            
                                                        
            
                                    
            
            
                | 
                    427
                 | 
                                    
                                                     | 
                
                 | 
                                  'url',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    428
                 | 
                                    
                                                     | 
                
                 | 
                                  'mimetype',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    429
                 | 
                                    
                                                     | 
                
                 | 
                                  'page_id',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    430
                 | 
                                    
                                                     | 
                
                 | 
                                  'pageId',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    431
                 | 
                                    
                                                     | 
                
                 | 
                                  'file_id',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    432
                 | 
                                    
                                                     | 
                
                 | 
                                  'ID',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    433
                 | 
                                    
                                                     | 
                
                 | 
                                  'file_grp',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    434
                 | 
                                    
                                                     | 
                
                 | 
                                  'fileGrp',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    435
                 | 
                                    
                                                     | 
                
                 | 
                                  'basename',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    436
                 | 
                                    
                                                     | 
                
                 | 
                                  'basename_without_extension',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    437
                 | 
                                    
                                                     | 
                
                 | 
                                  'local_filename',  | 
            
            
                                                        
            
                                    
            
            
                | 
                    438
                 | 
                                    
                                                     | 
                
                 | 
                              ]))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    439
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('--download', is_flag=True, help="Download found files to workspace and change location in METS file ") | 
            
            
                                                        
            
                                    
            
            
                | 
                    440
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('--undo-download', is_flag=True, help="Remove all downloaded files from the METS") | 
            
            
                                                        
            
                                    
            
            
                | 
                    441
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('--wait', type=int, default=0, help="Wait this many seconds between download requests") | 
            
            
                                                        
            
                                    
            
            
                | 
                    442
                 | 
                                    
                                                     | 
                
                 | 
                @pass_workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    443
                 | 
                                    
                                                     | 
                
                 | 
                def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, include_fileGrp, exclude_fileGrp, download, undo_download, wait):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    444
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    445
                 | 
                                    
                                                     | 
                
                 | 
                    Find files.  | 
            
            
                                                        
            
                                    
            
            
                | 
                    446
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    447
                 | 
                                    
                                                     | 
                
                 | 
                    (If any ``FILTER`` starts with ``//``, then its remainder  | 
            
            
                                                        
            
                                    
            
            
                | 
                    448
                 | 
                                    
                                                     | 
                
                 | 
                     will be interpreted as a regular expression.)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    449
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    450
                 | 
                                    
                                                     | 
                
                 | 
                    snake_to_camel = {"file_id": "ID", "page_id": "pageId", "file_grp": "fileGrp"} | 
            
            
                                                        
            
                                    
            
            
                | 
                    451
                 | 
                                    
                                                     | 
                
                 | 
                    output_field = [snake_to_camel.get(x, x) for x in output_field]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    452
                 | 
                                    
                                                     | 
                
                 | 
                    modified_mets = False  | 
            
            
                                                        
            
                                    
            
            
                | 
                    453
                 | 
                                    
                                                     | 
                
                 | 
                    ret = list()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    454
                 | 
                                    
                                                     | 
                
                 | 
                    workspace = Workspace(  | 
            
            
                                                        
            
                                    
            
            
                | 
                    455
                 | 
                                    
                                                     | 
                
                 | 
                        ctx.resolver,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    456
                 | 
                                    
                                                     | 
                
                 | 
                        directory=ctx.directory,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    457
                 | 
                                    
                                                     | 
                
                 | 
                        mets_basename=ctx.mets_basename,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    458
                 | 
                                    
                                                     | 
                
                 | 
                        mets_server_url=ctx.mets_server_url,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    459
                 | 
                                    
                                                     | 
                
                 | 
                    )  | 
            
            
                                                        
            
                                    
            
            
                | 
                    460
                 | 
                                    
                                                     | 
                
                 | 
                    for f in workspace.find_files(  | 
            
            
                                                        
            
                                    
            
            
                | 
                    461
                 | 
                                    
                                                     | 
                
                 | 
                            file_id=file_id,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    462
                 | 
                                    
                                                     | 
                
                 | 
                            file_grp=file_grp,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    463
                 | 
                                    
                                                     | 
                
                 | 
                            mimetype=mimetype,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    464
                 | 
                                    
                                                     | 
                
                 | 
                            page_id=page_id,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    465
                 | 
                                    
                                                     | 
                
                 | 
                            include_fileGrp=include_fileGrp,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    466
                 | 
                                    
                                                     | 
                
                 | 
                            exclude_fileGrp=exclude_fileGrp,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    467
                 | 
                                    
                                                     | 
                
                 | 
                        ):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    468
                 | 
                                    
                                                     | 
                
                 | 
                        ret_entry = [f.ID if field == 'pageId' else str(getattr(f, field)) or '' for field in output_field]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    469
                 | 
                                    
                                                     | 
                
                 | 
                        if download and not f.local_filename:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    470
                 | 
                                    
                                                     | 
                
                 | 
                            workspace.download_file(f)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    471
                 | 
                                    
                                                     | 
                
                 | 
                            modified_mets = True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    472
                 | 
                                    
                                                     | 
                
                 | 
                            if wait:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    473
                 | 
                                    
                                                     | 
                
                 | 
                                time.sleep(wait)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    474
                 | 
                                    
                                                     | 
                
                 | 
                        if undo_download and f.local_filename:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    475
                 | 
                                    
                                                     | 
                
                 | 
                            ret_entry = [f'Removed local_filename {f.local_filename}'] | 
            
            
                                                        
            
                                    
            
            
                | 
                    476
                 | 
                                    
                                                     | 
                
                 | 
                            f.local_filename = None  | 
            
            
                                                        
            
                                    
            
            
                | 
                    477
                 | 
                                    
                                                     | 
                
                 | 
                            modified_mets = True  | 
            
            
                                                        
            
                                    
            
            
                | 
                    478
                 | 
                                    
                                                     | 
                
                 | 
                        ret.append(ret_entry)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    479
                 | 
                                    
                                                     | 
                
                 | 
                    if modified_mets:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    480
                 | 
                                    
                                                     | 
                
                 | 
                        workspace.save_mets()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    481
                 | 
                                    
                                                     | 
                
                 | 
                    if 'pageId' in output_field:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    482
                 | 
                                    
                                                     | 
                
                 | 
                        idx = output_field.index('pageId') | 
            
            
                                                        
            
                                    
            
            
                | 
                    483
                 | 
                                    
                                                     | 
                
                 | 
                        fileIds = list(map(lambda fields: fields[idx], ret))  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                        
            
                                    
            
            
                | 
                    484
                 | 
                                    
                                                     | 
                
                 | 
                        pages = workspace.mets.get_physical_pages(for_fileIds=fileIds)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    485
                 | 
                                    
                                                     | 
                
                 | 
                        for fields, page in zip(ret, pages):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    486
                 | 
                                    
                                                     | 
                
                 | 
                            fields[idx] = page or ''  | 
            
            
                                                        
            
                                    
            
            
                | 
                    487
                 | 
                                    
                                                     | 
                
                 | 
                    for fields in ret:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    488
                 | 
                                    
                                                     | 
                
                 | 
                        print('\t'.join(fields)) | 
            
            
                                                        
            
                                    
            
            
                | 
                    489
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    490
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    491
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace remove  | 
            
            
                                                        
            
                                    
            
            
                | 
                    492
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    493
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    494
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('remove')
                @click.option(
                    '-k', '--keep-file',
                    help="Do not delete file from file system",
                    default=False,
                    is_flag=True,
                )
                @click.option(
                    '-f', '--force',
                    help="Continue even if mets:file or file on file system does not exist",
                    default=False,
                    is_flag=True,
                )
                @click.argument('ID', nargs=-1)
                @pass_workspace
                def workspace_remove_file(ctx, id, force, keep_file):  # pylint: disable=redefined-builtin
                    """
                    Delete files (given by their ID attribute ``ID``).

                    (If any ``ID`` starts with ``//``, then its remainder
                     will be interpreted as a regular expression.)
                    """
                    # NOTE: the parameter must stay named ``id`` -- click derives it from
                    # the ``ID`` argument declaration above.
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    # Process every requested ID first, then write the METS a single time.
                    for file_id in id:
                        workspace.remove_file(file_id, force=force, keep_file=keep_file)
                    workspace.save_mets()
            
            
                                                        
            
                                    
            
            
                | 
                    510
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    511
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    512
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    513
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace rename-group  | 
            
            
                                                        
            
                                    
            
            
                | 
                    514
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    515
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    516
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('rename-group')
                @click.argument('OLD', nargs=1)
                @click.argument('NEW', nargs=1)
                @pass_workspace
                def rename_group(ctx, old, new):
                    """
                    Rename fileGrp (USE attribute ``OLD`` to ``NEW``).
                    """
                    # Pass the context's backup setting through so that saving the METS
                    # below honours it, as the ``remove`` command in this module does.
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    # Argument order is (current name, new name).
                    workspace.rename_file_group(old, new)
                    workspace.save_mets()
            
            
                                                        
            
                                    
            
            
                | 
                    527
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    528
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    529
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace remove-group  | 
            
            
                                                        
            
                                    
            
            
                | 
                    530
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    531
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    532
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('remove-group') | 
            
            
                                                        
            
                                    
            
            
                | 
                    533
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-r', '--recursive', help="Delete any files in the group before the group itself", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    534
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-f', '--force', help="Continue removing even if group or containing files not found in METS", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    535
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-k', '--keep-files', help="Do not delete files from file system", default=False, is_flag=True) | 
            
            
                                                        
            
                                    
            
            
                | 
                    536
                 | 
                                    
                                                     | 
                
                 | 
                @click.argument('GROUP', nargs=-1) | 
            
            
                                                        
            
                                    
            
            
                | 
                    537
                 | 
                                    
                                                     | 
                
                 | 
                @pass_workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    538
                 | 
                                    
                                                     | 
                
                 | 
                def remove_group(ctx, group, recursive, force, keep_files):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    539
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    540
                 | 
                                    
                                                     | 
                
                 | 
                    Delete fileGrps (given by their USE attribute ``GROUP``).  | 
            
            
                                                        
            
                                    
            
            
                | 
                    541
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    542
                 | 
                                    
                                                     | 
                
                 | 
                    (If any ``GROUP`` starts with ``//``, then its remainder  | 
            
            
                                                        
            
                                    
            
            
                | 
                    543
                 | 
                                    
                                                     | 
                
                 | 
                     will be interpreted as a regular expression.)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    544
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                        
            
                                    
            
            
                | 
                    545
                 | 
                                    
                                                     | 
                
                 | 
                    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    546
                 | 
                                    
                                                     | 
                
                 | 
                    for g in group:  | 
            
            
                                                        
            
                                    
            
            
                | 
                    547
                 | 
                                    
                                                     | 
                
                 | 
                        workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)  | 
            
            
                                                        
            
                                    
            
            
                | 
                    548
                 | 
                                    
                                                     | 
                
                 | 
                    workspace.save_mets()  | 
            
            
                                                        
            
                                    
            
            
                | 
                    549
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    550
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    551
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace prune-files  | 
            
            
                                                        
            
                                    
            
            
                | 
                    552
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    553
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    554
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('prune-files')
                @click.option('-G', '--file-grp', help="fileGrp USE", metavar='FILTER')
                @click.option('-m', '--mimetype', help="Media type to look for", metavar='FILTER')
                @click.option('-g', '--page-id', help="Page ID", metavar='FILTER')
                @click.option('-i', '--file-id', help="ID", metavar='FILTER')
                @pass_workspace
                def prune_files(ctx, file_grp, mimetype, page_id, file_id):
                    """
                    Removes mets:files that point to non-existing local files

                    (If any ``FILTER`` starts with ``//``, then its remainder
                     will be interpreted as a regular expression.)
                    """
                    # Parameters (all optional filters forwarded to ``workspace.find_files``):
                    #   file_grp -- value of ``-G/--file-grp``
                    #   mimetype -- value of ``-m/--mimetype``
                    #   page_id  -- value of ``-g/--page-id``
                    #   file_id  -- value of ``-i/--file-id``
                    # Any exception raised while checking or removing an entry is logged
                    # and re-raised; in that case the METS is not saved.
                    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
                    # NOTE(review): pushd_popd presumably makes the workspace directory the
                    # working directory for the duration of the block, so that relative
                    # ``local_filename`` values resolve correctly -- confirm against helper.
                    with pushd_popd(workspace.directory):
                        for f in workspace.find_files(
                            file_id=file_id,
                            file_grp=file_grp,
                            mimetype=mimetype,
                            page_id=page_id,
                        ):
                            try:
                                # An entry is pruned when it has no local filename at all
                                # or when that filename does not exist on disk.
                                if not f.local_filename or not exists(f.local_filename):
                                    workspace.mets.remove_file(f.ID)
                            except Exception as e:
                                # Use %s for the file object: %f requires a real number and
                                # would make the log record itself fail to format.
                                ctx.log.exception("Error removing %s: %s", f, e)
                                # Bare raise re-raises the active exception unchanged.
                                raise
                        workspace.save_mets()
            
            
                                                        
            
                                    
            
            
                | 
                    582
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    583
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    584
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace list-group  | 
            
            
                                                        
            
                                    
            
            
                | 
                    585
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    586
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    587
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('list-group')
                @pass_workspace
                def list_groups(ctx):
                    """
                    List fileGrp USE attributes
                    """
                    # Build the workspace from the resolver, directory and METS basename
                    # carried on the CLI context.
                    ws = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                    )
                    # Emit the file groups reported by the METS, one per line.
                    group_names = ws.mets.file_groups
                    print("\n".join(group_names))
            
            
                                                        
            
                                    
            
            
                | 
                    595
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    596
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    597
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace list-page  | 
            
            
                                                        
            
                                    
            
            
                | 
                    598
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    599
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    600
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('list-page') | 
            
            
                                                        
            
                                    
            
            
                | 
                    601
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-k', '--output-field', help="Output field. Repeat for multiple fields, will be joined with tab", | 
            
            
                                                        
            
                                    
            
            
                | 
                    602
                 | 
                                    
                                                     | 
                
                 | 
                              default=['ID'],  | 
            
            
                                                        
            
                                    
            
            
                | 
                    603
                 | 
                                    
                                                     | 
                
                 | 
                              show_default=True,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    604
                 | 
                                    
                                                     | 
                
                 | 
                              multiple=True,  | 
            
            
                                                        
            
                                    
            
            
                | 
                    605
                 | 
                                    
                                                     | 
                
                 | 
                              type=click.Choice(METS_PAGE_DIV_ATTRIBUTE.names()))  | 
            
            
                                                        
            
                                    
            
            
                | 
                    606
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-f', '--output-format', help="Output format", type=click.Choice(['one-per-line', 'comma-separated', 'json']), default='one-per-line') | 
            
            
                                                        
            
                                    
            
            
                | 
                    607
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-D', '--chunk-number', help="Partition the return value into n roughly equally sized chunks", default=1, type=int) | 
            
            
                                                        
            
                                    
            
            
                | 
                    608
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-C', '--chunk-index', help="Output the nth chunk of results, -1 for all of them.", default=None, type=int) | 
            
            
                                                        
            
                                    
            
            
                | 
                    609
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-r', '--page-id-range', help="Restrict the pages to those matching the provided range, based on the @ID attribute. Separate start/end with ..") | 
            
            
                                                        
            
                                    
            
            
                | 
                    610
                 | 
                                    
                                                     | 
                
                 | 
                @click.option('-R', '--numeric-range', help="Restrict the pages to those in the range, in numerical document order. Separate start/end with ..") | 
            
            
                                                        
            
                                    
            
            
                | 
                    611
                 | 
                                    
                                                     | 
                
                 | 
                @pass_workspace  | 
            
            
                                                        
            
                                    
            
            
                | 
                    612
                 | 
                                    
                                                     | 
                
                 | 
                def list_pages(ctx, output_field, output_format, chunk_number, chunk_index, page_id_range, numeric_range):
                    """
                    List physical page IDs

                    (If any ``FILTER`` starts with ``//``, then its remainder
                     will be interpreted as a regular expression.)

                    Every page becomes one record holding the requested output fields,
                    joined with tabs. The numeric range is 1-based and inclusive and is
                    applied before the records are partitioned into chunks.
                    """
                    # NOTE(review): the signature has no parameter named FILTER; the remark in the
                    # docstring looks carried over from another command - confirm which option it means.
                    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)

                    # The page ID range is only forwarded to find_files() when 'ID' is one of the requested fields.
                    find_kwargs = {}
                    if page_id_range and 'ID' in output_field:
                        find_kwargs['pageId'] = page_id_range
                    page_ids = sorted({x.pageId for x in workspace.mets.find_files(**find_kwargs) if x.pageId})

                    # NOTE(review): this equality only holds when output_field is a list - confirm the
                    # type the option actually delivers before relying on this shortcut.
                    if output_field == ['ID']:
                        ret = [[x] for x in page_ids]
                    else:
                        page_divs = workspace.mets.get_physical_pages(for_pageIds=','.join(page_ids), return_divs=True)
                        # Attributes missing on a page div are reported as the literal string 'None'.
                        ret = [[page_div.get(k, 'None') for k in output_field] for page_div in page_divs]

                    if numeric_range:
                        try:
                            start, end = map(int, numeric_range.split('..'))
                        except ValueError as err:
                            raise click.BadParameter(
                                f"expected START..END with integer bounds, got {numeric_range!r}",
                                param_hint='--numeric-range') from err
                        # Clamp both bounds so that values below 1 cannot turn into negative
                        # indices, which would silently count from the end of the list.
                        ret = ret[max(start - 1, 0):max(end, 0)]

                    chunks = partition_list(ret, chunk_number, chunk_index)
                    # Separator placed between the records of one chunk for the text formats.
                    record_separators = {'one-per-line': '\n', 'comma-separated': ','}
                    lines = []
                    if output_format in record_separators:
                        separator = record_separators[output_format]
                        lines = [separator.join("\t".join(entry) for entry in chunk) for chunk in chunks]
                    elif output_format == 'json':
                        lines.append(dumps(chunks))
                    print('\n'.join(lines))
            
            
                                                        
            
                                    
            
            
                | 
                    655
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    656
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    657
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace get-id  | 
            
            
                                                        
            
                                    
            
            
                | 
                    658
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    659
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    660
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('get-id')
                @pass_workspace
                def get_id(ctx):
                    """
                    Print the unique identifier of the METS, if it has one
                    """
                    ws = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
                    mets_id = ws.mets.unique_identifier
                    # Nothing is printed when the identifier is empty or missing.
                    if not mets_id:
                        return
                    print(mets_id)
            
            
                                                        
            
                                    
            
            
                | 
                    670
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    671
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    672
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace set-id  | 
            
            
                                                        
            
                                    
            
            
                | 
                    673
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    674
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    675
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('set-id')
                @click.argument('ID')
                @pass_workspace
                def set_id(ctx, id):   # pylint: disable=redefined-builtin
                    """
                    Set the unique identifier of the METS to ID.

                    When the METS already uses one of the supported identifier mechanisms,
                    that identifier is updated.

                    Otherwise a new <mods:identifier type="purl">{{ ID }}</mods:identifier> is created.
                    """
                    ws = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    ws.mets.unique_identifier = id
                    # Persist the changed identifier to the METS file.
                    ws.save_mets()
            
            
                                                        
            
                                    
            
            
                | 
                    689
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    690
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('update-page')
                @click.option('--set', 'attr_value_pairs', help=f"set mets:div ATTR to VALUE. possible keys: {METS_PAGE_DIV_ATTRIBUTE.names()}", metavar="ATTR VALUE", nargs=2, multiple=True)
                @click.option('--order', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDER')
                @click.option('--orderlabel', help="DEPRECATED - use --set ATTR VALUE", metavar='ORDERLABEL')
                @click.option('--contentids', help="DEPRECATED - use --set ATTR VALUE", metavar='CONTENTIDS')
                @click.argument('PAGE_ID')
                @pass_workspace
                def update_page(ctx, attr_value_pairs, order, orderlabel, contentids, page_id):
                    """
                    Update the @ID, @ORDER, @ORDERLABEL, @LABEL or @CONTENTIDS attributes of the mets:div with @ID=PAGE_ID

                    The deprecated ``--order``, ``--orderlabel`` and ``--contentids`` options
                    take precedence over a ``--set`` pair for the same attribute.
                    """
                    # Each --set occurrence contributes one (ATTR, VALUE) pair.
                    update_kwargs = dict(attr_value_pairs)
                    # The deprecated single-attribute options are applied last, so a non-empty
                    # value overrides whatever --set supplied for that attribute.
                    deprecated_values = (
                        ('ORDER', order),
                        ('ORDERLABEL', orderlabel),
                        ('CONTENTIDS', contentids),
                    )
                    for attr_name, attr_value in deprecated_values:
                        if attr_value:
                            update_kwargs[attr_name] = attr_value
                    try:
                        workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
                        workspace.mets.update_physical_page_attributes(page_id, **update_kwargs)
                        workspace.save_mets()
                    except Exception as err:
                        # Report any failure as a one-line message and exit status 1
                        # rather than letting a traceback reach the user.
                        print(f"Error: {err}")
                        sys.exit(1)
            
            
                                                        
            
                                    
            
            
                | 
                    715
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    716
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    717
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace merge  | 
            
            
                                                        
            
                                    
            
            
                | 
                    718
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    719
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    720
                 | 
                                    
                                                     | 
                
                 | 
                def _handle_json_option(ctx, param, value):  | 
            
            
                                                        
            
                                    
            
            
                | 
                    721
                 | 
                                    
                                                     | 
                
                 | 
                    return parse_json_string_or_file(value) if value else None  | 
            
            
                                                        
            
                                    
            
            
                | 
                    722
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    723
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.command('merge')
                @click.argument('METS_PATH')
                @click.option('--overwrite/--no-overwrite', is_flag=True, default=False, help="Overwrite on-disk file in case of file name conflicts with data from METS_PATH")
                @click.option('--force/--no-force', is_flag=True, default=False, help="Overwrite mets:file from --mets with mets:file from METS_PATH if IDs clash")
                @click.option('--copy-files/--no-copy-files', is_flag=True, help="Copy files as well", default=True, show_default=True)
                @click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp", callback=_handle_json_option)
                @click.option('--fileId-mapping', help="JSON object mapping src to dest file ID", callback=_handle_json_option)
                @click.option('--pageId-mapping', help="JSON object mapping src to dest page ID", callback=_handle_json_option)
                @mets_find_options
                @pass_workspace
                def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping, file_grp, file_id, page_id, mimetype, include_fileGrp, exclude_fileGrp, mets_path):   # pylint: disable=redefined-builtin
                    """
                    Merges this workspace with the workspace that contains ``METS_PATH``

                    Pass a JSON string or file to ``--fileGrp-mapping``, ``--fileId-mapping`` or ``--pageId-mapping``
                    in order to rename all fileGrp, file ID or page ID values, respectively.

                    The ``--file-id``, ``--page-id``, ``--mimetype`` and ``--file-grp`` options have
                    the same semantics as in ``ocrd workspace find``, see ``ocrd workspace find --help``
                    for an explanation.
                    """
                    mets_path = Path(mets_path)
                    # --fileGrp-mapping goes through the same _handle_json_option callback as
                    # the other two mappings, which are forwarded without further decoding.
                    # Only decode here if the value is still raw JSON text, so an
                    # already-decoded mapping is not fed to loads() a second time.
                    if filegrp_mapping and isinstance(filegrp_mapping, (str, bytes, bytearray)):
                        filegrp_mapping = loads(filegrp_mapping)
                    # Destination: the workspace selected by the CLI context.
                    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)
                    # Source: the workspace located by the directory and file name of METS_PATH.
                    other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name))
                    workspace.merge(
                        other_workspace,
                        force=force,
                        overwrite=overwrite,
                        copy_files=copy_files,
                        fileGrp_mapping=filegrp_mapping,
                        fileId_mapping=fileid_mapping,
                        pageId_mapping=pageid_mapping,
                        file_grp=file_grp,
                        file_id=file_id,
                        page_id=page_id,
                        mimetype=mimetype,
                        include_fileGrp=include_fileGrp,
                        exclude_fileGrp=exclude_fileGrp,
                    )
                    # Persist the merged METS of the destination workspace.
                    workspace.save_mets()
            
            
                                                        
            
                                    
            
            
                | 
                    765
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    766
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    767
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace backup  | 
            
            
                                                        
            
                                    
            
            
                | 
                    768
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    769
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    770
                 | 
                                    
                                                     | 
                
                 | 
                # Parent click group for the backup subcommands registered on it below.
                # The function body is intentionally empty; the docstring is the group's help text.
                @workspace_cli.group('backup')
                @click.pass_context
                def workspace_backup_cli(ctx):  # pylint: disable=unused-argument
                    """Backing and restoring workspaces - dev edition"""
            
            
                                                        
            
                                    
            
            
                | 
                    776
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    777
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_backup_cli.command('add')
                @pass_workspace
                def workspace_backup_add(ctx):
                    """Create a new backup"""
                    # Build the workspace from the settings carried by the CLI context ...
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    # ... and delegate to the backup manager's add().
                    WorkspaceBackupManager(workspace).add()
            
            
                                                        
            
                                    
            
            
                | 
                    785
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    786
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_backup_cli.command('list')
                @pass_workspace
                def workspace_backup_list(ctx):
                    """List backups"""
                    # Build the workspace from the settings carried by the CLI context.
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    manager = WorkspaceBackupManager(workspace)
                    # Print each entry returned by the manager on its own line.
                    for backup in manager.list():
                        print(backup)
            
            
                                                        
            
                                    
            
            
                | 
                    795
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    796
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_backup_cli.command('restore')
                @click.option('-f', '--choose-first', help="Restore first matching version if more than one", is_flag=True)
                # BAK is taken as a plain string; no click.Path validation is applied to it.
                @click.argument('bak')
                @pass_workspace
                def workspace_backup_restore(ctx, choose_first, bak):
                    """Restore backup BAK"""
                    # Build the workspace from the settings carried by the CLI context.
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    # Hand the requested backup and the --choose-first flag to the manager.
                    WorkspaceBackupManager(workspace).restore(bak, choose_first)
            
            
                                                        
            
                                    
            
            
                | 
                    806
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    807
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_backup_cli.command('undo')
                @pass_workspace
                def workspace_backup_undo(ctx):
                    """Restore the last backup"""
                    # Build the workspace from the settings carried by the CLI context ...
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        automatic_backup=ctx.automatic_backup,
                    )
                    # ... and delegate to the backup manager's undo().
                    WorkspaceBackupManager(workspace).undo()
            
            
                                                        
            
                                    
            
            
                | 
                    815
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    816
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    817
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    818
                 | 
                                    
                                                     | 
                
                 | 
                # ocrd workspace server  | 
            
            
                                                        
            
                                    
            
            
                | 
                    819
                 | 
                                    
                                                     | 
                
                 | 
                # ----------------------------------------------------------------------  | 
            
            
                                                        
            
                                    
            
            
                | 
                    820
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    821
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_cli.group('server')
                @pass_workspace
                def workspace_serve_cli(ctx):
                    """Control a METS server for this workspace"""
                    # Validate the user-supplied option explicitly rather than with `assert`:
                    # assertions are removed under `python -O`, and a click.UsageError is
                    # reported as a regular CLI usage error instead of an AssertionError traceback.
                    if not ctx.mets_server_url:
                        raise click.UsageError("For METS server commands, you must provide '-U/--mets-server-url'")
            
            
                                                        
            
                                    
            
            
                | 
                    826
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    827
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_serve_cli.command('stop')
                @pass_workspace
                def workspace_serve_stop(ctx):
                    """Stop the METS server"""
                    # Open the workspace with the METS server URL from the CLI context,
                    # then call stop() on its `mets` object.
                    Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                        mets_server_url=ctx.mets_server_url,
                    ).mets.stop()
            
            
                                                        
            
                                    
            
            
                | 
                    838
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                        
            
                                    
            
            
                | 
                    839
                 | 
                                    
                                                     | 
                
                 | 
                @workspace_serve_cli.command('start')
                @pass_workspace
                def workspace_serve_start(ctx):
                    """
                    Start a METS server

                    (For TCP backend, pass a network interface to bind to as the '-U/--mets-server-url' parameter.)
                    """
                    # The workspace is opened without a METS server URL here;
                    # the URL is given to the server object instead.
                    workspace = Workspace(
                        ctx.resolver,
                        directory=ctx.directory,
                        mets_basename=ctx.mets_basename,
                    )
                    server = OcrdMetsServer(workspace=workspace, url=ctx.mets_server_url)
                    server.startup()
            
            
                                                        
            
                                    
            
            
                | 
                    851
                 | 
                                    
                                                     | 
                
                 | 
                 |