| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                import io  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                from os import makedirs, unlink, listdir, path  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                from pathlib import Path  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                from shutil import copyfileobj  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                from re import sub  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                from tempfile import NamedTemporaryFile  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                from contextlib import contextmanager  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                from typing import Optional, Union, Callable  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                from PIL import Image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                import numpy as np  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                from deprecated.sphinx import deprecated  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                import requests  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_models import OcrdMets, OcrdFile  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_models.ocrd_file import ClientSideOcrdFile  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_models.ocrd_page import parse, BorderType, to_xml  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_modelfactory import exif_from_filename, page_from_file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                from ocrd_utils import (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                    atomic_write,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                    config,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                    getLogger,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                    image_from_polygon,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                    coordinates_of_segment,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                    adjust_canvas_to_rotation,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                    adjust_canvas_to_transposition,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                    scale_coordinates,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                    shift_coordinates,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                    rotate_coordinates,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                    transform_coordinates,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                    transpose_coordinates,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                    crop_image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                    rotate_image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                    transpose_image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                    bbox_from_polygon,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                    polygon_from_points,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                    xywh_from_bbox,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                    pushd_popd,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                    is_local_filename,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                    deprecated_alias,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                    DEFAULT_METS_BASENAME,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                    MIME_TO_EXT,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                    MIME_TO_PIL,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                    MIMETYPE_PAGE,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                    REGEX_PREFIX,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                )  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                from .workspace_backup import WorkspaceBackupManager  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                from .mets_server import ClientSideOcrdMets  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                __all__ = ['Workspace']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                @contextmanager  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    55
                 | 
                                    
                                                     | 
                
                 | 
                def download_temporary_file(url):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                    with NamedTemporaryFile(prefix='ocrd-download-') as f:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    57
                 | 
                                    
                                                     | 
                
                 | 
                        with requests.get(url) as r:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    58
                 | 
                                    
                                                     | 
                
                 | 
                            f.write(r.content)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    59
                 | 
                                    
                                                     | 
                
                 | 
                        yield f  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    61
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                class Workspace():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                    A workspace is a temporary directory set up for a processor. It's the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                    interface to the METS/PAGE XML and delegates download and upload to the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                    :py:class:`ocrd.resolver.Resolver`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                    Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                        resolver (:py:class:`ocrd.Resolver`) : `Resolver` instance  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    70
                 | 
                                    
                                                     | 
                
                 | 
                        directory (string) : Filesystem path to work in  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                        mets (:py:class:`ocrd_models.ocrd_mets.OcrdMets`) : `OcrdMets` representing this workspace.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                            If `None`, then loaded from ``directory``/``mets_basename``  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                            or delegated to ``mets_server_url``.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                        mets_basename (string, mets.xml) : Basename of the METS XML file in the workspace directory.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                        mets_server_url (string, None) : URI of TCP or local path of UDS for METS server handling the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                            `OcrdMets` of this workspace. If `None`, then the METS will be read from and written to  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    77
                 | 
                                    
                                                     | 
                
                 | 
                            the filesystem directly.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    78
                 | 
                                    
                                                     | 
                
                 | 
                        baseurl (string, None) : Base URL to prefix to relative URL.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                    """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    81
                 | 
                                    
                                                     | 
                
                 | 
                    def __init__(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                        self,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                        resolver,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                        directory,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                        mets : Optional[Union[OcrdMets, ClientSideOcrdMets]] = None,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                        mets_basename=DEFAULT_METS_BASENAME,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                        automatic_backup=False,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                        baseurl=None,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                        mets_server_url=None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                    ):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    91
                 | 
                                    
                                                     | 
                
                 | 
                        self.resolver = resolver  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                        self.directory = directory  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    93
                 | 
                                    
                                                     | 
                
                 | 
                        self.mets_target = str(Path(directory, mets_basename))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    94
                 | 
                                    
                                                     | 
                
                 | 
                        self.is_remote = bool(mets_server_url)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                        if mets is None:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    96
                 | 
                                    
                                                     | 
                
                 | 
                            if self.is_remote:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    97
                 | 
                                    
                                                     | 
                
                 | 
                                mets = ClientSideOcrdMets(mets_server_url, self.directory)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                                if mets.workspace_path != self.directory:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                                    raise ValueError(f"METS server {mets_server_url} workspace directory '{mets.workspace_path}' differs " | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    100
                 | 
                                    
                                                     | 
                
                 | 
                                            f"from local workspace directory '{self.directory}'. These are not the same workspaces.") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    101
                 | 
                                    
                                                     | 
                
                 | 
                            else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    102
                 | 
                                    
                                                     | 
                
                 | 
                                mets = OcrdMets(filename=self.mets_target)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    103
                 | 
                                    
                                                     | 
                
                 | 
                        self.mets = mets  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    104
                 | 
                                    
                                                     | 
                
                 | 
                        if automatic_backup:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                            self.automatic_backup = WorkspaceBackupManager(self)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                            self.automatic_backup.add()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    108
                 | 
                                    
                                                     | 
                
                 | 
                            self.automatic_backup = None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                        self.baseurl = baseurl  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                        #  print(mets.to_xml(xmllint=True).decode('utf-8')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    112
                 | 
                                    
                                                     | 
                
                 | 
                    def __repr__(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                        return 'Workspace[remote=%s, directory=%s, baseurl=%s, file_groups=%s, files=%s]' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                            self.is_remote,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                            self.directory,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    116
                 | 
                                    
                                                     | 
                
                 | 
                            self.baseurl,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    117
                 | 
                                    
                                                     | 
                
                 | 
                            self.mets.file_groups,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    118
                 | 
                                    
                                                     | 
                
                 | 
                            [str(f) for f in self.mets.find_all_files()],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    119
                 | 
                                    
                                                     | 
                
                 | 
                        )  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    120
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    121
                 | 
                                    
                                                     | 
                
                 | 
                    def reload_mets(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    122
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    123
                 | 
                                    
                                                     | 
                
                 | 
                        Reload METS from the filesystem.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    124
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    125
                 | 
                                    
                                                     | 
                
                 | 
                        if self.is_remote:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    126
                 | 
                                    
                                                     | 
                
                 | 
                            self.mets.reload()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    127
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    128
                 | 
                                    
                                                     | 
                
                 | 
                            self.mets = OcrdMets(filename=self.mets_target)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    129
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    130
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated_alias(pageId="page_id")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    131
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated_alias(ID="file_id")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    132
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated_alias(fileGrp="file_grp")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    133
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated_alias(fileGrp_mapping="filegrp_mapping")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    134
                 | 
                                    
                                                     | 
                
                 | 
                    def merge(self, other_workspace, copy_files=True, overwrite=False, **kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    135
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    136
                 | 
                                    
                                                     | 
                
                 | 
                        Merge ``other_workspace`` into this one  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    137
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    138
                 | 
                                    
                                                     | 
                
                 | 
                        See :py:meth:`ocrd_models.ocrd_mets.OcrdMets.merge` for the `kwargs`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    139
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    140
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    141
                 | 
                                    
                                                     | 
                
                 | 
                            copy_files (boolean): Whether to copy files from `other_workspace` to this one  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    142
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    143
                 | 
                                    
                                                     | 
                
                 | 
                        def after_add_cb(f):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    144
                 | 
                                    
                                                     | 
                
                 | 
                            """callback to run on merged OcrdFile instances in the destination"""  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    145
                 | 
                                    
                                                     | 
                
                 | 
                            if not f.local_filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    146
                 | 
                                    
                                                     | 
                
                 | 
                                # OcrdFile has no local_filename, so nothing to be copied  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    147
                 | 
                                    
                                                     | 
                
                 | 
                                return  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    148
                 | 
                                    
                                                     | 
                
                 | 
                            if not copy_files:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    149
                 | 
                                    
                                                     | 
                
                 | 
                                fpath_src = Path(other_workspace.directory).resolve()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    150
                 | 
                                    
                                                     | 
                
                 | 
                                fpath_dst = Path(self.directory).resolve()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    151
                 | 
                                    
                                                     | 
                
                 | 
                                dstprefix = fpath_src.relative_to(fpath_dst) # raises ValueError if not a subpath  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    152
                 | 
                                    
                                                     | 
                
                 | 
                                f.local_filename = dstprefix / f.local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    153
                 | 
                                    
                                                     | 
                
                 | 
                                return  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    154
                 | 
                                    
                                                     | 
                
                 | 
                            fpath_src = Path(other_workspace.directory, f.local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    155
                 | 
                                    
                                                     | 
                
                 | 
                            fpath_dest = Path(self.directory, f.local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    156
                 | 
                                    
                                                     | 
                
                 | 
                            if fpath_src.exists():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    157
                 | 
                                    
                                                     | 
                
                 | 
                                if fpath_dest.exists() and not overwrite:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    158
                 | 
                                    
                                                     | 
                
                 | 
                                    raise FileExistsError("Copying %s to %s would overwrite the latter" % (fpath_src, fpath_dest)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    159
                 | 
                                    
                                                     | 
                
                 | 
                                if not fpath_dest.parent.is_dir():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    160
                 | 
                                    
                                                     | 
                
                 | 
                                    makedirs(str(fpath_dest.parent))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    161
                 | 
                                    
                                                     | 
                
                 | 
                                with open(str(fpath_src), 'rb') as fstream_in, open(str(fpath_dest), 'wb') as fstream_out:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    162
                 | 
                                    
                                                     | 
                
                 | 
                                    copyfileobj(fstream_in, fstream_out)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    163
                 | 
                                    
                                                     | 
                
                 | 
                        if 'page_id' in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    164
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs['pageId'] = kwargs.pop('page_id') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    165
                 | 
                                    
                                                     | 
                
                 | 
                        if 'file_id' in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    166
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs['ID'] = kwargs.pop('file_id') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    167
                 | 
                                    
                                                     | 
                
                 | 
                        if 'file_grp' in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    168
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs['fileGrp'] = kwargs.pop('file_grp') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    169
                 | 
                                    
                                                     | 
                
                 | 
                        if 'filegrp_mapping' in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    170
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs['fileGrp_mapping'] = kwargs.pop('filegrp_mapping') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    171
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    172
                 | 
                                    
                                                     | 
                
                 | 
                        self.mets.merge(other_workspace.mets, after_add_cb=after_add_cb, **kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    173
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    174
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    175
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated(version='1.0.0', reason="Use workspace.download_file")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    176
                 | 
                                    
                                                     | 
                
                 | 
                    def download_url(self, url, **kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    177
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    178
                 | 
                                    
                                                     | 
                
                 | 
                        Download a URL to the workspace.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    179
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    180
                 | 
                                    
                                                     | 
                
                 | 
                        Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    181
                 | 
                                    
                                                     | 
                
                 | 
                            url (string): URL to download to directory  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    182
                 | 
                                    
                                                     | 
                
                 | 
                            **kwargs : See :py:class:`ocrd_models.ocrd_file.OcrdFile`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    183
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    184
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    185
                 | 
                                    
                                                     | 
                
                 | 
                            The local filename of the downloaded file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    186
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    187
                 | 
                                    
                                                     | 
                
                 | 
                        dummy_mets = OcrdMets.empty_mets()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    188
                 | 
                                    
                                                     | 
                
                 | 
                        f = dummy_mets.add_file('DEPRECATED', ID=Path(url).name, url=url) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    189
                 | 
                                    
                                                     | 
                
                 | 
                        f = self.download_file(f)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    190
                 | 
                                    
                                                     | 
                
                 | 
                        return f.local_filename  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    191
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    192
                 | 
                                    
                                                     | 
                
                 | 
                    def download_file(self, f, _recursion_count=0):  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    193
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    194
                 | 
                                    
                                                     | 
                
                 | 
                        Download a :py:class:`ocrd_models.ocrd_file.OcrdFile` to the workspace.  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    195
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    196
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.download_file') | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    197
                 | 
                                    
                                                     | 
                
                 | 
                        with pushd_popd(self.directory):  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    198
                 | 
                                    
                                                     | 
                
                 | 
                            if f.local_filename:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    199
                 | 
                                    
                                                     | 
                
                 | 
                                file_path = Path(f.local_filename).absolute()  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    200
                 | 
                                    
                                                     | 
                
                 | 
                                if file_path.exists():  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    201
                 | 
                                    
                                                     | 
                
                 | 
                                    try:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    202
                 | 
                                    
                                                     | 
                
                 | 
                                        file_path.relative_to(Path(self.directory).resolve()) # raises ValueError if not relative  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    203
                 | 
                                    
                                                     | 
                
                 | 
                                        # If the f.local_filename exists and is within self.directory, nothing to do  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    204
                 | 
                                    
                                                     | 
                
                 | 
                                        log.debug(f"'local_filename' {f.local_filename} already within {self.directory} - nothing to do") | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    205
                 | 
                                    
                                                     | 
                
                 | 
                                    except ValueError:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    206
                 | 
                                    
                                                     | 
                
                 | 
                                        # f.local_filename exists, but not within self.directory, copy it  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    207
                 | 
                                    
                                                     | 
                
                 | 
                                        log.debug("Copying 'local_filename' %s to workspace directory %s" % (f.local_filename, self.directory)) | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    208
                 | 
                                    
                                                     | 
                
                 | 
                                        f.local_filename = self.resolver.download_to_directory(self.directory, f.local_filename, subdir=f.fileGrp)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    209
                 | 
                                    
                                                     | 
                
                 | 
                                    return f  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    210
                 | 
                                    
                                                     | 
                
                 | 
                                if f.url:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    211
                 | 
                                    
                                                     | 
                
                 | 
                                    log.debug("OcrdFile has 'local_filename' but it doesn't resolve - trying to download from 'url' %s", f.url) | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    212
                 | 
                                    
                                                     | 
                
                 | 
                                    url = f.url  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    213
                 | 
                                    
                                                     | 
                
                 | 
                                elif self.baseurl:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    214
                 | 
                                    
                                                     | 
                
                 | 
                                    log.debug("OcrdFile has 'local_filename' but it doesn't resolve, and no 'url' - trying 'baseurl' %s with 'local_filename' %s", | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    215
                 | 
                                    
                                                     | 
                
                 | 
                                              self.baseurl, f.local_filename)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    216
                 | 
                                    
                                                     | 
                
                 | 
                                    url = '%s/%s' % (self.baseurl, f.local_filename)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    217
                 | 
                                    
                                                     | 
                
                 | 
                                else:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    218
                 | 
                                    
                                                     | 
                
                 | 
                                    raise FileNotFoundError(f"'local_filename' {f.local_filename} points to non-existing file, " | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    219
                 | 
                                    
                                                     | 
                
                 | 
                                                            "and no 'url' to download and no 'baseurl' set on workspace - nothing we can do.")  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    220
                 | 
                                    
                                                     | 
                
                 | 
                                file_path = Path(f.local_filename)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    221
                 | 
                                    
                                                     | 
                
                 | 
                                self.resolver.download_to_directory(self.directory, url, subdir=file_path.parent, basename=file_path.name)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    222
                 | 
                                    
                                                     | 
                
                 | 
                                return f  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    223
                 | 
                                    
                                                     | 
                
                 | 
                            if f.url:  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    224
                 | 
                                    
                                                     | 
                
                 | 
                                # If f.url is set, download the file to the workspace  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    225
                 | 
                                    
                                                     | 
                
                 | 
                                basename = '%s%s' % (f.ID, MIME_TO_EXT.get(f.mimetype, '')) if f.ID else f.basename  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    226
                 | 
                                    
                                                     | 
                
                 | 
                                f.local_filename = self.resolver.download_to_directory(self.directory, f.url, subdir=f.fileGrp, basename=basename)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    227
                 | 
                                    
                                                     | 
                
                 | 
                                return f  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    228
                 | 
                                    
                                                     | 
                
                 | 
                            # If neither f.local_filename nor f.url is set, fail  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    229
                 | 
                                    
                                                     | 
                
                 | 
                            raise ValueError(f"OcrdFile {f} has neither 'url' nor 'local_filename', so cannot be downloaded") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    230
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    231
                 | 
                                    
                                                     | 
                
                 | 
                    def remove_file(self, file_id, force=False, keep_file=False, page_recursive=False, page_same_group=False):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    232
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    233
                 | 
                                    
                                                     | 
                
                 | 
                        Remove a METS `file` from the workspace.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    234
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    235
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    236
                 | 
                                    
                                                     | 
                
                 | 
                            file_id (string|:py:class:`ocrd_models.ocrd_file.OcrdFile`): `@ID` of the METS `file`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    237
                 | 
                                    
                                                     | 
                
                 | 
                                to delete or the file itself  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    238
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    239
                 | 
                                    
                                                     | 
                
                 | 
                            force (boolean): Continue removing even if file not found in METS  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    240
                 | 
                                    
                                                     | 
                
                 | 
                            keep_file (boolean): Whether to keep files on disk  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    241
                 | 
                                    
                                                     | 
                
                 | 
                            page_recursive (boolean): Whether to remove all images referenced in the file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    242
                 | 
                                    
                                                     | 
                
                 | 
                                if the file is a PAGE-XML document.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    243
                 | 
                                    
                                                     | 
                
                 | 
                            page_same_group (boolean): Remove only images in the same file group as the PAGE-XML.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    244
                 | 
                                    
                                                     | 
                
                 | 
                                Has no effect unless ``page_recursive`` is `True`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    245
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    246
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.remove_file') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    247
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug('Deleting mets:file %s', file_id) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    248
                 | 
                                    
                                                     | 
                
                 | 
                        if isinstance(file_id, OcrdFile):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    249
                 | 
                                    
                                                     | 
                
                 | 
                            file_id = file_id.ID  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    250
                 | 
                                    
                                                     | 
                
                 | 
                        try:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    251
                 | 
                                    
                                                     | 
                
                 | 
                            try:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    252
                 | 
                                    
                                                     | 
                
                 | 
                                ocrd_file = next(self.mets.find_files(ID=file_id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    253
                 | 
                                    
                                                     | 
                
                 | 
                            except StopIteration:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    254
                 | 
                                    
                                                     | 
                
                 | 
                                if file_id.startswith(REGEX_PREFIX):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    255
                 | 
                                    
                                                     | 
                
                 | 
                                    # allow empty results if filter criteria involve a regex  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    256
                 | 
                                    
                                                     | 
                
                 | 
                                    return None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    257
                 | 
                                    
                                                     | 
                
                 | 
                                raise FileNotFoundError("File %s not found in METS" % file_id) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    258
                 | 
                                    
                                                     | 
                
                 | 
                            if page_recursive and ocrd_file.mimetype == MIMETYPE_PAGE:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    259
                 | 
                                    
                                                     | 
                
                 | 
                                with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    260
                 | 
                                    
                                                     | 
                
                 | 
                                    ocrd_page = parse(self.download_file(ocrd_file).local_filename, silence=True)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    261
                 | 
                                    
                                                     | 
                
                 | 
                                    for img_url in ocrd_page.get_AllAlternativeImagePaths():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    262
                 | 
                                    
                                                     | 
                
                 | 
                                        img_kwargs = {'local_filename': img_url} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    263
                 | 
                                    
                                                     | 
                
                 | 
                                        if page_same_group:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    264
                 | 
                                    
                                                     | 
                
                 | 
                                            img_kwargs['fileGrp'] = ocrd_file.fileGrp  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    265
                 | 
                                    
                                                     | 
                
                 | 
                                        for img_file in self.mets.find_files(**img_kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    266
                 | 
                                    
                                                     | 
                
                 | 
                                            self.remove_file(img_file, keep_file=keep_file, force=force)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    267
                 | 
                                    
                                                     | 
                
                 | 
                            if not keep_file:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    268
                 | 
                                    
                                                     | 
                
                 | 
                                with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    269
                 | 
                                    
                                                     | 
                
                 | 
                                    if not ocrd_file.local_filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    270
                 | 
                                    
                                                     | 
                
                 | 
                                        if force:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    271
                 | 
                                    
                                                     | 
                
                 | 
                                            log.debug("File not locally available but --force is set: %s", ocrd_file) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    272
                 | 
                                    
                                                     | 
                
                 | 
                                        else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    273
                 | 
                                    
                                                     | 
                
                 | 
                                            raise Exception("File not locally available %s" % ocrd_file) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    274
                 | 
                                    
                                                     | 
                
                 | 
                                    else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    275
                 | 
                                    
                                                     | 
                
                 | 
                                        log.debug("rm %s [cwd=%s]", ocrd_file.local_filename, self.directory) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    276
                 | 
                                    
                                                     | 
                
                 | 
                                        unlink(ocrd_file.local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    277
                 | 
                                    
                                                     | 
                
                 | 
                            # Remove from METS only after the recursion of AlternativeImages  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    278
                 | 
                                    
                                                     | 
                
                 | 
                            self.mets.remove_file(file_id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    279
                 | 
                                    
                                                     | 
                
                 | 
                            return ocrd_file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    280
                 | 
                                    
                                                     | 
                
                 | 
                        except FileNotFoundError as e:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    281
                 | 
                                    
                                                     | 
                
                 | 
                            if not force:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    282
                 | 
                                    
                                                     | 
                
                 | 
                                raise e  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    283
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    284
                 | 
                                    
                                                     | 
                
                 | 
                    def remove_file_group(self, USE, recursive=False, force=False, keep_files=False, page_recursive=False, page_same_group=False):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    285
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    286
                 | 
                                    
                                                     | 
                
                 | 
                        Remove a METS `fileGrp`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    287
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    288
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    289
                 | 
                                    
                                                     | 
                
                 | 
                            USE (string): `@USE` of the METS `fileGrp` to delete  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    290
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    291
                 | 
                                    
                                                     | 
                
                 | 
                            recursive (boolean): Whether to recursively delete all files in the group  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    292
                 | 
                                    
                                                     | 
                
                 | 
                            force (boolean): Continue removing even if group or containing files not found in METS  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    293
                 | 
                                    
                                                     | 
                
                 | 
                            keep_files (boolean): When deleting recursively whether to keep files on disk  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    294
                 | 
                                    
                                                     | 
                
                 | 
                            page_recursive (boolean): Whether to remove all images referenced in the file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    295
                 | 
                                    
                                                     | 
                
                 | 
                                if the file is a PAGE-XML document.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    296
                 | 
                                    
                                                     | 
                
                 | 
                            page_same_group (boolean): Remove only images in the same file group as the PAGE-XML.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    297
                 | 
                                    
                                                     | 
                
                 | 
                                Has no effect unless ``page_recursive`` is `True`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    298
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    299
                 | 
                                    
                                                     | 
                
                 | 
                        if (not USE.startswith(REGEX_PREFIX)) and (USE not in self.mets.file_groups) and (not force):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    300
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception("No such fileGrp: %s" % USE) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    301
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    302
                 | 
                                    
                                                     | 
                
                 | 
                        file_dirs = []  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    303
                 | 
                                    
                                                     | 
                
                 | 
                        if recursive:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    304
                 | 
                                    
                                                     | 
                
                 | 
                            for f in self.mets.find_files(fileGrp=USE):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    305
                 | 
                                    
                                                     | 
                
                 | 
                                self.remove_file(f, force=force, keep_file=keep_files, page_recursive=page_recursive, page_same_group=page_same_group)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    306
                 | 
                                    
                                                     | 
                
                 | 
                                if f.local_filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    307
                 | 
                                    
                                                     | 
                
                 | 
                                    f_dir = path.dirname(f.local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    308
                 | 
                                    
                                                     | 
                
                 | 
                                    if f_dir:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    309
                 | 
                                    
                                                     | 
                
                 | 
                                        file_dirs.append(f_dir)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    310
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    311
                 | 
                                    
                                                     | 
                
                 | 
                        self.mets.remove_file_group(USE, force=force, recursive=recursive)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    312
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    313
                 | 
                                    
                                                     | 
                
                 | 
                        # PLEASE NOTE: this only removes directories in the workspace if they are empty  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    314
                 | 
                                    
                                                     | 
                
                 | 
                        # and named after the fileGrp which is a convention in OCR-D.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    315
                 | 
                                    
                                                     | 
                
                 | 
                        with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    316
                 | 
                                    
                                                     | 
                
                 | 
                            if Path(USE).is_dir() and not listdir(USE):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    317
                 | 
                                    
                                                     | 
                
                 | 
                                Path(USE).rmdir()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    318
                 | 
                                    
                                                     | 
                
                 | 
                            if file_dirs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    319
                 | 
                                    
                                                     | 
                
                 | 
                                for file_dir in set(file_dirs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    320
                 | 
                                    
                                                     | 
                
                 | 
                                    if Path(file_dir).is_dir() and not listdir(file_dir):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    321
                 | 
                                    
                                                     | 
                
                 | 
                                        Path(file_dir).rmdir()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    322
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    323
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    324
                 | 
                                    
                                                     | 
                
                 | 
                    def rename_file_group(self, old, new):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    325
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    326
                 | 
                                    
                                                     | 
                
                 | 
                        Rename a METS `fileGrp`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    327
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    328
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    329
                 | 
                                    
                                                     | 
                
                 | 
                            old (string): `@USE` of the METS `fileGrp` to rename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    330
                 | 
                                    
                                                     | 
                
                 | 
                            new (string): `@USE` of the METS `fileGrp` to rename as  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    331
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    332
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.rename_file_group') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    333
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    334
                 | 
                                    
                                                     | 
                
                 | 
                        if old not in self.mets.file_groups:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    335
                 | 
                                    
                                                     | 
                
                 | 
                            raise ValueError(f"No such fileGrp: {old}") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    336
                 | 
                                    
                                                     | 
                
                 | 
                        if new in self.mets.file_groups:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    337
                 | 
                                    
                                                     | 
                
                 | 
                            raise ValueError(f"fileGrp already exists {new}") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    338
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    339
                 | 
                                    
                                                     | 
                
                 | 
                        with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    340
                 | 
                                    
                                                     | 
                
                 | 
                            # create workspace dir ``new``  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    341
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("mkdir %s" % new) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    342
                 | 
                                    
                                                     | 
                
                 | 
                            if not Path(new).is_dir():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    343
                 | 
                                    
                                                     | 
                
                 | 
                                Path(new).mkdir()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    344
                 | 
                                    
                                                     | 
                
                 | 
                            local_filename_replacements = {} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    345
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("Moving files") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    346
                 | 
                                    
                                                     | 
                
                 | 
                            for mets_file in self.mets.find_files(fileGrp=old, local_only=True):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    347
                 | 
                                    
                                                     | 
                
                 | 
                                new_local_filename = old_local_filename = mets_file.local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    348
                 | 
                                    
                                                     | 
                
                 | 
                                assert new_local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    349
                 | 
                                    
                                                     | 
                
                 | 
                                assert old_local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    350
                 | 
                                    
                                                     | 
                
                 | 
                                # Directory part  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    351
                 | 
                                    
                                                     | 
                
                 | 
                                new_local_filename = sub(r'^%s/' % old, r'%s/' % new, new_local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    352
                 | 
                                    
                                                     | 
                
                 | 
                                # File part  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    353
                 | 
                                    
                                                     | 
                
                 | 
                                new_local_filename = sub(r'/%s' % old, r'/%s' % new, new_local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    354
                 | 
                                    
                                                     | 
                
                 | 
                                local_filename_replacements[str(mets_file.local_filename)] = new_local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    355
                 | 
                                    
                                                     | 
                
                 | 
                                # move file from ``old`` to ``new``  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    356
                 | 
                                    
                                                     | 
                
                 | 
                                Path(old_local_filename).rename(new_local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    357
                 | 
                                    
                                                     | 
                
                 | 
                                # change the url of ``mets:file``  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    358
                 | 
                                    
                                                     | 
                
                 | 
                                mets_file.local_filename = new_local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    359
                 | 
                                    
                                                     | 
                
                 | 
                                # change the file ID and update structMap  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    360
                 | 
                                    
                                                     | 
                
                 | 
                                # change the file ID and update structMap  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    361
                 | 
                                    
                                                     | 
                
                 | 
                                new_id = sub(r'^%s' % old, r'%s' % new, mets_file.ID)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    362
                 | 
                                    
                                                     | 
                
                 | 
                                try:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    363
                 | 
                                    
                                                     | 
                
                 | 
                                    next(self.mets.find_files(ID=new_id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    364
                 | 
                                    
                                                     | 
                
                 | 
                                    log.warning("ID %s already exists, not changing ID while renaming %s -> %s" % (new_id, old_local_filename, new_local_filename)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    365
                 | 
                                    
                                                     | 
                
                 | 
                                except StopIteration:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    366
                 | 
                                    
                                                     | 
                
                 | 
                                    mets_file.ID = new_id  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    367
                 | 
                                    
                                                     | 
                
                 | 
                            # change file paths in PAGE-XML imageFilename and filename attributes  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    368
                 | 
                                    
                                                     | 
                
                 | 
                            for page_file in self.mets.find_files(mimetype=MIMETYPE_PAGE, local_only=True):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    369
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug("Renaming file references in PAGE-XML %s" % page_file) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    370
                 | 
                                    
                                                     | 
                
                 | 
                                pcgts = page_from_file(page_file)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    371
                 | 
                                    
                                                     | 
                
                 | 
                                changed = False  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    372
                 | 
                                    
                                                     | 
                
                 | 
                                for old_local_filename, new_local_filename in local_filename_replacements.items():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    373
                 | 
                                    
                                                     | 
                
                 | 
                                    if pcgts.get_Page().imageFilename == old_local_filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    374
                 | 
                                    
                                                     | 
                
                 | 
                                        changed = True  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    375
                 | 
                                    
                                                     | 
                
                 | 
                                        log.debug("Rename pc:Page/@imageFilename: %s -> %s" % (old_local_filename, new_local_filename)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    376
                 | 
                                    
                                                     | 
                
                 | 
                                        pcgts.get_Page().imageFilename = new_local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    377
                 | 
                                    
                                                     | 
                
                 | 
                                for ai in pcgts.get_Page().get_AllAlternativeImages():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    378
                 | 
                                    
                                                     | 
                
                 | 
                                    for old_local_filename, new_local_filename in local_filename_replacements.items():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    379
                 | 
                                    
                                                     | 
                
                 | 
                                        if ai.filename == old_local_filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    380
                 | 
                                    
                                                     | 
                
                 | 
                                            changed = True  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    381
                 | 
                                    
                                                     | 
                
                 | 
                                            log.debug("Rename pc:Page/../AlternativeImage: %s -> %s" % (old_local_filename, new_local_filename)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    382
                 | 
                                    
                                                     | 
                
                 | 
                                            ai.filename = new_local_filename  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    383
                 | 
                                    
                                                     | 
                
                 | 
                                if changed:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    384
                 | 
                                    
                                                     | 
                
                 | 
                                    log.debug("PAGE-XML changed, writing %s" % (page_file.local_filename)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    385
                 | 
                                    
                                                     | 
                
                 | 
                                    with open(page_file.local_filename, 'w', encoding='utf-8') as f:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    386
                 | 
                                    
                                                     | 
                
                 | 
                                        f.write(to_xml(pcgts))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    387
                 | 
                                    
                                                     | 
                
                 | 
                            # change the ``USE`` attribute of the fileGrp  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    388
                 | 
                                    
                                                     | 
                
                 | 
                            self.mets.rename_file_group(old, new)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    389
                 | 
                                    
                                                     | 
                
                 | 
                            # Remove the old dir  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    390
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("rmdir %s" % old) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    391
                 | 
                                    
                                                     | 
                
                 | 
                            if Path(old).is_dir() and not listdir(old):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    392
                 | 
                                    
                                                     | 
                
                 | 
                                Path(old).rmdir()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    393
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    394
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated_alias(pageId="page_id")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    395
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated_alias(ID="file_id")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    396
                 | 
                                    
                                                     | 
                
                 | 
                    def add_file(self, file_grp, content=None, **kwargs) -> Union[OcrdFile, ClientSideOcrdFile]:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    397
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    398
                 | 
                                    
                                                     | 
                
                 | 
                        Add a file to the :py:class:`ocrd_models.ocrd_mets.OcrdMets` of the workspace.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    399
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    400
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    401
                 | 
                                    
                                                     | 
                
                 | 
                            file_grp (string): `@USE` of the METS `fileGrp` to add to  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    402
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    403
                 | 
                                    
                                                     | 
                
                 | 
                            content (string|bytes): optional content to write to the file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    404
                 | 
                                    
                                                     | 
                
                 | 
                                in the filesystem  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    405
                 | 
                                    
                                                     | 
                
                 | 
                            **kwargs: See :py:func:`ocrd_models.ocrd_mets.OcrdMets.add_file`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    406
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    407
                 | 
                                    
                                                     | 
                
                 | 
                            a new :py:class:`ocrd_models.ocrd_file.OcrdFile`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    408
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    409
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.add_file') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    410
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    411
                 | 
                                    
                                                     | 
                
                 | 
                            'outputfile file_grp=%s local_filename=%s content=%s',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    412
                 | 
                                    
                                                     | 
                
                 | 
                            file_grp,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    413
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs.get('local_filename'), | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    414
                 | 
                                    
                                                     | 
                
                 | 
                            content is not None)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    415
                 | 
                                    
                                                     | 
                
                 | 
                        if 'page_id' not in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    416
                 | 
                                    
                                                     | 
                
                 | 
                            raise ValueError("workspace.add_file must be passed a 'page_id' kwarg, even if it is None.") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    417
                 | 
                                    
                                                     | 
                
                 | 
                        if content is not None and not kwargs.get('local_filename'): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    418
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception("'content' was set but no 'local_filename'") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    419
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    420
                 | 
                                    
                                                     | 
                
                 | 
                        with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    421
                 | 
                                    
                                                     | 
                
                 | 
                            if kwargs.get('local_filename'): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    422
                 | 
                                    
                                                     | 
                
                 | 
                                # If the local filename has folder components, create those folders  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    423
                 | 
                                    
                                                     | 
                
                 | 
                                local_filename_dir = str(kwargs['local_filename']).rsplit('/', 1)[0] | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    424
                 | 
                                    
                                                     | 
                
                 | 
                                if local_filename_dir != str(kwargs['local_filename']) and not Path(local_filename_dir).is_dir():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    425
                 | 
                                    
                                                     | 
                
                 | 
                                    makedirs(local_filename_dir, exist_ok=True)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    426
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    427
                 | 
                                    
                                                     | 
                
                 | 
                            #  print(kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    428
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs["pageId"] = kwargs.pop("page_id") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    429
                 | 
                                    
                                                     | 
                
                 | 
                            if "file_id" in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    430
                 | 
                                    
                                                     | 
                
                 | 
                                kwargs["ID"] = kwargs.pop("file_id") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    431
                 | 
                                    
                                                     | 
                
                 | 
                            if config.OCRD_EXISTING_OUTPUT == 'OVERWRITE':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    432
                 | 
                                    
                                                     | 
                
                 | 
                                kwargs["force"] = True  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    433
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    434
                 | 
                                    
                                                     | 
                
                 | 
                            ret = self.mets.add_file(file_grp, **kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    435
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    436
                 | 
                                    
                                                     | 
                
                 | 
                            # content being set implies is_remote==False because METS server  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    437
                 | 
                                    
                                                     | 
                
                 | 
                            # does not pass file contents  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    438
                 | 
                                    
                                                     | 
                
                 | 
                            if content is not None:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    439
                 | 
                                    
                                                     | 
                
                 | 
                                with open(kwargs['local_filename'], 'wb') as f:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    440
                 | 
                                    
                                                     | 
                
                 | 
                                    if isinstance(content, str):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    441
                 | 
                                    
                                                     | 
                
                 | 
                                        content = bytes(content, 'utf-8')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    442
                 | 
                                    
                                                     | 
                
                 | 
                                    f.write(content)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    443
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    444
                 | 
                                    
                                                     | 
                
                 | 
                        return ret  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    445
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    446
                 | 
                                    
                                                     | 
                
                 | 
                    def save_mets(self):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    447
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    448
                 | 
                                    
                                                     | 
                
                 | 
                        Write out the current state of the METS file to the filesystem.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    449
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    450
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.save_mets') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    451
                 | 
                                    
                                                     | 
                
                 | 
                        if self.is_remote:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    452
                 | 
                                    
                                                     | 
                
                 | 
                            self.mets.save()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    453
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    454
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("Saving mets '%s'", self.mets_target) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    455
                 | 
                                    
                                                     | 
                
                 | 
                            if self.automatic_backup:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    456
                 | 
                                    
                                                     | 
                
                 | 
                                WorkspaceBackupManager(self).add()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    457
                 | 
                                    
                                                     | 
                
                 | 
                            with atomic_write(self.mets_target) as f:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    458
                 | 
                                    
                                                     | 
                
                 | 
                                f.write(self.mets.to_xml(xmllint=True).decode('utf-8')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    459
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    460
                 | 
                                    
                                                     | 
                
                 | 
                    def _apply_mets_file(self, filename_or_url: str, fun: Callable):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    461
                 | 
                                    
                                                     | 
                
                 | 
                        if not filename_or_url:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    462
                 | 
                                    
                                                     | 
                
                 | 
                            # avoid "finding" just any file  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    463
                 | 
                                    
                                                     | 
                
                 | 
                            raise ValueError("requires non-empty filename or URL") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    464
                 | 
                                    
                                                     | 
                
                 | 
                        with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    465
                 | 
                                    
                                                     | 
                
                 | 
                            if Path(filename_or_url).exists():  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    466
                 | 
                                    
                                                     | 
                
                 | 
                                return fun(filename_or_url)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    467
                 | 
                                    
                                                     | 
                
                 | 
                            if image_file := next(self.mets.find_files(local_filename=str(filename_or_url)), None):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    468
                 | 
                                    
                                                     | 
                
                 | 
                                return fun(image_file.local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    469
                 | 
                                    
                                                     | 
                
                 | 
                            if image_file := next(self.mets.find_files(url=str(filename_or_url)), None):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    470
                 | 
                                    
                                                     | 
                
                 | 
                                return fun(self.download_file(image_file).local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    471
                 | 
                                    
                                                     | 
                
                 | 
                            with download_temporary_file(filename_or_url) as f:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    472
                 | 
                                    
                                                     | 
                
                 | 
                                return fun(f.name)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    473
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    474
                 | 
                                    
                                                     | 
                
                 | 
                    def resolve_image_exif(self, image_url):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    475
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    476
                 | 
                                    
                                                     | 
                
                 | 
                        Get the EXIF metadata about an image URL as :py:class:`ocrd_models.ocrd_exif.OcrdExif`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    477
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    478
                 | 
                                    
                                                     | 
                
                 | 
                        Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    479
                 | 
                                    
                                                     | 
                
                 | 
                            image_url (string) : `@href` (path or URL) of the METS `file` to inspect  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    480
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    481
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    482
                 | 
                                    
                                                     | 
                
                 | 
                            :py:class:`ocrd_models.ocrd_exif.OcrdExif`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    483
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    484
                 | 
                                    
                                                     | 
                
                 | 
                        return self._apply_mets_file(image_url, exif_from_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    485
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    486
                 | 
                                    
                                                     | 
                
                 | 
                    @deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment")  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    487
                 | 
                                    
                                                     | 
                
                 | 
                    def resolve_image_as_pil(self, image_url, coords=None):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    488
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    489
                 | 
                                    
                                                     | 
                
                 | 
                        Resolve an image URL to a `PIL.Image`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    490
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    491
                 | 
                                    
                                                     | 
                
                 | 
                        Arguments:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    492
                 | 
                                    
                                                     | 
                
                 | 
                            image_url (string): `@href` (path or URL) of the METS `file` to retrieve  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    493
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    494
                 | 
                                    
                                                     | 
                
                 | 
                            coords (list) : Coordinates of the bounding box to cut from the image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    495
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    496
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    497
                 | 
                                    
                                                     | 
                
                 | 
                            Full or cropped `PIL.Image`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    498
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    499
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    500
                 | 
                                    
                                                     | 
                
                 | 
                        return self._resolve_image_as_pil(image_url, coords)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    501
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    502
                 | 
                                    
                                                     | 
                
                 | 
                    def _resolve_image_as_pil(self, image_url, coords=None):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    503
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace._resolve_image_as_pil') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    504
                 | 
                                    
                                                     | 
                
                 | 
                        pil_image = self._apply_mets_file(image_url, Image.open)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    505
                 | 
                                    
                                                     | 
                
                 | 
                        pil_image.load() # alloc and give up the FD  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    506
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    507
                 | 
                                    
                                                     | 
                
                 | 
                        # Pillow does not properly support higher color depths  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    508
                 | 
                                    
                                                     | 
                
                 | 
                        # (e.g. 16-bit or 32-bit or floating point grayscale),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    509
                 | 
                                    
                                                     | 
                
                 | 
                        # clipping its dynamic range to the lower 8-bit in  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    510
                 | 
                                    
                                                     | 
                
                 | 
                        # many operations (including paste, putalpha, ImageStat...),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    511
                 | 
                                    
                                                     | 
                
                 | 
                        # even including conversion.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    512
                 | 
                                    
                                                     | 
                
                 | 
                        # Cf. Pillow#3011 Pillow#3159 Pillow#3838 (still open in 8.0)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    513
                 | 
                                    
                                                     | 
                
                 | 
                        # So to be on the safe side, we must re-quantize these  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    514
                 | 
                                    
                                                     | 
                
                 | 
                        # to 8-bit via numpy (conversion to/from which fortunately  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    515
                 | 
                                    
                                                     | 
                
                 | 
                        # seems to work reliably):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    516
                 | 
                                    
                                                     | 
                
                 | 
                        if (pil_image.mode.startswith('I') or | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    517
                 | 
                                    
                                                     | 
                
                 | 
                            pil_image.mode.startswith('F')): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    518
                 | 
                                    
                                                     | 
                
                 | 
                            arr_image = np.array(pil_image)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    519
                 | 
                                    
                                                     | 
                
                 | 
                            if arr_image.dtype.kind == 'i':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    520
                 | 
                                    
                                                     | 
                
                 | 
                                # signed integer is *not* trustworthy in this context  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    521
                 | 
                                    
                                                     | 
                
                 | 
                                # (usually a mistake in the array interface)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    522
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug('Casting image "%s" from signed to unsigned', image_url) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    523
                 | 
                                    
                                                     | 
                
                 | 
                                arr_image.dtype = np.dtype('u' + arr_image.dtype.name) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    524
                 | 
                                    
                                                     | 
                
                 | 
                            if arr_image.dtype.kind == 'u':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    525
                 | 
                                    
                                                     | 
                
                 | 
                                # integer needs to be scaled linearly to 8 bit  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    526
                 | 
                                    
                                                     | 
                
                 | 
                                # of course, an image might actually have some lower range  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    527
                 | 
                                    
                                                     | 
                
                 | 
                                # (e.g. 10-bit in I;16 or 20-bit in I or 4-bit in L),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    528
                 | 
                                    
                                                     | 
                
                 | 
                                # but that would be guessing anyway, so here don't  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    529
                 | 
                                    
                                                     | 
                
                 | 
                                # make assumptions on _scale_, just reduce _precision_  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    530
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug('Reducing image "%s" from depth %d bit to 8 bit', | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    531
                 | 
                                    
                                                     | 
                
                 | 
                                          image_url, arr_image.dtype.itemsize * 8)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    532
                 | 
                                    
                                                     | 
                
                 | 
                                arr_image = arr_image >> 8 * (arr_image.dtype.itemsize-1)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    533
                 | 
                                    
                                                     | 
                
                 | 
                                arr_image = arr_image.astype(np.uint8)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    534
                 | 
                                    
                                                     | 
                
                 | 
                            elif arr_image.dtype.kind == 'f':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    535
                 | 
                                    
                                                     | 
                
                 | 
                                # float needs to be scaled from [0,1.0] to [0,255]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    536
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug('Reducing image "%s" from floating point to 8 bit', | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    537
                 | 
                                    
                                                     | 
                
                 | 
                                          image_url)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    538
                 | 
                                    
                                                     | 
                
                 | 
                                arr_image *= 255  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    539
                 | 
                                    
                                                     | 
                
                 | 
                                arr_image = arr_image.astype(np.uint8)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    540
                 | 
                                    
                                                     | 
                
                 | 
                            pil_image = Image.fromarray(arr_image)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    541
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    542
                 | 
                                    
                                                     | 
                
                 | 
                        if coords is None:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    543
                 | 
                                    
                                                     | 
                
                 | 
                            return pil_image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    544
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    545
                 | 
                                    
                                                     | 
                
                 | 
                        # FIXME: remove or replace this by (image_from_polygon+) crop_image ...  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    546
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug("Converting PIL to OpenCV: %s", image_url) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    547
                 | 
                                    
                                                     | 
                
                 | 
                        color_conversion = COLOR_GRAY2BGR if pil_image.mode in ('1', 'L') else  COLOR_RGB2BGR | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    548
                 | 
                                    
                                                     | 
                
                 | 
                        pil_as_np_array = np.array(pil_image).astype('uint8') if pil_image.mode == '1' else np.array(pil_image) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    549
                 | 
                                    
                                                     | 
                
                 | 
                        cv2_image = cvtColor(pil_as_np_array, color_conversion)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    550
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    551
                 | 
                                    
                                                     | 
                
                 | 
                        poly = np.array(coords, np.int32)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    552
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug("Cutting region %s from %s", coords, image_url) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    553
                 | 
                                    
                                                     | 
                
                 | 
                        region_cut = cv2_image[  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    554
                 | 
                                    
                                                     | 
                
                 | 
                            np.min(poly[:, 1]):np.max(poly[:, 1]),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    555
                 | 
                                    
                                                     | 
                
                 | 
                            np.min(poly[:, 0]):np.max(poly[:, 0])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    556
                 | 
                                    
                                                     | 
                
                 | 
                        ]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    557
                 | 
                                    
                                                     | 
                
                 | 
                        return Image.fromarray(region_cut)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    558
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    559
                 | 
                                    
                                                     | 
                
                 | 
                    def image_from_page(self, page, page_id,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    560
                 | 
                                    
                                                     | 
                
                 | 
                                        fill='background', transparency=False,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    561
                 | 
                                    
                                                     | 
                
                 | 
                                        feature_selector='', feature_filter='', filename=''):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    562
                 | 
                                    
                                                     | 
                
                 | 
                        """Extract an image for a PAGE-XML page from the workspace.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    563
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    564
                 | 
                                    
                                                     | 
                
                 | 
                        Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    565
                 | 
                                    
                                                     | 
                
                 | 
                            page (:py:class:`ocrd_models.ocrd_page.PageType`): a PAGE `PageType` object  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    566
                 | 
                                    
                                                     | 
                
                 | 
                            page_id (string): its `@ID` in the METS physical `structMap`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    567
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    568
                 | 
                                    
                                                     | 
                
                 | 
                            fill (string): a `PIL` color specifier, or `background` or `none`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    569
                 | 
                                    
                                                     | 
                
                 | 
                            transparency (boolean): whether to add an alpha channel for masking  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    570
                 | 
                                    
                                                     | 
                
                 | 
                            feature_selector (string): a comma-separated list of `@comments` classes  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    571
                 | 
                                    
                                                     | 
                
                 | 
                            feature_filter (string): a comma-separated list of `@comments` classes  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    572
                 | 
                                    
                                                     | 
                
                 | 
                            filename (string): which file path to use  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    573
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    574
                 | 
                                    
                                                     | 
                
                 | 
                        Extract a `PIL.Image` from ``page``, either from its `AlternativeImage`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    575
                 | 
                                    
                                                     | 
                
                 | 
                        (if it exists), or from its `@imageFilename` (otherwise). Also crop it,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    576
                 | 
                                    
                                                     | 
                
                 | 
                        if a `Border` exists, and rotate it, if any `@orientation` angle is  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    577
                 | 
                                    
                                                     | 
                
                 | 
                        annotated.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    578
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    579
                 | 
                                    
                                                     | 
                
                 | 
                        If ``filename`` is given, then among `@imageFilename` and the available  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    580
                 | 
                                    
                                                     | 
                
                 | 
                        `AlternativeImage/@filename` images, pick that one, or raise an error.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    581
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    582
                 | 
                                    
                                                     | 
                
                 | 
                        If ``feature_selector`` and/or ``feature_filter`` is given, then  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    583
                 | 
                                    
                                                     | 
                
                 | 
                        among the `@imageFilename` image and the available AlternativeImages,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    584
                 | 
                                    
                                                     | 
                
                 | 
                        select/filter the richest one which contains all of the selected,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    585
                 | 
                                    
                                                     | 
                
                 | 
                        but none of the filtered features (i.e. `@comments` classes), or  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    586
                 | 
                                    
                                                     | 
                
                 | 
                        raise an error.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    587
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    588
                 | 
                                    
                                                     | 
                
                 | 
                        (Required and produced features need not be in the same order, so  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    589
                 | 
                                    
                                                     | 
                
                 | 
                        ``feature_selector`` is merely a mask specifying Boolean AND, and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    590
                 | 
                                    
                                                     | 
                
                 | 
                        ``feature_filter`` is merely a mask specifying Boolean OR.)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    591
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    592
                 | 
                                    
                                                     | 
                
                 | 
                        If the chosen image does not have the feature `"cropped"` yet, but  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    593
                 | 
                                    
                                                     | 
                
                 | 
                        a `Border` exists, and unless `"cropped"` is being filtered, then crop it.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    594
                 | 
                                    
                                                     | 
                
                 | 
                        Likewise, if the chosen image does not have the feature `"deskewed"` yet,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    595
                 | 
                                    
                                                     | 
                
                 | 
                        but an `@orientation` angle is annotated, and unless `"deskewed"` is being  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    596
                 | 
                                    
                                                     | 
                
                 | 
                        filtered, then rotate it. (However, if `@orientation` is above the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    597
                 | 
                                    
                                                     | 
                
                 | 
                        [-45°,45°] interval, then apply as much transposition as possible first,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    598
                 | 
                                    
                                                     | 
                
                 | 
                        unless `"rotated-90"` / `"rotated-180"` / `"rotated-270"` is being filtered.)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    599
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    600
                 | 
                                    
                                                     | 
                
                 | 
                        Cropping uses a polygon mask (not just the bounding box rectangle).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    601
                 | 
                                    
                                                     | 
                
                 | 
                        Areas outside the polygon will be filled according to ``fill``:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    602
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    603
                 | 
                                    
                                                     | 
                
                 | 
                        - if `"background"` (the default),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    604
                 | 
                                    
                                                     | 
                
                 | 
                          then fill with the median color of the image;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    605
                 | 
                                    
                                                     | 
                
                 | 
                        - else if `"none"`, then avoid masking polygons where possible  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    606
                 | 
                                    
                                                     | 
                
                 | 
                          (i.e. when cropping) or revert to the default (i.e. when rotating)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    607
                 | 
                                    
                                                     | 
                
                 | 
                        - otherwise, use the given color, e.g. `"white"` or `(255,255,255)`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    608
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    609
                 | 
                                    
                                                     | 
                
                 | 
                        Moreover, if ``transparency`` is true, and unless the image already  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    610
                 | 
                                    
                                                     | 
                
                 | 
                        has an alpha channel, then add an alpha channel which is fully opaque  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    611
                 | 
                                    
                                                     | 
                
                 | 
                        before cropping and rotating. (Thus, unexposed/masked areas will be  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    612
                 | 
                                    
                                                     | 
                
                 | 
                        transparent afterwards for consumers that can interpret alpha channels).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    613
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    614
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    615
                 | 
                                    
                                                     | 
                
                 | 
                            a tuple of  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    616
                 | 
                                    
                                                     | 
                
                 | 
                             * the extracted `PIL.Image`,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    617
                 | 
                                    
                                                     | 
                
                 | 
                             * a `dict` with information about the extracted image:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    618
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    619
                 | 
                                    
                                                     | 
                
                 | 
                               - `"transform"`: a `Numpy` array with an affine transform which  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    620
                 | 
                                    
                                                     | 
                
                 | 
                                   converts from absolute coordinates to those relative to the image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    621
                 | 
                                    
                                                     | 
                
                 | 
                                   i.e. after cropping to the page's border / bounding box (if any)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    622
                 | 
                                    
                                                     | 
                
                 | 
                                   and deskewing with the page's orientation angle (if any)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    623
                 | 
                                    
                                                     | 
                
                 | 
                               - `"angle"`: the rotation/reflection angle applied to the image so far,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    624
                 | 
                                    
                                                     | 
                
                 | 
                               - `"DPI"`: the pixel density of the original image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    625
                 | 
                                    
                                                     | 
                
                 | 
                               - `"features"`: the `AlternativeImage` `@comments` for the image, i.e.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    626
                 | 
                                    
                                                     | 
                
                 | 
                                 names of all applied operations that lead up to this result,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    627
                 | 
                                    
                                                     | 
                
                 | 
                             * an :py:class:`ocrd_models.ocrd_exif.OcrdExif` instance associated with  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    628
                 | 
                                    
                                                     | 
                
                 | 
                               the original image.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    629
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    630
                 | 
                                    
                                                     | 
                
                 | 
                        (The first two can be used to annotate a new `AlternativeImage`,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    631
                 | 
                                    
                                                     | 
                
                 | 
                         or be passed down with :py:meth:`image_from_segment`.)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    632
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    633
                 | 
                                    
                                                     | 
                
                 | 
                        Examples:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    634
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    635
                 | 
                                    
                                                     | 
                
                 | 
                         * get a raw (colored) but already deskewed and cropped image::  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    636
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    637
                 | 
                                    
                                                     | 
                
                 | 
                                page_image, page_coords, page_image_info = workspace.image_from_page(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    638
                 | 
                                    
                                                     | 
                
                 | 
                                    page, page_id,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    639
                 | 
                                    
                                                     | 
                
                 | 
                                    feature_selector='deskewed,cropped',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    640
                 | 
                                    
                                                     | 
                
                 | 
                                    feature_filter='binarized,grayscale_normalized')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    641
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    642
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.image_from_page') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    643
                 | 
                                    
                                                     | 
                
                 | 
                        page_image_info = self.resolve_image_exif(page.imageFilename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    644
                 | 
                                    
                                                     | 
                
                 | 
                        page_image = self._resolve_image_as_pil(page.imageFilename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    645
                 | 
                                    
                                                     | 
                
                 | 
                        page_coords = {} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    646
                 | 
                                    
                                                     | 
                
                 | 
                        # use identity as initial affine coordinate transform:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    647
                 | 
                                    
                                                     | 
                
                 | 
                        page_coords['transform'] = np.eye(3)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    648
                 | 
                                    
                                                     | 
                
                 | 
                        # interim bbox (updated with each change to the transform):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    649
                 | 
                                    
                                                     | 
                
                 | 
                        page_bbox = [0, 0, page_image.width, page_image.height]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    650
                 | 
                                    
                                                     | 
                
                 | 
                        page_xywh = {'x': 0, 'y': 0, | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    651
                 | 
                                    
                                                     | 
                
                 | 
                                     'w': page_image.width, 'h': page_image.height}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    652
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    653
                 | 
                                    
                                                     | 
                
                 | 
                        border = page.get_Border()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    654
                 | 
                                    
                                                     | 
                
                 | 
                        # page angle: PAGE @orientation is defined clockwise,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    655
                 | 
                                    
                                                     | 
                
                 | 
                        # whereas PIL/ndimage rotation is in mathematical direction:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    656
                 | 
                                    
                                                     | 
                
                 | 
                        page_coords['angle'] = -(page.get_orientation() or 0)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    657
                 | 
                                    
                                                     | 
                
                 | 
                        # map angle from (-180,180] to [0,360], and partition into multiples of 90;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    658
                 | 
                                    
                                                     | 
                
                 | 
                        # but avoid unnecessary large remainders, i.e. split symmetrically:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    659
                 | 
                                    
                                                     | 
                
                 | 
                        orientation = (page_coords['angle'] + 45) % 360  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    660
                 | 
                                    
                                                     | 
                
                 | 
                        orientation = orientation - (orientation % 90)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    661
                 | 
                                    
                                                     | 
                
                 | 
                        skew = (page_coords['angle'] % 360) - orientation  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    662
                 | 
                                    
                                                     | 
                
                 | 
                        skew = 180 - (180 - skew) % 360 # map to [-45,45]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    663
                 | 
                                    
                                                     | 
                
                 | 
                        page_coords['angle'] = 0 # nothing applied yet (depends on filters)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    664
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug("page '%s' has %s orientation=%d skew=%.2f", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    665
                 | 
                                    
                                                     | 
                
                 | 
                                  page_id, "border," if border else "", orientation, skew)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    666
                 | 
                                    
                                                     | 
                
                 | 
                        if page_image_info.resolution != 1:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    667
                 | 
                                    
                                                     | 
                
                 | 
                            dpi = page_image_info.resolution  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    668
                 | 
                                    
                                                     | 
                
                 | 
                            if page_image_info.resolutionUnit == 'cm':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    669
                 | 
                                    
                                                     | 
                
                 | 
                                dpi = round(dpi * 2.54)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    670
                 | 
                                    
                                                     | 
                
                 | 
                            dpi = int(dpi)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    671
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("page '%s' images will use %d DPI from image meta-data", page_id, dpi) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    672
                 | 
                                    
                                                     | 
                
                 | 
                            page_coords['DPI'] = dpi  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    673
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    674
                 | 
                                    
                                                     | 
                
                 | 
                        # initialize AlternativeImage@comments classes as empty:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    675
                 | 
                                    
                                                     | 
                
                 | 
                        page_coords['features'] = ''  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    676
                 | 
                                    
                                                     | 
                
                 | 
                        best_image = None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    677
                 | 
                                    
                                                     | 
                
                 | 
                        alternative_images = page.get_AlternativeImage()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    678
                 | 
                                    
                                                     | 
                
                 | 
                        if alternative_images:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    679
                 | 
                                    
                                                     | 
                
                 | 
                            # (e.g. from page-level cropping, binarization, deskewing or despeckling)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    680
                 | 
                                    
                                                     | 
                
                 | 
                            best_features = set()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    681
                 | 
                                    
                                                     | 
                
                 | 
                            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    682
                 | 
                                    
                                                     | 
                
                 | 
                            # search to the end, because by convention we always append,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    683
                 | 
                                    
                                                     | 
                
                 | 
                            # and among multiple satisfactory images we want the most recent,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    684
                 | 
                                    
                                                     | 
                
                 | 
                            # but also ensure that we get the richest feature set, i.e. most  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    685
                 | 
                                    
                                                     | 
                
                 | 
                            # of those features that we cannot reproduce automatically below  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    686
                 | 
                                    
                                                     | 
                
                 | 
                            for alternative_image in alternative_images:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    687
                 | 
                                    
                                                     | 
                
                 | 
                                if filename and filename != alternative_image.filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    688
                 | 
                                    
                                                     | 
                
                 | 
                                    continue  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    689
                 | 
                                    
                                                     | 
                
                 | 
                                features = alternative_image.get_comments()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    690
                 | 
                                    
                                                     | 
                
                 | 
                                if not features:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    691
                 | 
                                    
                                                     | 
                
                 | 
                                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    692
                 | 
                                    
                                                     | 
                
                 | 
                                                alternative_images.index(alternative_image) + 1, page_id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    693
                 | 
                                    
                                                     | 
                
                 | 
                                    features = ''  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    694
                 | 
                                    
                                                     | 
                
                 | 
                                featureset = set(features.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    695
                 | 
                                    
                                                     | 
                
                 | 
                                if (all(feature in featureset  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    696
                 | 
                                    
                                                     | 
                
                 | 
                                        for feature in feature_selector.split(',') if feature) and | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    697
                 | 
                                    
                                                     | 
                
                 | 
                                    not any(feature in featureset  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    698
                 | 
                                    
                                                     | 
                
                 | 
                                            for feature in feature_filter.split(',') if feature) and | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    699
                 | 
                                    
                                                     | 
                
                 | 
                                    len(featureset.difference(auto_features)) >= \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    700
                 | 
                                    
                                                     | 
                
                 | 
                                    len(best_features.difference(auto_features))):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    701
                 | 
                                    
                                                     | 
                
                 | 
                                    best_features = featureset  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    702
                 | 
                                    
                                                     | 
                
                 | 
                                    best_image = alternative_image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    703
                 | 
                                    
                                                     | 
                
                 | 
                            if best_image:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    704
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug("Using AlternativeImage %d %s for page '%s'", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    705
                 | 
                                    
                                                     | 
                
                 | 
                                          alternative_images.index(best_image) + 1,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    706
                 | 
                                    
                                                     | 
                
                 | 
                                          best_features, page_id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    707
                 | 
                                    
                                                     | 
                
                 | 
                                page_image = self._resolve_image_as_pil(best_image.get_filename())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    708
                 | 
                                    
                                                     | 
                
                 | 
                                page_coords['features'] = best_image.get_comments() # including duplicates  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    709
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    710
                 | 
                                    
                                                     | 
                
                 | 
                        # adjust the coord transformation to the steps applied on the image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    711
                 | 
                                    
                                                     | 
                
                 | 
                        # and apply steps on the existing image in case it is missing there,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    712
                 | 
                                    
                                                     | 
                
                 | 
                        # but traverse all steps (crop/reflect/rotate) in a particular order:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    713
                 | 
                                    
                                                     | 
                
                 | 
                        # - existing image features take priority (in the order annotated),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    714
                 | 
                                    
                                                     | 
                
                 | 
                        # - next is cropping (if necessary but not already applied),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    715
                 | 
                                    
                                                     | 
                
                 | 
                        # - next is reflection (if necessary but not already applied),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    716
                 | 
                                    
                                                     | 
                
                 | 
                        # - next is rotation (if necessary but not already applied).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    717
                 | 
                                    
                                                     | 
                
                 | 
                        # This helps deal with arbitrary workflows (e.g. crop then deskew,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    718
                 | 
                                    
                                                     | 
                
                 | 
                        # or deskew then crop), regardless of where images are generated.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    719
                 | 
                                    
                                                     | 
                
                 | 
                        alternative_image_features = page_coords['features'].split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    720
                 | 
                                    
                                                     | 
                
                 | 
                        for duplicate_feature in set([feature for feature in alternative_image_features  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    721
                 | 
                                    
                                                     | 
                
                 | 
                                                      # features relevant in reconstructing coordinates:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    722
                 | 
                                    
                                                     | 
                
                 | 
                                                      if (feature in ['cropped', 'deskewed', 'rotated-90',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    723
                 | 
                                    
                                                     | 
                
                 | 
                                                                      'rotated-180', 'rotated-270'] and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    724
                 | 
                                    
                                                     | 
                
                 | 
                                                          alternative_image_features.count(feature) > 1)]):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    725
                 | 
                                    
                                                     | 
                
                 | 
                            log.error("Duplicate feature %s in AlternativeImage for page '%s'", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    726
                 | 
                                    
                                                     | 
                
                 | 
                                      duplicate_feature, page_id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    727
                 | 
                                    
                                                     | 
                
                 | 
                        for i, feature in enumerate(alternative_image_features +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    728
                 | 
                                    
                                                     | 
                
                 | 
                                                    (['cropped']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    729
                 | 
                                    
                                                     | 
                
                 | 
                                                     if (border and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    730
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'cropped' in alternative_image_features and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    731
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'cropped' in feature_filter.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    732
                 | 
                                    
                                                     | 
                
                 | 
                                                     else []) +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    733
                 | 
                                    
                                                     | 
                
                 | 
                                                    (['rotated-%d' % orientation]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    734
                 | 
                                    
                                                     | 
                
                 | 
                                                     if (orientation and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    735
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'rotated-%d' % orientation in alternative_image_features and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    736
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'rotated-%d' % orientation in feature_filter.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    737
                 | 
                                    
                                                     | 
                
                 | 
                                                     else []) +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    738
                 | 
                                    
                                                     | 
                
                 | 
                                                    (['deskewed']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    739
                 | 
                                    
                                                     | 
                
                 | 
                                                     if (skew and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    740
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'deskewed' in alternative_image_features and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    741
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'deskewed' in feature_filter.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    742
                 | 
                                    
                                                     | 
                
                 | 
                                                     else []) +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    743
                 | 
                                    
                                                     | 
                
                 | 
                                                    # not a feature to be added, but merely as a fallback position  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    744
                 | 
                                    
                                                     | 
                
                 | 
                                                    # to always enter loop at i == len(alternative_image_features)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    745
                 | 
                                    
                                                     | 
                
                 | 
                                                    ['_check']):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    746
                 | 
                                    
                                                     | 
                
                 | 
                            # image geometry vs feature consistency can only be checked  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    747
                 | 
                                    
                                                     | 
                
                 | 
                            # after all features on the existing AlternativeImage have  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    748
                 | 
                                    
                                                     | 
                
                 | 
                            # been adjusted for in the transform, and when there is a mismatch,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    749
                 | 
                                    
                                                     | 
                
                 | 
                            # additional steps applied here would only repeat the respective  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    750
                 | 
                                    
                                                     | 
                
                 | 
                            # error message; so we only check once at the boundary between  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    751
                 | 
                                    
                                                     | 
                
                 | 
                            # existing and new features  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    752
                 | 
                                    
                                                     | 
                
                 | 
                            # FIXME we should check/enforce consistency when _adding_ AlternativeImage  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    753
                 | 
                                    
                                                     | 
                
                 | 
                            if (i == len(alternative_image_features) and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    754
                 | 
                                    
                                                     | 
                
                 | 
                                not (page_xywh['w'] - 2 < page_image.width < page_xywh['w'] + 2 and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    755
                 | 
                                    
                                                     | 
                
                 | 
                                     page_xywh['h'] - 2 < page_image.height < page_xywh['h'] + 2)):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    756
                 | 
                                    
                                                     | 
                
                 | 
                                log.error('page "%s" image (%s; %dx%d) has not been cropped properly (%dx%d)', | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    757
                 | 
                                    
                                                     | 
                
                 | 
                                          page_id, page_coords['features'],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    758
                 | 
                                    
                                                     | 
                
                 | 
                                          page_image.width, page_image.height,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    759
                 | 
                                    
                                                     | 
                
                 | 
                                          page_xywh['w'], page_xywh['h'])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    760
                 | 
                                    
                                                     | 
                
                 | 
                            name = "%s for page '%s'" % ("AlternativeImage" if best_image | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    761
                 | 
                                    
                                                     | 
                
                 | 
                                                         else "original image", page_id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    762
                 | 
                                    
                                                     | 
                
                 | 
                            # adjust transform to feature, and ensure feature is applied to image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    763
                 | 
                                    
                                                     | 
                
                 | 
                            if feature == 'cropped':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    764
                 | 
                                    
                                                     | 
                
                 | 
                                page_image, page_coords, page_xywh = _crop(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    765
                 | 
                                    
                                                     | 
                
                 | 
                                    log, name, border, page_image, page_coords,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    766
                 | 
                                    
                                                     | 
                
                 | 
                                    fill=fill, transparency=transparency)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    767
                 | 
                                    
                                                     | 
                
                 | 
                            elif feature == 'rotated-%d' % orientation:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    768
                 | 
                                    
                                                     | 
                
                 | 
                                page_image, page_coords, page_xywh = _reflect(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    769
                 | 
                                    
                                                     | 
                
                 | 
                                    log, name, orientation, page_image, page_coords, page_xywh)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    770
                 | 
                                    
                                                     | 
                
                 | 
                            elif feature == 'deskewed':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    771
                 | 
                                    
                                                     | 
                
                 | 
                                page_image, page_coords, page_xywh = _rotate(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    772
                 | 
                                    
                                                     | 
                
                 | 
                                    log, name, skew, border, page_image, page_coords, page_xywh,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    773
                 | 
                                    
                                                     | 
                
                 | 
                                    fill=fill, transparency=transparency)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    774
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    775
                 | 
                                    
                                                     | 
                
                 | 
                        # verify constraints again:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    776
                 | 
                                    
                                                     | 
                
                 | 
                        if filename and not getattr(page_image, 'filename', '').endswith(filename):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    777
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception('Found no AlternativeImage that satisfies all requirements ' + | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    778
                 | 
                                    
                                                     | 
                
                 | 
                                            'filename="%s" in page "%s"' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    779
                 | 
                                    
                                                     | 
                
                 | 
                                                filename, page_id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    780
                 | 
                                    
                                                     | 
                
                 | 
                        if not all(feature in page_coords['features']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    781
                 | 
                                    
                                                     | 
                
                 | 
                                   for feature in feature_selector.split(',') if feature): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    782
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception('Found no AlternativeImage that satisfies all requirements ' + | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    783
                 | 
                                    
                                                     | 
                
                 | 
                                            'selector="%s" in page "%s"' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    784
                 | 
                                    
                                                     | 
                
                 | 
                                                feature_selector, page_id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    785
                 | 
                                    
                                                     | 
                
                 | 
                        if any(feature in page_coords['features']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    786
                 | 
                                    
                                                     | 
                
                 | 
                               for feature in feature_filter.split(',') if feature): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    787
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception('Found no AlternativeImage that satisfies all requirements ' + | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    788
                 | 
                                    
                                                     | 
                
                 | 
                                            'filter="%s" in page "%s"' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    789
                 | 
                                    
                                                     | 
                
                 | 
                                                feature_filter, page_id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    790
                 | 
                                    
                                                     | 
                
                 | 
                        # ensure DPI will be set in image meta-data again  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    791
                 | 
                                    
                                                     | 
                
                 | 
                        if 'DPI' in page_coords:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    792
                 | 
                                    
                                                     | 
                
                 | 
                            dpi = page_coords['DPI']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    793
                 | 
                                    
                                                     | 
                
                 | 
                            if 'dpi' not in page_image.info:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    794
                 | 
                                    
                                                     | 
                
                 | 
                                page_image.info['dpi'] = (dpi, dpi)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    795
                 | 
                                    
                                                     | 
                
                 | 
                        return page_image, page_coords, page_image_info  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    796
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    797
                 | 
                                    
                                                     | 
                
                 | 
                    def image_from_segment(self, segment, parent_image, parent_coords,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    798
                 | 
                                    
                                                     | 
                
                 | 
                                           fill='background', transparency=False,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    799
                 | 
                                    
                                                     | 
                
                 | 
                                           feature_selector='', feature_filter='', filename=''):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    800
                 | 
                                    
                                                     | 
                
                 | 
                        """Extract an image for a PAGE-XML hierarchy segment from its parent's image.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    801
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    802
                 | 
                                    
                                                     | 
                
                 | 
                        Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    803
                 | 
                                    
                                                     | 
                
                 | 
                            segment (object): a PAGE segment object \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    804
                 | 
                                    
                                                     | 
                
                 | 
                                (i.e. :py:class:`~ocrd_models.ocrd_page.TextRegionType` \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    805
                 | 
                                    
                                                     | 
                
                 | 
                                or :py:class:`~ocrd_models.ocrd_page.TextLineType` \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    806
                 | 
                                    
                                                     | 
                
                 | 
                                or :py:class:`~ocrd_models.ocrd_page.WordType` \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    807
                 | 
                                    
                                                     | 
                
                 | 
                                or :py:class:`~ocrd_models.ocrd_page.GlyphType`)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    808
                 | 
                                    
                                                     | 
                
                 | 
                            parent_image (`PIL.Image`): image of the `segment`'s parent  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    809
                 | 
                                    
                                                     | 
                
                 | 
                            parent_coords (dict): a `dict` with information about `parent_image`:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    810
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    811
                 | 
                                    
                                                     | 
                
                 | 
                               - `"transform"`: a `Numpy` array with an affine transform which  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    812
                 | 
                                    
                                                     | 
                
                 | 
                                 converts from absolute coordinates to those relative to the image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    813
                 | 
                                    
                                                     | 
                
                 | 
                                 i.e. after applying all operations (starting with the original image)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    814
                 | 
                                    
                                                     | 
                
                 | 
                               - `"angle"`: the rotation/reflection angle applied to the image so far,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    815
                 | 
                                    
                                                     | 
                
                 | 
                               - `"DPI"`: the pixel density of the parent image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    816
                 | 
                                    
                                                     | 
                
                 | 
                               - `"features"`: the ``AlternativeImage/@comments`` for the image, i.e.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    817
                 | 
                                    
                                                     | 
                
                 | 
                                 names of all operations that lead up to this result, and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    818
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    819
                 | 
                                    
                                                     | 
                
                 | 
                            fill (string): a `PIL` color specifier, or `background` or `none`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    820
                 | 
                                    
                                                     | 
                
                 | 
                            transparency (boolean): whether to add an alpha channel for masking  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    821
                 | 
                                    
                                                     | 
                
                 | 
                            feature_selector (string): a comma-separated list of ``@comments`` classes  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    822
                 | 
                                    
                                                     | 
                
                 | 
                            feature_filter (string): a comma-separated list of ``@comments`` classes  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    823
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    824
                 | 
                                    
                                                     | 
                
                 | 
                        Extract a `PIL.Image` from `segment`, either from ``AlternativeImage``  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    825
                 | 
                                    
                                                     | 
                
                 | 
                        (if it exists), or producing a new image via cropping from `parent_image`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    826
                 | 
                                    
                                                     | 
                
                 | 
                        (otherwise). Pass in `parent_image` and `parent_coords` from the result  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    827
                 | 
                                    
                                                     | 
                
                 | 
                        of the next higher-level of this function or from :py:meth:`image_from_page`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    828
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    829
                 | 
                                    
                                                     | 
                
                 | 
                        If ``filename`` is given, then among the available `AlternativeImage/@filename`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    830
                 | 
                                    
                                                     | 
                
                 | 
                        images, pick that one, or raise an error.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    831
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    832
                 | 
                                    
                                                     | 
                
                 | 
                        If ``feature_selector`` and/or ``feature_filter`` is given, then  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    833
                 | 
                                    
                                                     | 
                
                 | 
                        among the cropped `parent_image` and the available AlternativeImages,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    834
                 | 
                                    
                                                     | 
                
                 | 
                        select/filter the richest one which contains all of the selected,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    835
                 | 
                                    
                                                     | 
                
                 | 
                        but none of the filtered features (i.e. ``@comments`` classes), or  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    836
                 | 
                                    
                                                     | 
                
                 | 
                        raise an error.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    837
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    838
                 | 
                                    
                                                     | 
                
                 | 
                        (Required and produced features need not be in the same order, so  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    839
                 | 
                                    
                                                     | 
                
                 | 
                        `feature_selector` is merely a mask specifying Boolean AND, and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    840
                 | 
                                    
                                                     | 
                
                 | 
                        `feature_filter` is merely a mask specifying Boolean OR.)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    841
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    842
                 | 
                                    
                                                     | 
                
                 | 
                        Cropping uses a polygon mask (not just the bounding box rectangle).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    843
                 | 
                                    
                                                     | 
                
                 | 
                        Areas outside the polygon will be filled according to `fill`:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    844
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    845
                 | 
                                    
                                                     | 
                
                 | 
                        - if `"background"` (the default),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    846
                 | 
                                    
                                                     | 
                
                 | 
                          then fill with the median color of the image;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    847
                 | 
                                    
                                                     | 
                
                 | 
                        - else if `"none"`, then avoid masking polygons where possible  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    848
                 | 
                                    
                                                     | 
                
                 | 
                          (i.e. when cropping) or revert to the default (i.e. when rotating)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    849
                 | 
                                    
                                                     | 
                
                 | 
                        - otherwise, use the given color, e.g. `"white"` or `(255,255,255)`.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    850
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    851
                 | 
                                    
                                                     | 
                
                 | 
                        Moreover, if `transparency` is true, and unless the image already  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    852
                 | 
                                    
                                                     | 
                
                 | 
                        has an alpha channel, then add an alpha channel which is fully opaque  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    853
                 | 
                                    
                                                     | 
                
                 | 
                        before cropping and rotating. (Thus, unexposed/masked areas will be  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    854
                 | 
                                    
                                                     | 
                
                 | 
                        transparent afterwards for consumers that can interpret alpha channels).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    855
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    856
                 | 
                                    
                                                     | 
                
                 | 
                        When cropping, compensate any ``@orientation`` angle annotated for the  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    857
                 | 
                                    
                                                     | 
                
                 | 
                        parent (from parent-level deskewing) by rotating the segment coordinates  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    858
                 | 
                                    
                                                     | 
                
                 | 
                        in an inverse transformation (i.e. translation to center, then passive  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    859
                 | 
                                    
                                                     | 
                
                 | 
                        rotation, and translation back).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    860
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    861
                 | 
                                    
                                                     | 
                
                 | 
                        Regardless, if any ``@orientation`` angle is annotated for the segment  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    862
                 | 
                                    
                                                     | 
                
                 | 
                        (from segment-level deskewing), and the chosen image does not have  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    863
                 | 
                                    
                                                     | 
                
                 | 
                        the feature `"deskewed"` yet, and unless `"deskewed"` is being filtered,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    864
                 | 
                                    
                                                     | 
                
                 | 
                        then rotate it - compensating for any previous `"angle"`. (However,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    865
                 | 
                                    
                                                     | 
                
                 | 
                        if ``@orientation`` is above the [-45°,45°] interval, then apply as much  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    866
                 | 
                                    
                                                     | 
                
                 | 
                        transposition as possible first, unless `"rotated-90"` / `"rotated-180"` /  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    867
                 | 
                                    
                                                     | 
                
                 | 
                        `"rotated-270"` is being filtered.)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    868
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    869
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    870
                 | 
                                    
                                                     | 
                
                 | 
                            a tuple of  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    871
                 | 
                                    
                                                     | 
                
                 | 
                             * the extracted `PIL.Image`,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    872
                 | 
                                    
                                                     | 
                
                 | 
                             * a `dict` with information about the extracted image:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    873
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    874
                 | 
                                    
                                                     | 
                
                 | 
                               - `"transform"`: a `Numpy` array with an affine transform which  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    875
                 | 
                                    
                                                     | 
                
                 | 
                                   converts from absolute coordinates to those relative to the image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    876
                 | 
                                    
                                                     | 
                
                 | 
                                   i.e. after applying all parent operations, and then cropping to  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    877
                 | 
                                    
                                                     | 
                
                 | 
                                   the segment's bounding box, and deskewing with the segment's  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    878
                 | 
                                    
                                                     | 
                
                 | 
                                   orientation angle (if any)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    879
                 | 
                                    
                                                     | 
                
                 | 
                               - `"angle"`: the rotation/reflection angle applied to the image so far,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    880
                 | 
                                    
                                                     | 
                
                 | 
                               - `"DPI"`: the pixel density of this image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    881
                 | 
                                    
                                                     | 
                
                 | 
                               - `"features"`: the ``AlternativeImage/@comments`` for the image, i.e.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    882
                 | 
                                    
                                                     | 
                
                 | 
                                 names of all applied operations that lead up to this result.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    883
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    884
                 | 
                                    
                                                     | 
                
                 | 
                        (These can be used to create a new ``AlternativeImage``, or passed down  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    885
                 | 
                                    
                                                     | 
                
                 | 
                         for :py:meth:`image_from_segment` calls on lower hierarchy levels.)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    886
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    887
                 | 
                                    
                                                     | 
                
                 | 
                        Examples:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    888
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    889
                 | 
                                    
                                                     | 
                
                 | 
                         * get a raw (colored) but already deskewed and cropped image::  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    890
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    891
                 | 
                                    
                                                     | 
                
                 | 
                                image, xywh = workspace.image_from_segment(region,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    892
                 | 
                                    
                                                     | 
                
                 | 
                                    page_image, page_xywh,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    893
                 | 
                                    
                                                     | 
                
                 | 
                                    feature_selector='deskewed,cropped',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    894
                 | 
                                    
                                                     | 
                
                 | 
                                    feature_filter='binarized,grayscale_normalized')  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    895
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    896
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.image_from_segment') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    897
                 | 
                                    
                                                     | 
                
                 | 
                        # note: We should mask overlapping neighbouring segments here,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    898
                 | 
                                    
                                                     | 
                
                 | 
                        # but finding the right clipping rules can be difficult if operating  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    899
                 | 
                                    
                                                     | 
                
                 | 
                        # on the raw (non-binary) image data alone: for each intersection, it  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    900
                 | 
                                    
                                                     | 
                
                 | 
                        # must be decided which one of either segment or neighbour to assign,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    901
                 | 
                                    
                                                     | 
                
                 | 
                        # e.g. an ImageRegion which properly contains our TextRegion should be  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    902
                 | 
                                    
                                                     | 
                
                 | 
                        # completely ignored, but an ImageRegion which is properly contained  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    903
                 | 
                                    
                                                     | 
                
                 | 
                        # in our TextRegion should be completely masked, while partial overlap  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    904
                 | 
                                    
                                                     | 
                
                 | 
                        # may be more difficult to decide. On the other hand, on the binary image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    905
                 | 
                                    
                                                     | 
                
                 | 
                        # we can use connected component analysis to mask foreground areas which  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    906
                 | 
                                    
                                                     | 
                
                 | 
                        # originate in the neighbouring regions. But that would introduce either  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    907
                 | 
                                    
                                                     | 
                
                 | 
                        # the assumption that the input has already been binarized, or a dependency  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    908
                 | 
                                    
                                                     | 
                
                 | 
                        # on some ad-hoc binarization method. Thus, it is preferable to use  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    909
                 | 
                                    
                                                     | 
                
                 | 
                        # a dedicated processor for this (which produces clipped AlternativeImage  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    910
                 | 
                                    
                                                     | 
                
                 | 
                        # or reduced polygon coordinates).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    911
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image, segment_coords, segment_xywh = _crop(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    912
                 | 
                                    
                                                     | 
                
                 | 
                            log, "parent image for segment '%s'" % segment.id,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    913
                 | 
                                    
                                                     | 
                
                 | 
                            segment, parent_image, parent_coords,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    914
                 | 
                                    
                                                     | 
                
                 | 
                            fill=fill, transparency=transparency)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    915
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    916
                 | 
                                    
                                                     | 
                
                 | 
                        # Semantics of missing @orientation at region level could be either  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    917
                 | 
                                    
                                                     | 
                
                 | 
                        # - inherited from page level: same as line or word level (no @orientation),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    918
                 | 
                                    
                                                     | 
                
                 | 
                        # - zero (unrotate page angle): different from line or word level (because  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    919
                 | 
                                    
                                                     | 
                
                 | 
                        #   otherwise deskewing would never have an effect on lines and words)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    920
                 | 
                                    
                                                     | 
                
                 | 
                        # The PAGE specification is silent here (but does generally not concern itself  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    921
                 | 
                                    
                                                     | 
                
                 | 
                        # much with AlternativeImage coordinate consistency).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    922
                 | 
                                    
                                                     | 
                
                 | 
                        # Since our (generateDS-backed) ocrd_page supports the zero/none distinction,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    923
                 | 
                                    
                                                     | 
                
                 | 
                        # we choose the former (i.e. None is inheritance).  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    924
                 | 
                                    
                                                     | 
                
                 | 
                        if 'orientation' in segment.__dict__ and segment.get_orientation() is not None:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    925
                 | 
                                    
                                                     | 
                
                 | 
                            # region angle: PAGE @orientation is defined clockwise,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    926
                 | 
                                    
                                                     | 
                
                 | 
                            # whereas PIL/ndimage rotation is in mathematical direction:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    927
                 | 
                                    
                                                     | 
                
                 | 
                            angle = -segment.get_orientation()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    928
                 | 
                                    
                                                     | 
                
                 | 
                            # @orientation is always absolute; if higher levels  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    929
                 | 
                                    
                                                     | 
                
                 | 
                            # have already rotated, then we must compensate:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    930
                 | 
                                    
                                                     | 
                
                 | 
                            angle -= parent_coords['angle']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    931
                 | 
                                    
                                                     | 
                
                 | 
                            # map angle from (-180,180] to [0,360], and partition into multiples of 90;  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    932
                 | 
                                    
                                                     | 
                
                 | 
                            # but avoid unnecessary large remainders, i.e. split symmetrically:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    933
                 | 
                                    
                                                     | 
                
                 | 
                            orientation = (angle + 45) % 360  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    934
                 | 
                                    
                                                     | 
                
                 | 
                            orientation = orientation - (orientation % 90)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    935
                 | 
                                    
                                                     | 
                
                 | 
                            skew = (angle % 360) - orientation  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    936
                 | 
                                    
                                                     | 
                
                 | 
                            skew = 180 - (180 - skew) % 360 # map to [-45,45]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    937
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("segment '%s' has orientation=%d skew=%.2f", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    938
                 | 
                                    
                                                     | 
                
                 | 
                                      segment.id, orientation, skew)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    939
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    940
                 | 
                                    
                                                     | 
                
                 | 
                            orientation = 0  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    941
                 | 
                                    
                                                     | 
                
                 | 
                            skew = 0  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    942
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['angle'] = parent_coords['angle'] # nothing applied yet (depends on filters)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    943
                 | 
                                    
                                                     | 
                
                 | 
                        if 'DPI' in parent_coords:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    944
                 | 
                                    
                                                     | 
                
                 | 
                            segment_coords['DPI'] = parent_coords['DPI'] # not rescaled yet  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    945
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    946
                 | 
                                    
                                                     | 
                
                 | 
                        # initialize AlternativeImage@comments classes from parent, except  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    947
                 | 
                                    
                                                     | 
                
                 | 
                        # for those operations that can apply on multiple hierarchy levels:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    948
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['features'] = ','.join(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    949
                 | 
                                    
                                                     | 
                
                 | 
                            [feature for feature in parent_coords['features'].split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    950
                 | 
                                    
                                                     | 
                
                 | 
                             if feature in ['binarized', 'grayscale_normalized',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    951
                 | 
                                    
                                                     | 
                
                 | 
                                            'despeckled', 'dewarped']])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    952
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    953
                 | 
                                    
                                                     | 
                
                 | 
                        best_image = None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    954
                 | 
                                    
                                                     | 
                
                 | 
                        alternative_images = segment.get_AlternativeImage()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    955
                 | 
                                    
                                                     | 
                
                 | 
                        if alternative_images:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    956
                 | 
                                    
                                                     | 
                
                 | 
                            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    957
                 | 
                                    
                                                     | 
                
                 | 
                            best_features = set()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    958
                 | 
                                    
                                                     | 
                
                 | 
                            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    959
                 | 
                                    
                                                     | 
                
                 | 
                            # search to the end, because by convention we always append,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    960
                 | 
                                    
                                                     | 
                
                 | 
                            # and among multiple satisfactory images we want the most recent,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    961
                 | 
                                    
                                                     | 
                
                 | 
                            # but also ensure that we get the richest feature set, i.e. most  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    962
                 | 
                                    
                                                     | 
                
                 | 
                            # of those features that we cannot reproduce automatically below  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    963
                 | 
                                    
                                                     | 
                
                 | 
                            for alternative_image in alternative_images:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    964
                 | 
                                    
                                                     | 
                
                 | 
                                if filename and filename != alternative_image.filename:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    965
                 | 
                                    
                                                     | 
                
                 | 
                                    continue  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    966
                 | 
                                    
                                                     | 
                
                 | 
                                features = alternative_image.get_comments()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    967
                 | 
                                    
                                                     | 
                
                 | 
                                if not features:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    968
                 | 
                                    
                                                     | 
                
                 | 
                                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    969
                 | 
                                    
                                                     | 
                
                 | 
                                                alternative_images.index(alternative_image) + 1, segment.id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    970
                 | 
                                    
                                                     | 
                
                 | 
                                    features = ''  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    971
                 | 
                                    
                                                     | 
                
                 | 
                                featureset = set(features.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    972
                 | 
                                    
                                                     | 
                
                 | 
                                if (all(feature in featureset  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    973
                 | 
                                    
                                                     | 
                
                 | 
                                        for feature in feature_selector.split(',') if feature) and | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    974
                 | 
                                    
                                                     | 
                
                 | 
                                    not any(feature in featureset  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    975
                 | 
                                    
                                                     | 
                
                 | 
                                            for feature in feature_filter.split(',') if feature) and | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    976
                 | 
                                    
                                                     | 
                
                 | 
                                    len(featureset.difference(auto_features)) >= \  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    977
                 | 
                                    
                                                     | 
                
                 | 
                                    len(best_features.difference(auto_features))):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    978
                 | 
                                    
                                                     | 
                
                 | 
                                    best_features = featureset  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    979
                 | 
                                    
                                                     | 
                
                 | 
                                    best_image = alternative_image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    980
                 | 
                                    
                                                     | 
                
                 | 
                            if best_image:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    981
                 | 
                                    
                                                     | 
                
                 | 
                                log.debug("Using AlternativeImage %d %s for segment '%s'", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    982
                 | 
                                    
                                                     | 
                
                 | 
                                          alternative_images.index(best_image) + 1,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    983
                 | 
                                    
                                                     | 
                
                 | 
                                          best_features, segment.id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    984
                 | 
                                    
                                                     | 
                
                 | 
                                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    985
                 | 
                                    
                                                     | 
                
                 | 
                                segment_coords['features'] = best_image.get_comments() # including duplicates  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    986
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    987
                 | 
                                    
                                                     | 
                
                 | 
                        alternative_image_features = segment_coords['features'].split(',') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    988
                 | 
                                    
                                                     | 
                
                 | 
                        for duplicate_feature in set([feature for feature in alternative_image_features  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    989
                 | 
                                    
                                                     | 
                
                 | 
                                                      # features relevant in reconstructing coordinates:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    990
                 | 
                                    
                                                     | 
                
                 | 
                                                      if (feature in ['deskewed', 'rotated-90',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    991
                 | 
                                    
                                                     | 
                
                 | 
                                                                      'rotated-180', 'rotated-270'] and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    992
                 | 
                                    
                                                     | 
                
                 | 
                                                          alternative_image_features.count(feature) > 1)]):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    993
                 | 
                                    
                                                     | 
                
                 | 
                            log.error("Duplicate feature %s in AlternativeImage for segment '%s'", | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    994
                 | 
                                    
                                                     | 
                
                 | 
                                      duplicate_feature, segment.id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    995
                 | 
                                    
                                                     | 
                
                 | 
                        for i, feature in enumerate(alternative_image_features +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    996
                 | 
                                    
                                                     | 
                
                 | 
                                                    (['rotated-%d' % orientation]  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    997
                 | 
                                    
                                                     | 
                
                 | 
                                                     if (orientation and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    998
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'rotated-%d' % orientation in alternative_image_features and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    999
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'rotated-%d' % orientation in feature_filter.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1000
                 | 
                                    
                                                     | 
                
                 | 
                                                     else []) +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1001
                 | 
                                    
                                                     | 
                
                 | 
                                                    (['deskewed']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1002
                 | 
                                    
                                                     | 
                
                 | 
                                                     if (skew and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1003
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'deskewed' in alternative_image_features and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1004
                 | 
                                    
                                                     | 
                
                 | 
                                                         not 'deskewed' in feature_filter.split(',')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1005
                 | 
                                    
                                                     | 
                
                 | 
                                                     else []) +  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1006
                 | 
                                    
                                                     | 
                
                 | 
                                                    # not a feature to be added, but merely as a fallback position  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1007
                 | 
                                    
                                                     | 
                
                 | 
                                                    # to always enter loop at i == len(alternative_image_features)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1008
                 | 
                                    
                                                     | 
                
                 | 
                                                    ['_check']):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1009
                 | 
                                    
                                                     | 
                
                 | 
                            # image geometry vs feature consistency can only be checked  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1010
                 | 
                                    
                                                     | 
                
                 | 
                            # after all features on the existing AlternativeImage have  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1011
                 | 
                                    
                                                     | 
                
                 | 
                            # been adjusted for in the transform, and when there is a mismatch,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1012
                 | 
                                    
                                                     | 
                
                 | 
                            # additional steps applied here would only repeat the respective  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1013
                 | 
                                    
                                                     | 
                
                 | 
                            # error message; so we only check once at the boundary between  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1014
                 | 
                                    
                                                     | 
                
                 | 
                            # existing and new features  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1015
                 | 
                                    
                                                     | 
                
                 | 
                            # FIXME we should enforce consistency here (i.e. split into transposition  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1016
                 | 
                                    
                                                     | 
                
                 | 
                            #       and minimal rotation, rotation always reshapes, rescaling never happens)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1017
                 | 
                                    
                                                     | 
                
                 | 
                            # FIXME: inconsistency currently unavoidable with line-level dewarping (which increases height)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1018
                 | 
                                    
                                                     | 
                
                 | 
                            if (i == len(alternative_image_features) and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1019
                 | 
                                    
                                                     | 
                
                 | 
                                not (segment_xywh['w'] - 2 < segment_image.width < segment_xywh['w'] + 2 and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1020
                 | 
                                    
                                                     | 
                
                 | 
                                     segment_xywh['h'] - 2 < segment_image.height < segment_xywh['h'] + 2)):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1021
                 | 
                                    
                                                     | 
                
                 | 
                                log.error('segment "%s" image (%s; %dx%d) has not been cropped properly (%dx%d)', | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1022
                 | 
                                    
                                                     | 
                
                 | 
                                          segment.id, segment_coords['features'],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1023
                 | 
                                    
                                                     | 
                
                 | 
                                          segment_image.width, segment_image.height,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1024
                 | 
                                    
                                                     | 
                
                 | 
                                          segment_xywh['w'], segment_xywh['h'])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1025
                 | 
                                    
                                                     | 
                
                 | 
                            name = "%s for segment '%s'" % ("AlternativeImage" if best_image | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1026
                 | 
                                    
                                                     | 
                
                 | 
                                                            else "parent image", segment.id)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1027
                 | 
                                    
                                                     | 
                
                 | 
                            # adjust transform to feature, and ensure feature is applied to image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1028
                 | 
                                    
                                                     | 
                
                 | 
                            if feature == 'rotated-%d' % orientation:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1029
                 | 
                                    
                                                     | 
                
                 | 
                                segment_image, segment_coords, segment_xywh = _reflect(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1030
                 | 
                                    
                                                     | 
                
                 | 
                                    log, name, orientation, segment_image, segment_coords, segment_xywh)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1031
                 | 
                                    
                                                     | 
                
                 | 
                            elif feature == 'deskewed':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1032
                 | 
                                    
                                                     | 
                
                 | 
                                segment_image, segment_coords, segment_xywh = _rotate(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1033
                 | 
                                    
                                                     | 
                
                 | 
                                    log, name, skew, segment, segment_image, segment_coords, segment_xywh,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1034
                 | 
                                    
                                                     | 
                
                 | 
                                    fill=fill, transparency=transparency)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1035
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1036
                 | 
                                    
                                                     | 
                
                 | 
                        # verify constraints again:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1037
                 | 
                                    
                                                     | 
                
                 | 
                        if filename and not getattr(segment_image, 'filename', '').endswith(filename):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1038
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception('Found no AlternativeImage that satisfies all requirements ' + | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1039
                 | 
                                    
                                                     | 
                
                 | 
                                            'filename="%s" in segment "%s"' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1040
                 | 
                                    
                                                     | 
                
                 | 
                                                filename, segment.id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1041
                 | 
                                    
                                                     | 
                
                 | 
                        if not all(feature in segment_coords['features']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1042
                 | 
                                    
                                                     | 
                
                 | 
                                   for feature in feature_selector.split(',') if feature): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1043
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception('Found no AlternativeImage that satisfies all requirements' + | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1044
                 | 
                                    
                                                     | 
                
                 | 
                                            'selector="%s" in segment "%s"' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1045
                 | 
                                    
                                                     | 
                
                 | 
                                                feature_selector, segment.id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1046
                 | 
                                    
                                                     | 
                
                 | 
                        if any(feature in segment_coords['features']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1047
                 | 
                                    
                                                     | 
                
                 | 
                               for feature in feature_filter.split(',') if feature): | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1048
                 | 
                                    
                                                     | 
                
                 | 
                            raise Exception('Found no AlternativeImage that satisfies all requirements ' + | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1049
                 | 
                                    
                                                     | 
                
                 | 
                                            'filter="%s" in segment "%s"' % (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1050
                 | 
                                    
                                                     | 
                
                 | 
                                                feature_filter, segment.id))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1051
                 | 
                                    
                                                     | 
                
                 | 
                        # ensure DPI will be set in image meta-data again  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1052
                 | 
                                    
                                                     | 
                
                 | 
                        if 'DPI' in segment_coords:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1053
                 | 
                                    
                                                     | 
                
                 | 
                            dpi = segment_coords['DPI']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1054
                 | 
                                    
                                                     | 
                
                 | 
                            if 'dpi' not in segment_image.info:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1055
                 | 
                                    
                                                     | 
                
                 | 
                                segment_image.info['dpi'] = (dpi, dpi)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1056
                 | 
                                    
                                                     | 
                
                 | 
                        return segment_image, segment_coords  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1057
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1058
                 | 
                                    
                                                     | 
                
                 | 
                    # pylint: disable=redefined-builtin  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1059
                 | 
                                    
                                                     | 
                
                 | 
                    def save_image_file(self, image : Image.Image,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1060
                 | 
                                    
                                                     | 
                
                 | 
                                        file_id : str,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1061
                 | 
                                    
                                                     | 
                
                 | 
                                        file_grp : str,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1062
                 | 
                                    
                                                     | 
                
                 | 
                                        file_path : Optional[str] = None,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1063
                 | 
                                    
                                                     | 
                
                 | 
                                        page_id : Optional[str] = None,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1064
                 | 
                                    
                                                     | 
                
                 | 
                                        mimetype : str = 'image/png',  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1065
                 | 
                                    
                                                     | 
                
                 | 
                                        force : bool = False) -> str:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1066
                 | 
                                    
                                                     | 
                
                 | 
                        """Store an image in the filesystem and reference it as new file in the METS.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1067
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1068
                 | 
                                    
                                                     | 
                
                 | 
                        Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1069
                 | 
                                    
                                                     | 
                
                 | 
                            image (PIL.Image): derived image to save  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1070
                 | 
                                    
                                                     | 
                
                 | 
                            file_id (string): `@ID` of the METS `file` to use  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1071
                 | 
                                    
                                                     | 
                
                 | 
                            file_grp (string): `@USE` of the METS `fileGrp` to use  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1072
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1073
                 | 
                                    
                                                     | 
                
                 | 
                            file_path (string): `@href` of the METS `file/FLocat` to use.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1074
                 | 
                                    
                                                     | 
                
                 | 
                            page_id (string): `@ID` in the METS physical `structMap` to use  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1075
                 | 
                                    
                                                     | 
                
                 | 
                            mimetype (string): MIME type of the image format to serialize as  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1076
                 | 
                                    
                                                     | 
                
                 | 
                            force (boolean): whether to replace any existing `file` with that `@ID`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1077
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1078
                 | 
                                    
                                                     | 
                
                 | 
                        Serialize the image into the filesystem, and add a `file` for it in the METS.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1079
                 | 
                                    
                                                     | 
                
                 | 
                        Use ``file_grp`` as directory and ``file_id`` concatenated with extension  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1080
                 | 
                                    
                                                     | 
                
                 | 
                        based on ``mimetype`` as file name, unless directly passing ``file_path``.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1081
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1082
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1083
                 | 
                                    
                                                     | 
                
                 | 
                            The (absolute) path of the created file.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1084
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1085
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.save_image_file') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1086
                 | 
                                    
                                                     | 
                
                 | 
                        saveargs = {} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1087
                 | 
                                    
                                                     | 
                
                 | 
                        if 'dpi' in image.info:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1088
                 | 
                                    
                                                     | 
                
                 | 
                            saveargs['dpi'] = image.info['dpi']  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1089
                 | 
                                    
                                                     | 
                
                 | 
                        image_bytes = io.BytesIO()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1090
                 | 
                                    
                                                     | 
                
                 | 
                        image.save(image_bytes, format=MIME_TO_PIL[mimetype], **saveargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1091
                 | 
                                    
                                                     | 
                
                 | 
                        if file_path is None:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1092
                 | 
                                    
                                                     | 
                
                 | 
                            file_path = str(Path(file_grp, '%s%s' % (file_id, MIME_TO_EXT[mimetype])))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1093
                 | 
                                    
                                                     | 
                
                 | 
                        out = self.add_file(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1094
                 | 
                                    
                                                     | 
                
                 | 
                            file_grp,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1095
                 | 
                                    
                                                     | 
                
                 | 
                            file_id=file_id,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1096
                 | 
                                    
                                                     | 
                
                 | 
                            page_id=page_id,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1097
                 | 
                                    
                                                     | 
                
                 | 
                            local_filename=file_path,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1098
                 | 
                                    
                                                     | 
                
                 | 
                            mimetype=mimetype,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1099
                 | 
                                    
                                                     | 
                
                 | 
                            content=image_bytes.getvalue(),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1100
                 | 
                                    
                                                     | 
                
                 | 
                            force=force)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1101
                 | 
                                    
                                                     | 
                
                 | 
                        log.info('created file ID: %s, file_grp: %s, path: %s', | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1102
                 | 
                                    
                                                     | 
                
                 | 
                                 file_id, file_grp, out.local_filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1103
                 | 
                                    
                                                     | 
                
                 | 
                        return file_path  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1104
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1105
                 | 
                                    
                                                     | 
                
                 | 
                    def find_files(self, *args, **kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1106
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1107
                 | 
                                    
                                                     | 
                
                 | 
                        Search ``mets:file`` entries in wrapped METS document and yield results.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1108
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1109
                 | 
                                    
                                                     | 
                
                 | 
                        Delegator to :py:func:`ocrd_models.ocrd_mets.OcrdMets.find_files`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1110
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1111
                 | 
                                    
                                                     | 
                
                 | 
                        Keyword Args:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1112
                 | 
                                    
                                                     | 
                
                 | 
                            **kwargs: See :py:func:`ocrd_models.ocrd_mets.OcrdMets.find_files`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1113
                 | 
                                    
                                                     | 
                
                 | 
                        Returns:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1114
                 | 
                                    
                                                     | 
                
                 | 
                            Generator which yields :py:class:`ocrd_models:ocrd_file:OcrdFile` instantiations  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1115
                 | 
                                    
                                                     | 
                
                 | 
                        """  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1116
                 | 
                                    
                                                     | 
                
                 | 
                        log = getLogger('ocrd.workspace.find_files') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1117
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug('find files in mets. kwargs=%s' % kwargs) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1118
                 | 
                                    
                                                     | 
                
                 | 
                        if "page_id" in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1119
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs["pageId"] = kwargs.pop("page_id") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1120
                 | 
                                    
                                                     | 
                
                 | 
                        if "file_id" in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1121
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs["ID"] = kwargs.pop("file_id") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1122
                 | 
                                    
                                                     | 
                
                 | 
                        if "file_grp" in kwargs:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1123
                 | 
                                    
                                                     | 
                
                 | 
                            kwargs["fileGrp"] = kwargs.pop("file_grp") | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1124
                 | 
                                    
                                                     | 
                
                 | 
                        with pushd_popd(self.directory):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1125
                 | 
                                    
                                                     | 
                
                 | 
                            return self.mets.find_files(*args, **kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1126
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1127
                 | 
                                    
                                                     | 
                
                 | 
                def _crop(log, name, segment, parent_image, parent_coords, op='cropped', **kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1128
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords = parent_coords.copy()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1129
                 | 
                                    
                                                     | 
                
                 | 
                    # get polygon outline of segment relative to parent image:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1130
                 | 
                                    
                                                     | 
                
                 | 
                    segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1131
                 | 
                                    
                                                     | 
                
                 | 
                    # get relative bounding box:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1132
                 | 
                                    
                                                     | 
                
                 | 
                    segment_bbox = bbox_from_polygon(segment_polygon)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1133
                 | 
                                    
                                                     | 
                
                 | 
                    # get size of the segment in the parent image after cropping  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1134
                 | 
                                    
                                                     | 
                
                 | 
                    # (i.e. possibly different from size before rotation at the parent, but  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1135
                 | 
                                    
                                                     | 
                
                 | 
                    #  also possibly different from size after rotation below/AlternativeImage):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1136
                 | 
                                    
                                                     | 
                
                 | 
                    segment_xywh = xywh_from_bbox(*segment_bbox)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1137
                 | 
                                    
                                                     | 
                
                 | 
                    # crop, if (still) necessary:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1138
                 | 
                                    
                                                     | 
                
                 | 
                    if (not isinstance(segment, BorderType) or # always crop below page level  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1139
                 | 
                                    
                                                     | 
                
                 | 
                        not op in parent_coords['features']):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1140
                 | 
                                    
                                                     | 
                
                 | 
                        if op == 'recropped':  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1141
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("Recropping %s", name) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1142
                 | 
                                    
                                                     | 
                
                 | 
                        elif isinstance(segment, BorderType):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1143
                 | 
                                    
                                                     | 
                
                 | 
                            log.debug("Cropping %s", name) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1144
                 | 
                                    
                                                     | 
                
                 | 
                            segment_coords['features'] += ',' + op  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1145
                 | 
                                    
                                                     | 
                
                 | 
                        # create a mask from the segment polygon:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1146
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image = image_from_polygon(parent_image, segment_polygon, **kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1147
                 | 
                                    
                                                     | 
                
                 | 
                        # crop to bbox:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1148
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image = crop_image(segment_image, box=segment_bbox)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1149
                 | 
                                    
                                                     | 
                
                 | 
                    else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1150
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image = parent_image  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1151
                 | 
                                    
                                                     | 
                
                 | 
                    # subtract offset from parent in affine coordinate transform:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1152
                 | 
                                    
                                                     | 
                
                 | 
                    # (consistent with image cropping)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1153
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['transform'] = shift_coordinates(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1154
                 | 
                                    
                                                     | 
                
                 | 
                        parent_coords['transform'],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1155
                 | 
                                    
                                                     | 
                
                 | 
                        np.array([-segment_bbox[0],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1156
                 | 
                                    
                                                     | 
                
                 | 
                                  -segment_bbox[1]]))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1157
                 | 
                                    
                                                     | 
                
                 | 
                    return segment_image, segment_coords, segment_xywh  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1158
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1159
                 | 
                                    
                                                     | 
                
                 | 
                def _reflect(log, name, orientation, segment_image, segment_coords, segment_xywh):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1160
                 | 
                                    
                                                     | 
                
                 | 
                    # Transpose in affine coordinate transform:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1161
                 | 
                                    
                                                     | 
                
                 | 
                    # (consistent with image transposition or AlternativeImage below)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1162
                 | 
                                    
                                                     | 
                
                 | 
                    transposition = { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1163
                 | 
                                    
                                                     | 
                
                 | 
                        90: Image.Transpose.ROTATE_90,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1164
                 | 
                                    
                                                     | 
                
                 | 
                        180: Image.Transpose.ROTATE_180,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1165
                 | 
                                    
                                                     | 
                
                 | 
                        270: Image.Transpose.ROTATE_270  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1166
                 | 
                                    
                                                     | 
                
                 | 
                    }.get(orientation) # no default  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1167
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['transform'] = transpose_coordinates(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1168
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['transform'], transposition,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1169
                 | 
                                    
                                                     | 
                
                 | 
                        np.array([0.5 * segment_xywh['w'],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1170
                 | 
                                    
                                                     | 
                
                 | 
                                  0.5 * segment_xywh['h']]))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1171
                 | 
                                    
                                                     | 
                
                 | 
                    segment_xywh['w'], segment_xywh['h'] = adjust_canvas_to_transposition(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1172
                 | 
                                    
                                                     | 
                
                 | 
                        [segment_xywh['w'], segment_xywh['h']], transposition)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1173
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['angle'] += orientation  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1174
                 | 
                                    
                                                     | 
                
                 | 
                    # transpose, if (still) necessary:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1175
                 | 
                                    
                                                     | 
                
                 | 
                    if not 'rotated-%d' % orientation in segment_coords['features']:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1176
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug("Transposing %s by %d°", name, orientation) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1177
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image = transpose_image(segment_image, transposition)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1178
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['features'] += ',rotated-%d' % orientation  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1179
                 | 
                                    
                                                     | 
                
                 | 
                    return segment_image, segment_coords, segment_xywh  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1180
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1181
                 | 
                                    
                                                     | 
                
                 | 
                def _rotate(log, name, skew, segment, segment_image, segment_coords, segment_xywh, **kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1182
                 | 
                                    
                                                     | 
                
                 | 
                    # Rotate around center in affine coordinate transform:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1183
                 | 
                                    
                                                     | 
                
                 | 
                    # (consistent with image rotation or AlternativeImage below)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1184
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['transform'] = rotate_coordinates(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1185
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['transform'], skew,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1186
                 | 
                                    
                                                     | 
                
                 | 
                        np.array([0.5 * segment_xywh['w'],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1187
                 | 
                                    
                                                     | 
                
                 | 
                                  0.5 * segment_xywh['h']]))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1188
                 | 
                                    
                                                     | 
                
                 | 
                    segment_xywh['w'], segment_xywh['h'] = adjust_canvas_to_rotation(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1189
                 | 
                                    
                                                     | 
                
                 | 
                        [segment_xywh['w'], segment_xywh['h']], skew)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1190
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['angle'] += skew  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1191
                 | 
                                    
                                                     | 
                
                 | 
                    # deskew, if (still) necessary:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1192
                 | 
                                    
                                                     | 
                
                 | 
                    if not 'deskewed' in segment_coords['features']:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1193
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug("Rotating %s by %.2f°", name, skew) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1194
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image = rotate_image(segment_image, skew, **kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1195
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['features'] += ',deskewed'  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1196
                 | 
                                    
                                                     | 
                
                 | 
                        if (segment and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1197
                 | 
                                    
                                                     | 
                
                 | 
                            (not isinstance(segment, BorderType) or # always crop below page level  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1198
                 | 
                                    
                                                     | 
                
                 | 
                             'cropped' in segment_coords['features'])):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1199
                 | 
                                    
                                                     | 
                
                 | 
                            # re-crop to new bbox (which may deviate  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1200
                 | 
                                    
                                                     | 
                
                 | 
                            # if segment polygon was not a rectangle)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1201
                 | 
                                    
                                                     | 
                
                 | 
                            segment_image, segment_coords, segment_xywh = _crop(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1202
                 | 
                                    
                                                     | 
                
                 | 
                                log, name, segment, segment_image, segment_coords,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1203
                 | 
                                    
                                                     | 
                
                 | 
                                op='recropped', **kwargs)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1204
                 | 
                                    
                                                     | 
                
                 | 
                    elif (segment and  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1205
                 | 
                                    
                                                     | 
                
                 | 
                          (not isinstance(segment, BorderType) or # always crop below page level  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1206
                 | 
                                    
                                                     | 
                
                 | 
                           'cropped' in segment_coords['features'])):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1207
                 | 
                                    
                                                     | 
                
                 | 
                        # only shift coordinates as if re-cropping  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1208
                 | 
                                    
                                                     | 
                
                 | 
                        segment_polygon = coordinates_of_segment(segment, segment_image, segment_coords)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1209
                 | 
                                    
                                                     | 
                
                 | 
                        segment_bbox = bbox_from_polygon(segment_polygon)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1210
                 | 
                                    
                                                     | 
                
                 | 
                        segment_xywh = xywh_from_bbox(*segment_bbox)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1211
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['transform'] = shift_coordinates(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1212
                 | 
                                    
                                                     | 
                
                 | 
                            segment_coords['transform'],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1213
                 | 
                                    
                                                     | 
                
                 | 
                            np.array([-segment_bbox[0],  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1214
                 | 
                                    
                                                     | 
                
                 | 
                                      -segment_bbox[1]]))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1215
                 | 
                                    
                                                     | 
                
                 | 
                    return segment_image, segment_coords, segment_xywh  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1216
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1217
                 | 
                                    
                                                     | 
                
                 | 
                def _scale(log, name, factor, segment_image, segment_coords, segment_xywh, **kwargs):  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1218
                 | 
                                    
                                                     | 
                
                 | 
                    # Resize linearly  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1219
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['transform'] = scale_coordinates(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1220
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['transform'], [factor, factor])  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1221
                 | 
                                    
                                                     | 
                
                 | 
                    segment_coords['scale'] = segment_coords.setdefault('scale', 1.0) * factor | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1222
                 | 
                                    
                                                     | 
                
                 | 
                    segment_xywh['w'] *= factor  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1223
                 | 
                                    
                                                     | 
                
                 | 
                    segment_xywh['h'] *= factor  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1224
                 | 
                                    
                                                     | 
                
                 | 
                    # resize, if (still) necessary  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1225
                 | 
                                    
                                                     | 
                
                 | 
                    if not 'scaled' in segment_coords['features']:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1226
                 | 
                                    
                                                     | 
                
                 | 
                        log.debug("Scaling %s by %.2f", name, factor) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1227
                 | 
                                    
                                                     | 
                
                 | 
                        segment_coords['features'] += ',scaled'  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1228
                 | 
                                    
                                                     | 
                
                 | 
                        # FIXME: validate factor against PAGE-XML attributes  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1229
                 | 
                                    
                                                     | 
                
                 | 
                        # FIXME: factor should become less precise due to rounding  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1230
                 | 
                                    
                                                     | 
                
                 | 
                        segment_image = segment_image.resize((int(segment_image.width * factor),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1231
                 | 
                                    
                                                     | 
                
                 | 
                                                              int(segment_image.height * factor)),  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1232
                 | 
                                    
                                                     | 
                
                 | 
                                                             # slowest, but highest quality:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    1233
                 | 
                                    
                                                     | 
                
                 | 
                                                             Image.Resampling.BICUBIC)  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    1234
                 | 
                                    
                                                     | 
                
                 | 
                    return segment_image, segment_coords, segment_xywh  | 
            
            
                                                        
            
                                    
            
            
                | 
                    1235
                 | 
                                    
                                                     | 
                
                 | 
                 |