| 1 |  |  | from pathlib import Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | from os.path import join | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from json import loads | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from json.decoder import JSONDecodeError | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from os import environ, listdir, getcwd, path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from fnmatch import filter as apply_glob | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | from shutil import copytree | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from datetime import datetime | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from tarfile import open as open_tarfile | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | from urllib.parse import urlparse, unquote | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | from subprocess import run, PIPE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | import requests | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | from yaml import safe_load, safe_dump | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | from ocrd_validators import OcrdResourceListValidator | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | from ocrd_utils import getLogger, directory_size | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | from ocrd_utils.os import get_processor_resource_types, list_all_resources, pushd_popd, get_ocrd_tool_json | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | from .constants import RESOURCE_LIST_FILENAME, RESOURCE_USER_LIST_COMMENT | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | class OcrdResourceManager(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     Managing processor resources | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     def __init__(self, userdir=None, xdg_config_home=None, xdg_data_home=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |         self.log = getLogger('ocrd.resource_manager') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |         self.database = {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |         self._xdg_data_home = xdg_data_home | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |         self._xdg_config_home = xdg_config_home | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |         self._userdir = userdir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |         self.user_list = Path(self.xdg_config_home, 'ocrd', 'resources.yml') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         self.load_resource_list(Path(RESOURCE_LIST_FILENAME)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |         if not self.user_list.exists(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |             if not self.user_list.parent.exists(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |                 self.user_list.parent.mkdir(parents=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |             self.save_user_list() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |         self.load_resource_list(self.user_list) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     def userdir(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         if not self._userdir: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |             self._userdir = path.expanduser('~') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |             if 'HOME' in environ and environ['HOME'] != path.expanduser('~'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |                 self._userdir = environ['HOME'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         return self._userdir | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |     def xdg_data_home(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         if not self._xdg_data_home: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |             if 'XDG_DATA_HOME' in environ: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |                 self._xdg_data_home = environ['XDG_DATA_HOME'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |                 self._xdg_data_home = join(self.userdir, '.local', 'share') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |         return self._xdg_data_home | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |     @property | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |     def xdg_config_home(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         if not self._xdg_config_home: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |             if 'XDG_CONFIG_HOME' in environ: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |                 self._xdg_config_home = environ['XDG_CONFIG_HOME'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |                 self._xdg_config_home = join(self.userdir, '.config') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         return self._xdg_config_home | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |     def save_user_list(self, database=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |         if not database: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |             database = self.database | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         with open(self.user_list, 'w', encoding='utf-8') as f: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |             f.write(RESOURCE_USER_LIST_COMMENT) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |             f.write('\n') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |             f.write(safe_dump(database)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |     def load_resource_list(self, list_filename, database=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |         if not database: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |             database = self.database | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         if list_filename.is_file(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |             with open(list_filename, 'r', encoding='utf-8') as f: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |                 list_loaded = safe_load(f) or {} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |             report = OcrdResourceListValidator.validate(list_loaded) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |             if not report.is_valid: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |                 self.log.error('\n'.join(report.errors)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |                 raise ValueError("Resource list %s is invalid!" % (list_filename)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |             for executable, resource_list in list_loaded.items(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |                 if executable not in database: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |                     database[executable] = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |                 # Prepend, so user provided is sorted before builtin | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |                 database[executable] = list_loaded[executable] + database[executable] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         return database | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |     def list_available(self, executable=None, dynamic=True): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         List models available for download by processor | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         if not executable: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |             return self.database.items() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         if dynamic: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |             for exec_dir in environ['PATH'].split(':'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |                 for exec_path in Path(exec_dir).glob(f'{executable}'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |                     self.log.info(f"Inspecting '{exec_path} --dump-json' for resources") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |                     try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |                         ocrd_tool = get_ocrd_tool_json(exec_path) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |                     except JSONDecodeError: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |                         self.log.info(f"Failed to parse {exec_path} --dump-json output - not an OCR-D processor?") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |                     for resdict in ocrd_tool.get('resources', ()): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |                         for res_remove in (res for res in self.database.get(executable, []) if res['name'] == resdict['name']): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |                             self.database.get(executable).remove(res_remove) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |                         self.database[exec_path.name].append(resdict) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |         ret = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |         for k in self.database: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |             if apply_glob([k], executable): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |                 ret.append((k, self.database[k])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         return ret | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def list_installed(self, executable=None):
                    """
                    List installed resources, matching with registry by ``name``

                    Args:
                        executable (str, optional): only list resources of this executable.
                            When omitted, every executable in the registry is considered,
                            plus every ``ocrd-*`` entry found in the ``ocrd-resources``
                            directory below ``self.xdg_data_home`` and ``/usr/local/share``.

                    Returns:
                        list: ``(executable, resources)`` tuples ordered by executable name,
                        where ``resources`` is the list of registry entries of the resources
                        found for that executable. Each entry gets its ``path`` key set to
                        the location of the resource (the registry entry itself is updated).

                    Resources that have no registry entry of the same name are added to the
                    user database as stubs via :py:meth:`add_to_user_database`.
                    """
                    if executable:
                        all_executables = {executable}
                    else:
                        # resources we know about
                        all_executables = set(self.database)
                        # resources in the file system
                        for base_dir in (self.xdg_data_home, '/usr/local/share'):
                            parent_dir = join(base_dir, 'ocrd-resources')
                            # is_dir() instead of exists(): listdir() fails on a regular file
                            if Path(parent_dir).is_dir():
                                all_executables.update(x for x in listdir(parent_dir) if x.startswith('ocrd-'))
                    ret = []
                    # sorted, so the result does not depend on set iteration order
                    for this_executable in sorted(all_executables):
                        reslist = []
                        # presumably a list of mimetype strings - verify against get_processor_resource_types
                        mimetypes = get_processor_resource_types(this_executable)
                        known_resources = self.database.get(this_executable, [])
                        for res_filename in list_all_resources(this_executable, xdg_data_home=self.xdg_data_home):
                            res_filename = Path(res_filename)
                            if '*/*' not in mimetypes:
                                # directories are only listed if the processor accepts directories
                                if res_filename.is_dir() and 'text/directory' not in mimetypes:
                                    continue
                                # files are skipped if the processor accepts nothing but directories
                                if res_filename.is_file() and ['text/directory'] == mimetypes:
                                    continue
                            res_name = res_filename.name
                            resdict = next((x for x in known_resources if x['name'] == res_name), None)
                            if resdict is None:
                                self.log.info("%s resource '%s' (%s) not a known resource, creating stub in %s",
                                              this_executable, res_name, str(res_filename), self.user_list)
                                # NOTE(review): the stub is created with the default resource_type
                                # ('file') even if res_filename is a directory - confirm this is intended
                                resdict = self.add_to_user_database(this_executable, res_filename)
                            resdict['path'] = str(res_filename)
                            reslist.append(resdict)
                        ret.append((this_executable, reslist))
                    return ret
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def add_to_user_database(self, executable, res_filename, url=None, resource_type='file'):
                    """
                    Add a stub entry to the user resource.yml

                    The user list (``self.user_list``) is read, an entry for the resource is
                    appended below ``executable`` unless one with the same name already
                    exists there, the list is written back and then reloaded into the
                    registry.

                    Args:
                        executable (str): executable the resource belongs to
                        res_filename (str or Path): location of the resource (file or directory)
                        url (str, optional): URL to record; ``'???'`` is stored if not given
                        resource_type (str): value stored as ``type`` of the entry

                    Returns:
                        dict: the newly created entry, or the already existing entry of
                        that name from the user list
                    """
                    res_path = Path(res_filename)
                    res_name = res_path.name
                    # directories are measured via directory_size(), plain files via stat()
                    res_size = directory_size(res_filename) if res_path.is_dir() else res_path.stat().st_size
                    with open(self.user_list, 'r', encoding='utf-8') as f:
                        # an empty file loads as None
                        user_database = safe_load(f) or {}
                    user_database.setdefault(executable, [])
                    matches = self.find_resources(executable=executable, name=res_name, database=user_database)
                    if matches:
                        # find_resources yields (executable, entry) tuples, keep the first entry
                        resdict = matches[0][1]
                    else:
                        resdict = {
                            'name': res_name,
                            'url': url or '???',
                            'description': 'Found at %s on %s' % (self.resource_dir_to_location(res_filename), datetime.now()),
                            'version_range': '???',
                            'type': resource_type,
                            'size': res_size,
                        }
                        user_database[executable].append(resdict)
                    # written back and reloaded in both cases
                    self.save_user_list(user_database)
                    self.load_resource_list(self.user_list)
                    return resdict
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def find_resources(self, executable=None, name=None, url=None, database=None):
                    """
                    Find resources in the registry

                    Args:
                        executable (str, optional): only search the resources of this
                            executable; all executables are searched if not given
                        name (str, optional): match resources with this ``name``
                        url (str, optional): match resources with this ``url``
                        database (dict, optional): mapping of executable to list of resource
                            entries to search instead of ``self.database``

                    Returns:
                        list: ``(executable, resdict)`` tuples of the matching resources.
                        Without ``name`` and ``url`` every resource matches; if both are
                        given, a resource matches if either of them is equal.
                    """
                    # Only fall back if no database was passed at all: an explicitly passed
                    # empty mapping must be searched as-is (and yield nothing)
                    if database is None:
                        database = self.database
                    if executable:
                        if executable not in database:
                            return []
                        executables = [executable]
                    else:
                        executables = list(database)
                    ret = []
                    for this_executable in executables:
                        for resdict in database[this_executable]:
                            if (not name and not url) \
                                    or (url and url == resdict['url']) \
                                    or (name and name == resdict['name']):
                                ret.append((this_executable, resdict))
                    return ret
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                @property
                def default_resource_dir(self):
                    """
                    Resource directory of the ``'data'`` location, as resolved by
                    :py:meth:`location_to_resource_dir`
                    """
                    data_dir = self.location_to_resource_dir('data')
                    return data_dir
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def location_to_resource_dir(self, location):
                    """
                    Translate a location keyword into the directory it stands for

                    ``'system'`` is ``/usr/local/share/ocrd-resources``, ``'data'`` is the
                    ``ocrd-resources`` directory below ``self.xdg_data_home``; any other
                    value yields the current working directory.
                    """
                    if location == 'system':
                        return '/usr/local/share/ocrd-resources'
                    if location == 'data':
                        return join(self.xdg_data_home, 'ocrd-resources')
                    return getcwd()
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def resource_dir_to_location(self, resource_path):
                    """
                    Translate the path of a resource into a location keyword

                    Args:
                        resource_path (str or Path): path of a resource or resource directory

                    Returns:
                        str: ``'system'`` if the path is ``/usr/local/share/ocrd-resources`` or
                        lies below it, ``'data'`` for the ``ocrd-resources`` directory below
                        ``self.xdg_data_home``, ``'cwd'`` for the current working directory
                        (checked in that order), otherwise the path itself as a string.
                    """
                    resource_path = str(resource_path)
                    separators = ('/', path.sep)

                    def is_inside(root):
                        # A bare prefix test would also accept sibling directories such as
                        # ``<root>-old``, so the match has to end on a path component boundary
                        if not resource_path.startswith(root):
                            return False
                        rest = resource_path[len(root):]
                        return not rest or root.endswith(separators) or rest.startswith(separators)

                    if is_inside('/usr/local/share/ocrd-resources'):
                        return 'system'
                    if is_inside(join(self.xdg_data_home, 'ocrd-resources')):
                        return 'data'
                    if is_inside(getcwd()):
                        return 'cwd'
                    return resource_path
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def parameter_usage(self, name, usage='as-is'):
                    """
                    Return the form of a resource name that is passed on as a parameter.

                    With ``usage='as-is'`` the name is returned unchanged; with
                    ``usage='without-extension'`` the final path component is returned
                    with its last suffix removed. Any other ``usage`` raises
                    ``ValueError``.
                    """
                    if usage not in ('as-is', 'without-extension'):
                        raise ValueError("No such usage '%s'" % usage)
                    return name if usage == 'as-is' else Path(name).stem
            
                                                                                                            
                            
            
                                    
            
            
                | 223 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _download_impl(self, url, filename, progress_cb=None, size=None):
                    """
                    Stream the content behind ``url`` into the local file ``filename``.

                    Args:
                        url: address passed to ``requests.get``.
                        filename: path of the file to write; created or truncated once
                            the server has answered successfully.
                        progress_cb: optional callable, invoked with the byte length of
                            every received chunk before that chunk is written.
                        size: kept for backward compatibility; this implementation
                            does not need it.

                    Raises:
                        requests.HTTPError: if the server responds with an error status.
                    """
                    log = getLogger('ocrd.resource_manager._download_impl')
                    log.info("Downloading %s to %s", url, filename)
                    with requests.get(url, stream=True) as r:
                        # Check the status before opening the target, so that a failed
                        # request neither truncates an existing file nor stores the
                        # server's error page as if it were the resource.
                        r.raise_for_status()
                        # The Content-Length header is deliberately not consulted: it
                        # is absent for chunked responses and its value was never used.
                        with open(filename, 'wb') as f:
                            for data in r.iter_content(chunk_size=4096):
                                if progress_cb:
                                    progress_cb(len(data))
                                f.write(data)
            
                                                                                                            
                                                                
            
                                    
            
            
                | 234 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def _copy_impl(self, src_filename, filename, progress_cb=None):
                    """
                    Copy a local file, or a whole directory tree, to ``filename``.

                    Args:
                        src_filename: path of the file or directory to copy.
                        filename: destination path. For a directory source this becomes
                            the root of the copied tree.
                        progress_cb: optional callable, invoked with the byte length of
                            every chunk after it has been written.

                    Directory sources are walked recursively. Sub-directories
                    (including empty ones) are re-created below ``filename`` and
                    every other entry is copied in chunks of 4096 bytes.
                    """
                    log = getLogger('ocrd.resource_manager._copy_impl')

                    def copy_file(src, dst):
                        # Open the source first so that a missing or unreadable source
                        # does not leave an empty destination file behind.
                        with open(src, 'rb') as f_in, open(dst, 'wb') as f_out:
                            for chunk in iter(lambda: f_in.read(4096), b''):
                                f_out.write(chunk)
                                if progress_cb:
                                    progress_cb(len(chunk))

                    log.info("Copying %s to %s", src_filename, filename)
                    src_root = Path(src_filename)
                    if not src_root.is_dir():
                        copy_file(src_filename, filename)
                        return

                    log.info("Copying recursively from %s to %s", src_filename, filename)
                    dst_root = Path(filename)
                    # Create the root up front so an empty source still yields a directory.
                    dst_root.mkdir(parents=True, exist_ok=True)
                    for child in src_root.rglob('*'):
                        child_dst = dst_root / child.relative_to(src_root)
                        if child.is_dir():
                            # rglob() also yields directories; they cannot be opened
                            # like files, so mirror them and move on.
                            child_dst.mkdir(parents=True, exist_ok=True)
                            continue
                        child_dst.parent.mkdir(parents=True, exist_ok=True)
                        copy_file(child, child_dst)
            
                                                                                                            
                            
            
                                    
            
            
                | 262 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 263 |  |  |     # TODO Proper caching (make head request for size, If-Modified etc) | 
            
                                                                                                            
                            
            
                                    
            
            
                def download(
                    self,
                    executable,
                    url,
                    basedir,
                    overwrite=False,
                    no_subdir=False,
                    name=None,
                    resource_type='file',
                    path_in_archive='.',
                    progress_cb=None,
                    size=None,
                ):
                    """
                    Download a resource by URL

                    Args:
                        executable: name of the sub-directory of ``basedir`` that
                            receives the resource (ignored when ``no_subdir`` is set).
                        url: ``http://`` / ``https://`` URL to download; anything else
                            is treated as a local path to copy from.
                        basedir: base directory for the resource.
                        overwrite: if false and the target already exists, nothing is
                            transferred and the existing path is returned.
                        no_subdir: store directly in ``basedir`` instead of
                            ``basedir/executable``.
                        name: file name of the target; defaults to the last component
                            of the (unquoted) URL path.
                        resource_type: ``'file'`` or ``'directory'`` to store the source
                            as is, ``'archive'`` to unpack a tar archive and keep only
                            ``path_in_archive``.
                        path_in_archive: path inside the unpacked archive that becomes
                            the resource.
                        progress_cb: optional callable passed on to the transfer
                            helpers, which call it with the size of every chunk.
                        size: expected size in bytes, passed on to the download helper.

                    Returns:
                        ``pathlib.Path`` of the resource below ``basedir``.
                    """
                    # Function-scope import: only the archive branch needs these names.
                    from shutil import copy, rmtree

                    log = getLogger('ocrd.resource_manager.download')
                    destdir = Path(basedir) if no_subdir else Path(basedir, executable)
                    if not name:
                        url_parsed = urlparse(url)
                        name = Path(unquote(url_parsed.path)).name
                    fpath = Path(destdir, name)
                    is_url = url.startswith(('https://', 'http://'))
                    if fpath.exists() and not overwrite:
                        log.info("%s to be %s to %s which already exists and overwrite is False" % (url, 'downloaded' if is_url else 'copied', fpath))
                        return fpath
                    destdir.mkdir(parents=True, exist_ok=True)
                    if resource_type in ('file', 'directory'):
                        if is_url:
                            # Pass ``size`` along just like the archive branch does.
                            self._download_impl(url, fpath, progress_cb, size)
                        else:
                            self._copy_impl(url, fpath, progress_cb)
                    elif resource_type == 'archive':
                        # NOTE(review): pushd_popd appears to change the working
                        # directory (the relative paths below rely on that), so a
                        # relative local source or destination has to be made
                        # absolute first - confirm against pushd_popd.
                        archive_src = url if is_url else str(Path(url).resolve())
                        target = fpath.resolve()
                        with pushd_popd(tempdir=True) as tempdir:
                            if is_url:
                                self._download_impl(archive_src, 'download.tar.xx', progress_cb, size)
                            else:
                                self._copy_impl(archive_src, 'download.tar.xx', progress_cb)
                            Path('out').mkdir()
                            with pushd_popd('out'):
                                log.info("Extracting archive to %s/out" % tempdir)
                                with open_tarfile('../download.tar.xx', 'r:*') as tar:
                                    # NOTE(review): extractall() trusts the member paths
                                    # of the archive; a malicious archive could write
                                    # outside of 'out'. Consider validating members.
                                    tar.extractall()
                                log.info("Copying '%s' from archive to %s" % (path_in_archive, fpath))
                                # Reaching this point with an existing target means
                                # overwrite was requested. copytree() refuses an
                                # existing destination, so clear it - but only now,
                                # after the archive was fetched and unpacked.
                                if target.is_dir():
                                    rmtree(str(target))
                                elif target.exists():
                                    target.unlink()
                                if Path(path_in_archive).is_dir():
                                    copytree(path_in_archive, str(target))
                                else:
                                    # The resource may be a single file of the archive.
                                    copy(path_in_archive, str(target))
                    return fpath
            
                                                        
            
                                    
            
            
                | 310 |  |  |  |