| 1 |  |  | from pathlib import Path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | import re | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from shutil import copyfileobj, copytree | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from tempfile import TemporaryFile | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from tarfile import open as open_tarfile | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import requests | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from yaml import safe_load | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | from .constants import RESOURCE_LIST_FILENAME | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | from ocrd_validators import OcrdResourceListValidator | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | from ocrd_utils import getLogger | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | from ocrd_utils.constants import HOME, XDG_CACHE_HOME | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | from ocrd_utils.os import list_resource_candidates, list_all_resources, pushd_popd | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                # Built-in resource list; its location comes from ``.constants``.
                builtin_list_filename = Path(RESOURCE_LIST_FILENAME)
                # User-provided resource list, located below ``HOME``.
                user_list_filename = Path(HOME) / 'ocrd' / 'resources.yml'
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | class OcrdResourceManager(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |     Managing processor resources | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                def __init__(self):
                    """
                    Create the logger and an empty registry, then fill the registry.

                    The built-in list is loaded before the user list. Because
                    ``load_resource_list`` prepends what it loads, user-provided
                    entries end up ahead of built-in ones.
                    """
                    self.log = getLogger('ocrd.resource_manager')
                    self.database = {}
                    for list_filename in (builtin_list_filename, user_list_filename):
                        self.load_resource_list(list_filename)
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def load_resource_list(self, list_filename):
                    """
                    Merge the resource list stored in ``list_filename`` into ``self.database``.

                    Nothing happens if ``list_filename`` is not an existing file. An empty
                    YAML document is treated as an empty resource list.

                    Args:
                        list_filename (Path): location of the YAML resource list.

                    Raises:
                        ValueError: if ``OcrdResourceListValidator`` reports the list as invalid.
                    """
                    if not list_filename.is_file():
                        return
                    with open(list_filename, 'r', encoding='utf-8') as f:
                        list_loaded = safe_load(f)
                    if list_loaded is None:
                        # safe_load yields None for an empty document; treat it as "no resources"
                        list_loaded = {}
                    report = OcrdResourceListValidator.validate(list_loaded)
                    if not report.is_valid:
                        self.log.error('\n'.join(report.errors))
                        raise ValueError("Resource list %s is invalid!" % (list_filename))
                    for executable, resource_list in list_loaded.items():
                        # Prepend, so user provided is sorted before builtin
                        self.database[executable] = resource_list + self.database.get(executable, [])
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def list_available(self, executable=None):
                    """
                    List the registry entries (resources available for download).

                    Args:
                        executable (str, optional): if given, only this executable's
                            entries are returned; it must be a key of the registry.

                    Returns:
                        list: ``(executable, resources)`` tuples, one per executable.
                    """
                    if not executable:
                        return list(self.database.items())
                    return [(executable, self.database[executable])]
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def list_installed(self, executable=None):
                    """
                    List installed resources, matching with registry by ``name``

                    Resource files without a registry entry are reported with
                    placeholder (``'???'``) metadata. An executable that has no registry
                    entry at all is handled the same way instead of raising ``KeyError``.

                    Args:
                        executable (str, optional): restrict the listing to this
                            executable; otherwise every executable in the registry is used.

                    Returns:
                        list: ``(executable, resources)`` tuples, where ``resources`` holds
                        one dict per file reported by ``list_all_resources``.
                    """
                    ret = []
                    executables = [executable] if executable else list(self.database)
                    for this_executable in executables:
                        # The executable may have installed files but no registry entry
                        known = self.database.get(this_executable, [])
                        reslist = []
                        for res_filename in list_all_resources(this_executable):
                            res_name = Path(res_filename).name
                            # First match wins, so user-provided entries take precedence
                            resdict = next((x for x in known if x['name'] == res_name), None)
                            if resdict is None:
                                # TODO handle gracefully
                                resdict = {'name': res_name, 'url': '???', 'description': '???', 'version_range': '???'}
                            reslist.append(resdict)
                        ret.append((this_executable, reslist))
                    return ret
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def find_resources(self, executable=None, name=None, url=None):
                    """
                    Search the registry for entries with a matching ``url`` or ``name``.

                    Args:
                        executable (str, optional): only search this executable's entries;
                            an executable missing from the registry yields an empty result.
                        name (str, optional): value to compare with each entry's ``name``.
                        url (str, optional): value to compare with each entry's ``url``.

                    Returns:
                        list: ``(executable, resdict)`` tuples for every matching entry.
                    """
                    if executable and executable not in self.database:
                        return []

                    def is_match(entry):
                        # url is tested first; name is only consulted if url did not match
                        return (url and url == entry['url']) or (name and name == entry['name'])

                    candidates = [executable] if executable else self.database.keys()
                    return [
                        (candidate, entry)
                        for candidate in candidates
                        for entry in self.database[candidate]
                        if is_match(entry)
                    ]
            
                                                                                                            
                                                                
            
                                    
            
            
                | 84 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                def parameter_usage(self, name, usage='as-is'):
                    """
                    Derive the value to pass as a parameter from a resource ``name``.

                    Args:
                        name (str): resource name.
                        usage (str): ``'as-is'`` returns ``name`` unchanged,
                            ``'without-extension'`` returns it without its last suffix.

                    Returns:
                        The derived value, or ``None`` for any other ``usage``.
                    """
                    if usage == 'without-extension':
                        return Path(name).stem
                    return name if usage == 'as-is' else None
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                def _download_impl(self, url, filename, progress_cb=None):
                    """
                    Stream the content at ``url`` into the file ``filename``.

                    Args:
                        url (str): location fetched with an HTTP GET request.
                        filename (str or Path): file to write; opened in ``'wb'`` mode.
                        progress_cb (callable, optional): called with the byte length of
                            each chunk before that chunk is written.

                    Raises:
                        requests.HTTPError: if the server answers with an error status.
                    """
                    with requests.get(url, stream=True) as r:
                        # Check the status before opening the target, so an error page
                        # is never stored as a resource and no file is created on failure.
                        r.raise_for_status()
                        with open(filename, 'wb') as f:
                            for data in r.iter_content(chunk_size=4096):
                                if progress_cb:
                                    progress_cb(len(data))
                                f.write(data)
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |     # TODO Proper caching (make head request for size, If-Modified etc) | 
            
                                                                                                            
                            
            
                                    
            
            
                def download(
                    self,
                    executable,
                    url,
                    overwrite=False,
                    basedir=XDG_CACHE_HOME,
                    name=None,
                    resource_type='file',
                    path_in_archive='.',
                    progress_cb=None,
                ):
                    """
                    Download a resource by URL

                    The resource ends up at ``<basedir>/<executable>/<name>``.

                    Args:
                        executable: Name of the sub-directory of ``basedir`` that
                            receives the resource.
                        url: Location to download from.
                        overwrite: When false and the destination already exists,
                            nothing is downloaded and the existing path is returned.
                            When true, an existing destination is replaced.
                        basedir: Directory below which resources are stored.
                        name: Name of the resource inside the destination directory.
                            When empty, it is derived from ``url`` by dropping every
                            character that is not an ASCII letter or digit.
                        resource_type: ``'file'`` stores the download as-is.
                            ``'tarball'`` downloads an archive, unpacks it in a
                            scratch directory and copies ``path_in_archive`` to the
                            destination. Any other value downloads nothing; the
                            destination path is still returned.
                        path_in_archive: Directory inside the unpacked tarball that
                            is copied to the destination (``'.'`` means everything).
                        progress_cb: Passed through unchanged to ``_download_impl``.

                    Returns:
                        The ``Path`` of the resource.

                    Raises:
                        ValueError: If a tarball member would be written outside the
                            extraction directory.
                    """
                    # Local import: only needed to replace an existing tarball resource.
                    from shutil import rmtree

                    log = getLogger('ocrd.resource_manager.download')
                    destdir = Path(basedir, executable)
                    if not name:
                        name = re.sub('[^A-Za-z0-9]', '', url)
                    fpath = Path(destdir, name)
                    if fpath.exists() and not overwrite:
                        log.info("%s to be downloaded to %s which already exists and overwrite is False", url, fpath)
                        return fpath
                    destdir.mkdir(parents=True, exist_ok=True)
                    if resource_type == 'file':
                        self._download_impl(url, fpath, progress_cb)
                    elif resource_type == 'tarball':
                        # The relative paths below rely on pushd_popd switching the
                        # working directory to a scratch location.
                        with pushd_popd(tempdir=True):
                            log.info("Downloading %s", url)
                            self._download_impl(url, 'download.tar.xx', progress_cb)
                            Path('out').mkdir()
                            with pushd_popd('out'):
                                log.info("Extracting tarball")
                                with open_tarfile('../download.tar.xx', 'r:*') as tar:
                                    # The archive was just fetched from the network, so
                                    # refuse members that would land outside 'out'.
                                    extract_root = Path.cwd().resolve()
                                    for member in tar.getmembers():
                                        member_dest = (extract_root / member.name).resolve()
                                        if member_dest != extract_root and extract_root not in member_dest.parents:
                                            raise ValueError(
                                                "Refusing to extract '%s' from %s: path escapes the extraction directory"
                                                % (member.name, url)
                                            )
                                    tar.extractall()
                                log.info("Copying '%s' from tarball to %s", path_in_archive, fpath)
                                # copytree() refuses an existing destination. Reaching
                                # this point with an existing fpath means overwrite was
                                # requested, so drop the old copy only now that the new
                                # content is downloaded and unpacked.
                                if fpath.is_symlink() or fpath.is_file():
                                    fpath.unlink()
                                elif fpath.is_dir():
                                    rmtree(str(fpath))
                                copytree(path_in_archive, str(fpath))
                    # TODO
                    # elif resource_type == 'github-dir':
                    return fpath
            
                                                        
            
                                    
            
            
                | 141 |  |  |  |