Passed
Push — master ( 2330e7...80eb58 )
by Konstantin
02:37
created

ocrd.cli.resmgr   B

Complexity

Total Complexity 49

Size/Duplication

Total Lines 225
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 49
eloc 165
dl 0
loc 225
rs 8.48
c 0
b 0
f 0

6 Functions

Rating   Name   Duplication   Size   Complexity  
A resmgr_cli() 0 6 1
C migrate() 0 34 10
A list_installed() 0 9 2
A list_available() 0 12 2
A print_resources() 0 6 3
F download() 0 117 31

How to fix   Complexity   

Complexity

Complex classes like ocrd.cli.resmgr often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefix or suffix.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
"""
2
OCR-D CLI: management of processor resources
3
4
.. click:: ocrd.cli.resmgr:resmgr_cli
5
    :prog: ocrd resmgr
6
    :nested: full
7
"""
8
import sys
9
from pathlib import Path
10
from shutil import which
11
from yaml import safe_load, safe_dump
12
13
import requests
14
import click
15
16
from ocrd_utils import (
17
    directory_size,
18
    getLogger,
19
    get_moduledir,
20
    get_ocrd_tool_json,
21
    resource_filename,
22
    initLogging,
23
    RESOURCE_LOCATIONS,
24
)
25
from ocrd.constants import RESOURCE_USER_LIST_COMMENT
26
27
from ..resource_manager import OcrdResourceManager
28
29
30
def print_resources(executable, reslist, resmgr):
    """
    Print a human-readable listing of one executable's resources.

    Writes the executable name, then a two-line entry per resource (name,
    location and URL on the first line, description on the second), and
    finally an empty line.

    Args:
        executable: name printed as the heading of the listing
        reslist: iterable of resource dicts; every dict must provide the keys
            ``name``, ``url`` and ``description`` and may provide ``path``
        resmgr: object whose ``resource_dir_to_location`` turns a resource
            path into a location label (only called for dicts with ``path``)
    """
    print(f"{executable}")
    for entry in reslist:
        if 'path' in entry:
            location = resmgr.resource_dir_to_location(entry['path'])
        else:
            # no path recorded for this entry: the location column stays blank
            location = ''
        print(f"- {entry['name']} @ {location} ({entry['url']})\n  {entry['description']}")
    print()
36
37
38
@click.group(name="resmgr")
def resmgr_cli():
    """
    Managing processor resources
    """
    # The docstring above is what click shows as the group's help text, so it
    # is kept verbatim. Logging is set up whenever the group is invoked.
    initLogging()
44
45
46
@resmgr_cli.command('list-available')
@click.option('-D', '--no-dynamic', is_flag=True, default=False,
              help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
@click.option('-e', '--executable', metavar='EXEC', default='ocrd-*',
              help='Show only resources for executable beginning with EXEC', )
def list_available(executable, no_dynamic):
    """
    List available resources
    """
    # The docstring above is the command's help text and is kept verbatim.
    manager = OcrdResourceManager()
    available = manager.list_available(executable=executable, dynamic=not no_dynamic)
    # Each item unpacks into a processor name and its list of resource dicts;
    # a distinct loop name avoids shadowing the `executable` filter argument.
    for processor, resources in available:
        print_resources(processor, resources, manager)
58
59
60
@resmgr_cli.command('list-installed')
@click.option('-e', '--executable', help='Show only resources for executable EXEC', metavar='EXEC')
def list_installed(executable=None):
    """
    List installed resources
    """
    # The docstring above is the command's help text and is kept verbatim.
    manager = OcrdResourceManager()
    installed = manager.list_installed(executable)
    # Each item unpacks into a processor name and its list of resource dicts;
    # a distinct loop name avoids shadowing the `executable` filter argument.
    for processor, resources in installed:
        print_resources(processor, resources, manager)
69
70
71
def _probe_resource(resdict, registered, log):
    """
    Announce the transfer and make sure ``resdict['size']`` is set.

    For http(s) URLs a missing size is taken from the ``content-length`` header
    of a HEAD request (0 if absent). Any other URL is treated as a local path:
    ``resdict['url']`` is replaced by the resolved path and the size is always
    recomputed from the file or directory on disk.
    """
    url = resdict['url']
    if url.startswith(('https://', 'http://')):
        log.info(f"Downloading {registered} resource '{resdict['name']}' ({url})")
        if 'size' not in resdict:
            # the size is only needed as the length of the progress bar
            with requests.head(url) as r:
                resdict['size'] = int(r.headers.get('content-length', 0))
    else:
        log.info(f"Copying {registered} resource '{resdict['name']}' ({url})")
        urlpath = Path(url)
        resdict['url'] = str(urlpath.resolve())
        if urlpath.is_dir():
            resdict['size'] = directory_size(urlpath)
        else:
            resdict['size'] = urlpath.stat().st_size


def _resolve_location(location, this_executable, log):
    """
    Return the location to install a resource of ``this_executable`` into.

    Without a user-selected ``location`` the first entry of the processor's
    ``resource_locations`` is used. A user-selected location that is not in
    that list is logged as an error and terminates the program with status 1.
    """
    allowed = get_ocrd_tool_json(this_executable)['resource_locations']
    if not location:
        return allowed[0]
    if location not in allowed:
        log.error(f"The selected --location {location} is not in the {this_executable}'s resource search path, "
                  f"refusing to install to invalid location")
        sys.exit(1)
    return location


def _resolve_basedir(resmgr, this_executable, location):
    """
    Return the directory that corresponds to ``location`` for ``this_executable``.
    """
    if location != 'module':
        return resmgr.location_to_resource_dir(location)
    # processors that report no module directory fall back to the 'data' location
    return get_moduledir(this_executable) or resmgr.location_to_resource_dir('data')


@resmgr_cli.command('download')
@click.option('-n', '--any-url', default='', help='URL of unregistered resource to download/copy from')
@click.option('-D', '--no-dynamic', default=False, is_flag=True,
              help="Whether to skip looking into each processor's --dump-{json,module-dir} for module-level resources")
@click.option('-t', '--resource-type', type=click.Choice(['file', 'directory', 'archive']), default='file',
              help='Type of resource',)
@click.option('-P', '--path-in-archive', default='.', help='Path to extract in case of archive type')
@click.option('-a', '--allow-uninstalled', is_flag=True,
              help="Allow installing resources for uninstalled processors",)
@click.option('-o', '--overwrite', help='Overwrite existing resources', is_flag=True)
@click.option('-l', '--location', type=click.Choice(RESOURCE_LOCATIONS),
              help="Where to store resources - defaults to first location in processor's 'resource_locations' "
                   "list or finally 'data'")
@click.argument('executable', required=True)
@click.argument('name', required=False)
def download(any_url, no_dynamic, resource_type, path_in_archive, allow_uninstalled, overwrite, location, executable,
             name):
    """
    Download resource NAME for processor EXECUTABLE.

    NAME is the name of the resource made available by downloading or copying.

    If NAME is '*' (asterisk), then download all known registered resources for this processor.

    If ``--any-url=URL`` or ``-n URL`` is given, then URL is accepted regardless of registered resources for ``NAME``.
    (This can be used for unknown resources or for replacing registered resources.)

    If ``--resource-type`` is set to `archive`, then that archive gets unpacked after download,
    and its ``--path-in-archive`` will subsequently be renamed to NAME.
    """
    # NOTE: the docstring above is the command's help text and is kept verbatim.
    log = getLogger('ocrd.cli.resmgr')
    resmgr = OcrdResourceManager()
    if executable != '*' and not name:
        log.error(f"Unless EXECUTABLE ('{executable}') is the '*' wildcard, NAME is required")
        sys.exit(1)
    elif executable == '*':
        # wildcards are passed on to the resource manager as None
        executable = None
    if name == '*':
        name = None
    if executable and not which(executable):
        if not allow_uninstalled:
            log.error(f"Executable '{executable}' is not installed. "
                      f"To download resources anyway, use the -a/--allow-uninstalled flag")
            sys.exit(1)
        else:
            log.info(f"Executable '{executable}' is not installed, but downloading resources anyway")
    reslist = resmgr.list_available(executable=executable, dynamic=not no_dynamic, name=name)
    if not any(r[1] for r in reslist):
        log.info(f"No resources {name} found in registry for executable {executable}")
        if executable and name:
            # nothing registered: fabricate a single entry from the CLI arguments
            reslist = [(executable, [{
                'url': any_url or '???',
                'name': name,
                'type': resource_type,
                'path_in_archive': path_in_archive}]
            )]
    for this_executable, this_reslist in reslist:
        for resdict in this_reslist:
            # entries that carry a size are reported as registered; this must
            # be decided before the size gets filled in below
            registered = "registered" if 'size' in resdict else "unregistered"
            if any_url:
                resdict['url'] = any_url
            if resdict['url'] == '???':
                log.warning(f"Cannot download user resource {resdict['name']}")
                continue
            _probe_resource(resdict, registered, log)
            # Resolve per resource WITHOUT overwriting the --location option:
            # otherwise the default of the first processor would be treated as
            # user-selected for all following processors and could abort the
            # run with a bogus "invalid location" error.
            this_location = _resolve_location(location, this_executable, log)
            basedir = _resolve_basedir(resmgr, this_executable, this_location)

            try:
                with click.progressbar(length=resdict['size']) as bar:
                    fpath = resmgr.download(
                        this_executable,
                        resdict['url'],
                        basedir,
                        name=resdict['name'],
                        resource_type=resdict.get('type', resource_type),
                        path_in_archive=resdict.get('path_in_archive', path_in_archive),
                        overwrite=overwrite,
                        no_subdir=this_location in ['cwd', 'module'],
                        progress_cb=lambda delta: bar.update(delta)
                    )
                if registered == 'unregistered':
                    # report the resource actually processed; NAME may be None
                    # (wildcard) and --any-url may be empty
                    log.info(f"{this_executable} resource '{resdict['name']}' ({resdict['url']}) not a known "
                             f"resource, creating stub in '{resmgr.user_list}'")
                    resmgr.add_to_user_database(this_executable, fpath, url=any_url)
                resmgr.save_user_list()
                log.info(f"Installed resource {resdict['url']} under {fpath}")
            except FileExistsError as exc:
                # an already existing target is reported, not treated as failure
                log.info(str(exc))
            log.info(f"Use in parameters as "
                     f"'{resmgr.parameter_usage(resdict['name'], usage=resdict.get('parameter_usage', 'as-is'))}'")
189
190
191
@resmgr_cli.command('migrate')
@click.argument('migration', type=click.Choice(['2.37.0']))
def migrate(migration):
    """
    Update the configuration after updating core to MIGRATION
    """
    # NOTE: the docstring above is the command's help text and is kept verbatim.
    resmgr = OcrdResourceManager(skip_init=True)
    log = getLogger('ocrd.resmgr.migrate')
    if not resmgr.user_list.exists():
        log.info(f'No configuration file found at {resmgr.user_list}, nothing to do')
        # without a user list there is nothing to back up or rewrite; going on
        # would fail in read_text() below
        return
    if migration == '2.37.0':
        backup_file = resmgr.user_list.with_suffix(f'.yml.before-{migration}')
        yaml_in_str = resmgr.user_list.read_text()
        log.info(f'Backing {resmgr.user_list} to {backup_file}')
        backup_file.write_text(yaml_in_str)
        log.info(f'Applying migration {migration} to {resmgr.user_list}')
        # an empty YAML document loads as None; treat it as "no entries"
        yaml_in = safe_load(yaml_in_str) or {}
        # Both legacy type names are rewritten to 'directory', as before.
        # NOTE(review): confirm that 'tarball' is not meant to become 'archive'.
        legacy_types = ('github-dir', 'tarball')
        yaml_out = {
            executable: [
                {
                    key: ('directory' if key == 'type' and value in legacy_types else value)
                    for key, value in resdict_in.items()
                }
                for resdict_in in reslist_in or []
            ]
            for executable, reslist_in in yaml_in.items()
        }
        # the header line must be an f-string so the applied version is recorded
        resmgr.user_list.write_text(
            RESOURCE_USER_LIST_COMMENT + f'\n# migrated with ocrd resmgr migrate {migration}\n' + safe_dump(yaml_out))
        log.info(f'Applied migration {migration} to {resmgr.user_list}')
225