Passed
Pull Request — master (#498)
by Konstantin
01:55
created

ocrd.cli.workspace.set_id()   A

Complexity

Conditions 1

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 7
nop 2
dl 0
loc 14
rs 10
c 0
b 0
f 0
1
import os
2
from os.path import relpath, exists, join
3
import sys
4
5
import click
6
7
from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager
8
from ocrd_utils import getLogger, pushd_popd
9
10
# Module-level logger for the workspace CLI (used by prune-files to report removal errors).
log = getLogger('ocrd.cli.workspace')
11
12
class WorkspaceCtx:
    """
    State handed from the ``workspace`` command group to its subcommands.

    Stores the workspace directory, the METS basename and the automatic-backup
    flag exactly as passed in, plus a freshly created ``Resolver``.
    """

    def __init__(self, directory, mets_basename, automatic_backup):
        # One resolver per CLI invocation; subcommands reach it via ctx.resolver.
        self.resolver = Resolver()
        self.directory = directory
        self.mets_basename = mets_basename
        self.automatic_backup = automatic_backup
19
20
# Decorator that hands the innermost WorkspaceCtx found on the click context
# to the decorated command as its first argument.
pass_workspace = click.make_pass_decorator(WorkspaceCtx)
21
22
# ----------------------------------------------------------------------
23
# ocrd workspace
24
# ----------------------------------------------------------------------
25
26
@click.group("workspace")
@click.option('-d', '--directory', envvar='WORKSPACE_DIR', default='.', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR', help='Changes the workspace folder location.', show_default=True)
@click.option('-M', '--mets-basename', default="mets.xml", help='The basename of the METS file.', show_default=True)
@click.option('--backup', default=False, help="Backup mets.xml whenever it is saved.", is_flag=True)
@click.pass_context
def workspace_cli(ctx, directory, mets_basename, backup):
    """
    Working with workspace
    """
    # The docstring above is what click shows as the group's help text.
    # Make the directory absolute once, so every subcommand sees the same path.
    absolute_directory = os.path.abspath(directory)
    ctx.obj = WorkspaceCtx(
        absolute_directory,
        mets_basename,
        automatic_backup=backup,
    )
36
37
# ----------------------------------------------------------------------
38
# ocrd workspace validate
39
# ----------------------------------------------------------------------
40
41
@workspace_cli.command('validate', help='''

    Validate a workspace

''')
@pass_workspace
@click.option('-a', '--download', is_flag=True, help="Download all files")
@click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(['imagefilename', 'dimension', 'mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'pixel_density', 'page', 'url']))
@click.option('--page-textequiv-consistency', '--page-strictness', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
@click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency", type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
@click.argument('mets_url', nargs=-1)
def validate_workspace(ctx, mets_url, download, skip, page_textequiv_consistency, page_coordinate_consistency):
    # METS_URL is declared with nargs=-1, so click passes a (possibly empty)
    # tuple; only the first value is used. Without an argument, fall back to
    # the basename configured on the group (-M/--mets-basename, default
    # "mets.xml") instead of a hard-coded name, so that -M is honoured here
    # like in the other subcommands.
    mets_url = mets_url[0] if mets_url else ctx.mets_basename
    report = WorkspaceValidator.validate(
        ctx.resolver,
        mets_url,
        src_dir=ctx.directory,
        skip=skip,
        download=download,
        page_strictness=page_textequiv_consistency,
        page_coordinate_consistency=page_coordinate_consistency
    )
    # The report is always printed as XML; the exit status signals validity.
    print(report.to_xml())
    if not report.is_valid:
        sys.exit(128)
69
70
# ----------------------------------------------------------------------
71
# ocrd workspace clone
72
# ----------------------------------------------------------------------
73
74
@workspace_cli.command('clone')
@click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
@click.option('-a', '--download', is_flag=True, help="Download all files and change location in METS file after cloning")
@click.argument('mets_url')
@click.argument('workspace_dir', default=None, required=False)
@pass_workspace
def workspace_clone(ctx, clobber_mets, download, mets_url, workspace_dir):
    """
    Create a workspace from a METS_URL and return the directory

    METS_URL can be a URL, an absolute path or a path relative to $PWD.

    If WORKSPACE_DIR is not provided, use the current working directory
    """
    # The docstring above is the command's help text.
    # A missing/empty WORKSPACE_DIR means "clone into the current directory".
    target_dir = os.path.abspath(workspace_dir if workspace_dir else '.')
    cloned = ctx.resolver.workspace_from_url(
        mets_url,
        dst_dir=target_dir,
        mets_basename=ctx.mets_basename,
        clobber_mets=clobber_mets,
        download=download,
    )
    cloned.save_mets()
    # Report where the workspace ended up.
    print(cloned.directory)
99
100
# ----------------------------------------------------------------------
101
# ocrd workspace init
102
# ----------------------------------------------------------------------
103
104
@workspace_cli.command('init')
@click.option('-f', '--clobber-mets', help="Clobber mets.xml if it exists", is_flag=True, default=False)
@click.argument('directory', required=False)
@pass_workspace
def workspace_create(ctx, clobber_mets, directory):
    """
    Create a workspace with an empty METS file in DIRECTORY.

    Use '.' for $PWD
    """
    # The docstring above is the command's help text (a stray trailing
    # double-quote after "$PWD" was removed from it).
    # A missing/empty DIRECTORY means "initialise the current directory".
    target_dir = os.path.abspath(directory if directory else '.')
    workspace = ctx.resolver.workspace_from_nothing(
        directory=target_dir,
        mets_basename=ctx.mets_basename,
        clobber_mets=clobber_mets
    )
    workspace.save_mets()
    # Report where the workspace was created.
    print(workspace.directory)
123
124
# ----------------------------------------------------------------------
125
# ocrd workspace add
126
# ----------------------------------------------------------------------
127
128
@workspace_cli.command('add')
@click.option('-G', '--file-grp', help="fileGrp USE", required=True)
@click.option('-i', '--file-id', help="ID for the file", required=True)
@click.option('-m', '--mimetype', help="Media type of the file", required=True)
@click.option('-g', '--page-id', help="ID of the physical page")
@click.option('-C', '--check-file-exists', help="Whether to ensure FNAME exists", is_flag=True, default=False)
@click.option('--force', help="If file with ID already exists, replace it", default=False, is_flag=True)
@click.argument('fname', required=True)
@pass_workspace
def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, check_file_exists, force, fname):
    """
    Add a file or http(s) URL FNAME to METS in a workspace.
    If FNAME is not an http(s) URL and is not a workspace-local existing file, try to copy to workspace.
    """
    # The docstring above is the command's help text.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename, automatic_backup=ctx.automatic_backup)

    file_attrs = {'fileGrp': file_grp, 'ID': file_id, 'mimetype': mimetype, 'pageId': page_id, 'force': force}
    logger = getLogger('ocrd.cli.workspace.add')
    workspace_root = ctx.directory

    # Only non-http(s) names get the local-file treatment below.
    if not fname.startswith(('http://', 'https://')):
        if not fname.startswith(workspace_root):
            in_workspace = join(workspace_root, fname)
            if exists(in_workspace):
                # FNAME was given relative to the workspace directory.
                fname = in_workspace
            else:
                logger.debug("File '%s' is not in workspace, copying", fname)
                try:
                    fname = ctx.resolver.download_to_directory(workspace_root, fname, subdir=file_grp)
                except FileNotFoundError:
                    # Without -C a missing source is tolerated and FNAME is
                    # recorded unchanged.
                    if check_file_exists:
                        logger.error("File '%s' does not exist, halt execution!" % fname)
                        sys.exit(1)
        if check_file_exists and not exists(fname):
            logger.error("File '%s' does not exist, halt execution!" % fname)
            sys.exit(1)
        # Paths under the workspace directory are stored relative to it.
        if fname.startswith(workspace_root):
            fname = relpath(fname, workspace_root)
        file_attrs['local_filename'] = fname

    file_attrs['url'] = fname
    workspace.mets.add_file(**file_attrs)
    workspace.save_mets()
168
169
# ----------------------------------------------------------------------
170
# ocrd workspace find
171
# ----------------------------------------------------------------------
172
173
@workspace_cli.command('find')
@click.option('-G', '--file-grp', help="fileGrp USE")
@click.option('-m', '--mimetype', help="Media type to look for")
@click.option('-g', '--page-id', help="Page ID")
@click.option('-i', '--file-id', help="ID")
# pylint: disable=bad-continuation
@click.option('-k', '--output-field', help="Output field. Repeat for multiple fields, will be joined with tab",
        default=['url'],
        multiple=True,
        type=click.Choice([
            'url',
            'mimetype',
            'pageId',
            'ID',
            'fileGrp',
            'basename',
            'basename_without_extension',
            'local_filename',
        ]))
@click.option('--download', is_flag=True, help="Download found files to workspace and change location in METS file ")
@pass_workspace
def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, download):
    """
    Find files.
    """
    # The docstring above is the command's help text.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
    rows = []
    mets_changed = False
    matches = workspace.mets.find_files(
        ID=file_id,
        fileGrp=file_grp,
        mimetype=mimetype,
        pageId=page_id,
    )
    for ocrd_file in matches:
        if download and not ocrd_file.local_filename:
            workspace.download_file(ocrd_file)
            mets_changed = True
        row = []
        for field in output_field:
            if field == 'pageId':
                # Placeholder: store the file ID now; it is swapped for the
                # page ID in one batch lookup below.
                row.append(ocrd_file.ID)
            else:
                row.append(getattr(ocrd_file, field) or '')
        rows.append(row)
    if mets_changed:
        workspace.save_mets()
    if 'pageId' in output_field:
        # Resolve all page IDs with a single call, keyed by the file IDs
        # collected in the (first) pageId column.
        page_col = output_field.index('pageId')
        file_ids = [row[page_col] for row in rows]
        pages = workspace.mets.get_physical_pages(for_fileIds=file_ids)
        for row, page in zip(rows, pages):
            row[page_col] = page or ''
    for row in rows:
        print('\t'.join(row))
222
223
# ----------------------------------------------------------------------
224
# ocrd workspace remove
225
# ----------------------------------------------------------------------
226
227
@workspace_cli.command('remove')
@click.option('-k', '--keep-file', help="Do not delete file from file system", default=False, is_flag=True)
@click.option('-f', '--force', help="Continue even if mets:file or file on file system does not exist", default=False, is_flag=True)
@click.argument('ID', nargs=-1)
@pass_workspace
def workspace_remove_file(ctx, id, force, keep_file):  # pylint: disable=redefined-builtin
    """
    Delete file by ID from mets.xml
    """
    # The docstring above is the command's help text.
    # `id` is a tuple here because the ID argument is declared with nargs=-1.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    for file_id in id:
        workspace.remove_file(file_id, force=force, keep_file=keep_file)
    # Save once after all removals.
    workspace.save_mets()
240
241
242
# ----------------------------------------------------------------------
243
# ocrd workspace remove-group
244
# ----------------------------------------------------------------------
245
246
@workspace_cli.command('remove-group', help="""

    Delete a file group

""")
@click.option('-r', '--recursive', help="Delete any files in the group before the group itself", default=False, is_flag=True)
@click.option('-f', '--force', help="Continue removing even if group or containing files not found in METS", default=False, is_flag=True)
@click.option('-k', '--keep-files', help="Do not delete files from file system", default=False, is_flag=True)
@click.argument('GROUP', nargs=-1)
@pass_workspace
def remove_group(ctx, group, recursive, force, keep_files):
    # This command saves the METS file, so it passes the group's --backup
    # flag on to the Workspace, as `remove` and `set-id` already do.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    # `group` is a tuple because the GROUP argument is declared with nargs=-1.
    for g in group:
        workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
    # Save once after all groups have been processed.
    workspace.save_mets()
261
262
# ----------------------------------------------------------------------
263
# ocrd workspace prune-files
264
# ----------------------------------------------------------------------
265
266
@workspace_cli.command('prune-files', help="""

    Removes mets:files that point to non-existing local files

""")
@pass_workspace
def prune_files(ctx):
    # This command saves the METS file, so it passes the group's --backup
    # flag on to the Workspace, as `remove` and `set-id` already do.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    # The existence checks run inside pushd_popd(workspace.directory) --
    # presumably so relative local_filename values resolve against the
    # workspace directory rather than the caller's $PWD.
    with pushd_popd(workspace.directory):
        # Snapshot the matches first so removing entries cannot disturb the
        # iteration over the METS file list.
        for f in list(workspace.mets.find_files()):
            try:
                if not f.local_filename or not exists(f.local_filename):
                    workspace.mets.remove_file(f.ID)
            except Exception as e:
                # "%s" (not "%f"): f is a file object, not a float, and a
                # float placeholder would make the log call itself fail.
                log.exception("Error removing %s: %s", f, e)
                raise
        workspace.save_mets()
283
284
# ----------------------------------------------------------------------
285
# ocrd workspace list-group
286
# ----------------------------------------------------------------------
287
288
@workspace_cli.command('list-group', help="""

    List fileGrp USE attributes

""")
@pass_workspace
def list_groups(ctx):
    # Pass the configured METS basename (-M/--mets-basename) so this command
    # reads the same METS file as the other subcommands.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
    # One file group per output line.
    print("\n".join(workspace.mets.file_groups))
297
298
# ----------------------------------------------------------------------
299
# ocrd workspace list-page
300
# ----------------------------------------------------------------------
301
302
@workspace_cli.command('list-page', help="""

    List page IDs

""")
@pass_workspace
def list_pages(ctx):
    # Pass the configured METS basename (-M/--mets-basename) so this command
    # reads the same METS file as the other subcommands.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
    # One physical page ID per output line.
    print("\n".join(workspace.mets.physical_pages))
311
312
# ----------------------------------------------------------------------
313
# ocrd workspace get-id
314
# ----------------------------------------------------------------------
315
316
@workspace_cli.command('get-id', help="""

    Get METS id if any

""")
@pass_workspace
def get_id(ctx):
    # Pass the configured METS basename (-M/--mets-basename) so this command
    # reads the same METS file as the other subcommands.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=ctx.mets_basename)
    ID = workspace.mets.unique_identifier
    # Print nothing at all when the METS has no (or an empty) identifier.
    if ID:
        print(ID)
327
328
# ----------------------------------------------------------------------
329
# ocrd workspace set-id
330
# ----------------------------------------------------------------------
331
332
@workspace_cli.command('set-id', help="""

    Set METS ID.

    If one of the supported identifier mechanisms is used, will set this identifier.

    Otherwise will create a new <mods:identifier type="purl">{{ ID }}</mods:identifier>.
""")
@click.argument('ID')
@pass_workspace
def set_id(ctx, id):   # pylint: disable=redefined-builtin
    # Open the workspace with the group's settings, including --backup,
    # because the METS file is saved below.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    mets = workspace.mets
    mets.unique_identifier = id
    workspace.save_mets()
346
347
# ----------------------------------------------------------------------
348
# ocrd workspace backup
349
# ----------------------------------------------------------------------
350
351
@workspace_cli.group('backup')
@click.pass_context
def workspace_backup_cli(ctx): # pylint: disable=unused-argument
    """
    Backing and restoring workspaces - dev edition
    """
    # Pure grouping command: the docstring above is its help text and there
    # is nothing to execute; the subcommands do the actual work.
357
358
@workspace_backup_cli.command('add')
@pass_workspace
def workspace_backup_add(ctx):
    """
    Create a new backup
    """
    # The docstring above is the command's help text.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    WorkspaceBackupManager(workspace).add()
366
367
@workspace_backup_cli.command('list')
@pass_workspace
def workspace_backup_list(ctx):
    """
    List backups
    """
    # The docstring above is the command's help text.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    manager = WorkspaceBackupManager(workspace)
    # One backup per output line, in the order the manager returns them.
    for entry in manager.list():
        print(entry)
376
377
@workspace_backup_cli.command('restore')
@click.option('-f', '--choose-first', help="Restore first matching version if more than one", is_flag=True)
@click.argument('bak') #, type=click.Path(dir_okay=False, readable=True, resolve_path=True))
@pass_workspace
def workspace_backup_restore(ctx, choose_first, bak):
    """
    Restore backup BAK
    """
    # The docstring above is the command's help text.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    WorkspaceBackupManager(workspace).restore(bak, choose_first)
387
388
@workspace_backup_cli.command('undo')
@pass_workspace
def workspace_backup_undo(ctx):
    """
    Restore the last backup
    """
    # The docstring above is the command's help text.
    workspace = Workspace(
        ctx.resolver,
        directory=ctx.directory,
        mets_basename=ctx.mets_basename,
        automatic_backup=ctx.automatic_backup,
    )
    WorkspaceBackupManager(workspace).undo()
396