Passed
Pull Request — master (#673)
by Konstantin
02:14
created

ocrd.cli.workspace.merge()   A

Complexity

Conditions 2

Size

Total Lines 29
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 21
dl 0
loc 29
rs 9.376
c 0
b 0
f 0
cc 2
nop 8

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more, or different data.

There are several approaches to avoid long parameter lists:

1
import os
2
from os import getcwd
3
from os.path import relpath, exists, join, isabs, dirname, basename, abspath
4
from pathlib import Path
5
from json import loads
6
import sys
7
from glob import glob   # XXX pathlib.Path.glob does not support absolute globs
8
import re
9
10
import click
11
12
from ocrd import Resolver, Workspace, WorkspaceValidator, WorkspaceBackupManager
13
from ocrd_utils import getLogger, initLogging, pushd_popd, EXT_TO_MIME
14
from ocrd.decorators import mets_find_options
15
from . import command_with_replaced_help
16
17
18
class WorkspaceCtx():
    """
    Shared state handed to every ``ocrd workspace`` sub-command.

    Reconciles ``--directory``, ``--mets`` and the deprecated
    ``--mets-basename`` into one workspace directory and one METS location.

    Attributes set on the instance: ``log``, ``directory``, ``resolver``,
    ``mets_url`` and ``automatic_backup``.
    """

    def __init__(self, directory, mets_url, mets_basename, automatic_backup):
        """
        Args:
            directory: value of ``--directory`` (may be empty/None).
            mets_url: value of ``--mets`` (may be empty/None).
            mets_basename: value of the deprecated ``--mets-basename``.
            automatic_backup: value of ``--backup``, stored unchanged.

        Raises:
            ValueError: if both ``--mets`` and ``--mets-basename`` are given,
                if the absolute ``--mets`` path does not start with the
                absolute ``--directory``, or if ``--mets`` is an http(s) URL
                and no ``--directory`` was given.
        """
        self.log = getLogger('ocrd.cli.workspace')
        if mets_basename and mets_url:
            raise ValueError("Use either --mets or --mets-basename, not both")
        if mets_basename and not mets_url:
            self.log.warning(DeprecationWarning("--mets-basename is deprecated. Use --mets/--directory instead"))
        mets_basename = mets_basename if mets_basename else 'mets.xml'
        if directory and mets_url:
            directory = abspath(directory)
            if not abspath(mets_url).startswith(directory):
                raise ValueError("--mets has a directory part inconsistent with --directory")
        elif not directory and mets_url:
            # Only genuine http(s) URLs are rejected here; a bare 'http' prefix
            # would also match local paths such as 'httpd/mets.xml'.
            if mets_url.startswith(('http://', 'https://')):
                raise ValueError("--mets is an http(s) URL but no --directory was given")
            directory = dirname(abspath(mets_url)) or getcwd()
        elif directory and not mets_url:
            directory = abspath(directory)
            mets_url = join(directory, mets_basename)
        else:
            # Neither option given: default to <cwd>/<mets_basename>.
            directory = getcwd()
            mets_url = join(directory, mets_basename)
        self.directory = directory
        self.resolver = Resolver()
        self.mets_url = mets_url
        self.automatic_backup = automatic_backup
45
46
# Decorator for sub-commands: passes the WorkspaceCtx object found on the
# click context (set as ctx.obj in workspace_cli) as the first argument.
pass_workspace = click.make_pass_decorator(WorkspaceCtx)
47
48
# ----------------------------------------------------------------------
49
# ocrd workspace
50
# ----------------------------------------------------------------------
51
52
@click.group("workspace")
@click.option('-d', '--directory', envvar='WORKSPACE_DIR', type=click.Path(file_okay=False), metavar='WORKSPACE_DIR', help='Changes the workspace folder location [default: METS_URL directory or .]')
@click.option('-M', '--mets-basename', default=None, help='METS file basename. Deprecated, use --mets/--directory')
@click.option('-m', '--mets', default=None, help='The path/URL of the METS file [default: WORKSPACE_DIR/mets.xml]', metavar="METS_URL")
@click.option('--backup', default=False, help="Backup mets.xml whenever it is saved.", is_flag=True)
@click.pass_context
def workspace_cli(ctx, directory, mets, mets_basename, backup):
    """
    Working with workspace
    """
    # Set up logging, then store the shared context object that
    # @pass_workspace hands to every sub-command.
    initLogging()
    ctx.obj = WorkspaceCtx(directory, mets_url=mets, mets_basename=mets_basename, automatic_backup=backup)
64
65
# ----------------------------------------------------------------------
66
# ocrd workspace validate
67
# ----------------------------------------------------------------------
68
69
@workspace_cli.command('validate', cls=command_with_replaced_help(
    (r' \[METS_URL\]', ''))) # XXX deprecated argument
@pass_workspace
@click.option('-a', '--download', is_flag=True, help="Download all files")
@click.option('-s', '--skip', help="Tests to skip", default=[], multiple=True, type=click.Choice(['imagefilename', 'dimension', 'mets_unique_identifier', 'mets_file_group_names', 'mets_files', 'pixel_density', 'page', 'page_xsd', 'mets_xsd', 'url']))
@click.option('--page-textequiv-consistency', '--page-strictness', help="How strict to check PAGE multi-level textequiv consistency", type=click.Choice(['strict', 'lax', 'fix', 'off']), default='strict')
@click.option('--page-coordinate-consistency', help="How fierce to check PAGE multi-level coordinate consistency", type=click.Choice(['poly', 'baseline', 'both', 'off']), default='poly')
@click.argument('mets_url', default=None, required=False)
def workspace_validate(ctx, mets_url, download, skip, page_textequiv_consistency, page_coordinate_consistency):
    """
    Validate a workspace

    METS_URL can be a URL, an absolute path or a path relative to $PWD.
    If not given, use --mets accordingly.

    Check that the METS and its referenced file contents
    abide by the OCR-D specifications.
    """
    LOG = getLogger('ocrd.cli.workspace.validate')
    if mets_url:
        # The positional METS_URL is deprecated; point users at the
        # equivalent invocation of *this* command (validate, not init).
        LOG.warning(DeprecationWarning("Use 'ocrd workspace --mets METS validate' instead of argument 'METS_URL' ('%s')" % mets_url))
    else:
        mets_url = ctx.mets_url
    report = WorkspaceValidator.validate(
        ctx.resolver,
        mets_url,
        src_dir=ctx.directory,
        skip=skip,
        download=download,
        page_strictness=page_textequiv_consistency,
        page_coordinate_consistency=page_coordinate_consistency
    )
    print(report.to_xml())
    # Exit status 128 signals an invalid workspace to the calling shell.
    if not report.is_valid:
        sys.exit(128)
104
105
# ----------------------------------------------------------------------
106
# ocrd workspace clone
107
# ----------------------------------------------------------------------
108
109
@workspace_cli.command('clone', cls=command_with_replaced_help(
    (r' \[WORKSPACE_DIR\]', ''))) # XXX deprecated argument
@click.option('-f', '--clobber-mets', help="Overwrite existing METS file", default=False, is_flag=True)
@click.option('-a', '--download', is_flag=True, help="Download all files and change location in METS file after cloning")
@click.argument('mets_url')
# XXX deprecated
@click.argument('workspace_dir', default=None, required=False)
@pass_workspace
def workspace_clone(ctx, clobber_mets, download, mets_url, workspace_dir):
    """
    Create a workspace from METS_URL and return the directory

    METS_URL can be a URL, an absolute path or a path relative to $PWD.
    If METS_URL is not provided, use --mets accordingly.
    METS_URL can also be an OAI-PMH GetRecord URL wrapping a METS file.
    """
    logger = getLogger('ocrd.cli.workspace.clone')
    if workspace_dir:
        # Deprecated positional WORKSPACE_DIR still overrides --directory.
        deprecation = DeprecationWarning("Use 'ocrd workspace --directory DIR clone' instead of argument 'WORKSPACE_DIR' ('%s')" % workspace_dir)
        logger.warning(deprecation)
        ctx.directory = workspace_dir

    target_dir = abspath(ctx.directory)
    target_basename = basename(ctx.mets_url)
    cloned = ctx.resolver.workspace_from_url(
        mets_url,
        dst_dir=target_dir,
        mets_basename=target_basename,
        clobber_mets=clobber_mets,
        download=download,
    )
    cloned.save_mets()
    # Print the resulting workspace directory on stdout.
    print(cloned.directory)
139
140
# ----------------------------------------------------------------------
141
# ocrd workspace init
142
# ----------------------------------------------------------------------
143
144
@workspace_cli.command('init', cls=command_with_replaced_help(
    (r' \[DIRECTORY\]', ''))) # XXX deprecated argument
@click.option('-f', '--clobber-mets', help="Clobber mets.xml if it exists", is_flag=True, default=False)
# XXX deprecated
@click.argument('directory', default=None, required=False)
@pass_workspace
def workspace_init(ctx, clobber_mets, directory):
    """
    Create a workspace with an empty METS file in --directory.

    """
    logger = getLogger('ocrd.cli.workspace.init')
    if directory:
        # Deprecated positional DIRECTORY still overrides --directory.
        deprecation = DeprecationWarning("Use 'ocrd workspace --directory DIR init' instead of argument 'DIRECTORY' ('%s')" % directory)
        logger.warning(deprecation)
        ctx.directory = directory
    target_dir = abspath(ctx.directory)
    created = ctx.resolver.workspace_from_nothing(
        directory=target_dir,
        mets_basename=basename(ctx.mets_url),
        clobber_mets=clobber_mets
    )
    created.save_mets()
    # Print the resulting workspace directory on stdout.
    print(created.directory)
166
167
# ----------------------------------------------------------------------
168
# ocrd workspace add
169
# ----------------------------------------------------------------------
170
171
@workspace_cli.command('add')
@click.option('-G', '--file-grp', help="fileGrp USE", required=True, metavar='FILE_GRP')
@click.option('-i', '--file-id', help="ID for the file", required=True, metavar='FILE_ID')
@click.option('-m', '--mimetype', help="Media type of the file. Guessed from extension if not provided", required=False, metavar='TYPE')
@click.option('-g', '--page-id', help="ID of the physical page", metavar='PAGE_ID')
@click.option('-C', '--check-file-exists', help="Whether to ensure FNAME exists", is_flag=True, default=False)
@click.option('--ignore', help="Do not check whether file exists.", default=False, is_flag=True)
@click.option('--force', help="If file with ID already exists, replace it. No effect if --ignore is set.", default=False, is_flag=True)
@click.argument('fname', required=True)
@pass_workspace
def workspace_add_file(ctx, file_grp, file_id, mimetype, page_id, ignore, check_file_exists, force, fname):
    """
    Add a file or http(s) URL FNAME to METS in a workspace.
    If FNAME is not an http(s) URL and is not a workspace-local existing file, try to copy to workspace.
    """
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)

    log = getLogger('ocrd.cli.workspace.add')

    def halt_missing(path):
        # Log the missing file and terminate with exit status 1.
        log.error("File '%s' does not exist, halt execution!" % path)
        sys.exit(1)

    if not mimetype:
        suffix = Path(fname).suffix
        try:
            mimetype = EXT_TO_MIME[suffix]
        except KeyError:
            # Guessing failed: report it, but carry on with an unset mimetype.
            log.error("Cannot guess mimetype from extension '%s' for '%s'. Set --mimetype explicitly" % (suffix, fname))
        else:
            log.info("Guessed mimetype to be %s" % mimetype)

    kwargs = dict(fileGrp=file_grp, ID=file_id, mimetype=mimetype, pageId=page_id, force=force, ignore=ignore)
    log.debug("Adding '%s' (%s)", fname, kwargs)

    is_remote = fname.startswith(('http://', 'https://'))
    if not is_remote:
        if not fname.startswith(ctx.directory):
            in_workspace = join(ctx.directory, fname)
            if not isabs(fname) and exists(in_workspace):
                # Relative path that already exists below the workspace.
                fname = in_workspace
            else:
                log.debug("File '%s' is not in workspace, copying", fname)
                try:
                    fname = ctx.resolver.download_to_directory(ctx.directory, fname, subdir=file_grp)
                except FileNotFoundError:
                    # Without --check-file-exists a missing source is tolerated.
                    if check_file_exists:
                        halt_missing(fname)
        if check_file_exists and not exists(fname):
            halt_missing(fname)
        if fname.startswith(ctx.directory):
            # Store workspace-local files relative to the workspace directory.
            fname = relpath(fname, ctx.directory)
        kwargs['local_filename'] = fname

    kwargs['url'] = fname
    if not page_id:
        log.warning("You did not provide '--page-id/-g', so the file you added is not linked to a specific page.")
    workspace.mets.add_file(**kwargs)
    workspace.save_mets()
222
223
# ----------------------------------------------------------------------
224
# ocrd workspace bulk-add
225
# ----------------------------------------------------------------------
226
227
# pylint: disable=broad-except
228
@workspace_cli.command('bulk-add')
@click.option('-r', '--regex', help="Regular expression matching the FILE_GLOB filesystem paths to define named captures usable in the other parameters", required=True)
@click.option('-m', '--mimetype', help="Media type of the file. If not provided, guess from filename", required=False)
@click.option('-g', '--page-id', help="physical page ID of the file", required=False)
@click.option('-i', '--file-id', help="ID of the file", required=True)
@click.option('-u', '--url', help="local filesystem path in the workspace directory (copied from source file if different)", required=True)
@click.option('-G', '--file-grp', help="File group USE of the file", required=True)
@click.option('-n', '--dry-run', help="Don't actually do anything to the METS or filesystem, just preview", default=False, is_flag=True)
@click.option('-I', '--ignore', help="Disable checking for existing file entries (faster)", default=False, is_flag=True)
@click.option('-f', '--force', help="Replace existing file entries with the same ID (no effect when --ignore is set, too)", default=False, is_flag=True)
@click.option('-s', '--skip', help="Skip files not matching --regex (instead of failing)", default=False, is_flag=True)
@click.argument('file_glob', nargs=-1, required=True)
@pass_workspace
def workspace_cli_bulk_add(ctx, regex, mimetype, page_id, file_id, url, file_grp, dry_run, file_glob, ignore, force, skip):
    r"""
    Add files in bulk to an OCR-D workspace.

    FILE_GLOB can either be a shell glob expression or a list of files.

    --regex is applied to the absolute path of every file in FILE_GLOB and can
    define named groups that can be used in --page-id, --file-id, --mimetype, --url and
    --file-grp by referencing the named group 'grp' in the regex as '{{ grp }}'.

    \b
    Example:
        ocrd workspace bulk-add \\
                --regex '^.*/(?P<fileGrp>[^/]+)/page_(?P<pageid>.*)\.(?P<ext>[^\.]*)$' \\
                --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \\
                --page-id 'PHYS_{{ pageid }}' \\
                --file-grp "{{ fileGrp }}" \\
                --url '{{ fileGrp }}/FILE_{{ pageid }}.{{ ext }}' \\
                path/to/files/*/*.*

    """
    log = getLogger('ocrd.cli.workspace.bulk-add') # pylint: disable=redefined-outer-name
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)

    try:
        pat = re.compile(regex)
    except Exception as e:
        log.error("Invalid regex: %s" % e)
        sys.exit(1)

    # Expand every glob and resolve the hits to absolute paths.
    file_paths = [Path(x).resolve() for fglob in file_glob for x in glob(fglob)]
    total = len(file_paths)

    # Count from 1 so the progress display ends at [N/N].
    for i, file_path in enumerate(file_paths, start=1):
        log.info("[%4d/%d] %s" % (i, total, file_path))

        # match regex
        m = pat.match(str(file_path))
        if not m:
            if skip:
                continue
            log.error("File not matched by regex: '%s'" % file_path)
            sys.exit(1)
        group_dict = m.groupdict()

        # set up file info
        file_dict = {'url': url, 'mimetype': mimetype, 'ID': file_id, 'pageId': page_id, 'fileGrp': file_grp}

        # guess mime type
        if not file_dict['mimetype']:
            try:
                file_dict['mimetype'] = EXT_TO_MIME[file_path.suffix]
            except KeyError:
                log.error("Cannot guess mimetype from extension '%s' for '%s'. Set --mimetype explicitly" % (file_path.suffix, file_path))

        # expand templates
        for param_name, template in list(file_dict.items()):
            # Optional parameters (--page-id, an unguessable --mimetype) may be
            # None and have nothing to expand.
            if template is None:
                continue
            for group_name, group_value in group_dict.items():
                # A named group that did not participate in the match is None;
                # substitute an empty string instead of failing in str.replace.
                template = template.replace('{{ %s }}' % group_name, group_value or '')
            file_dict[param_name] = template

        # copy files
        if file_dict['url']:
            urlpath = Path(workspace.directory, file_dict['url'])
            if not urlpath.exists():
                log.info("cp '%s' '%s'", file_path, urlpath)
                if not dry_run:
                    # The target may be several directory levels deep.
                    urlpath.parent.mkdir(parents=True, exist_ok=True)
                    urlpath.write_bytes(file_path.read_bytes())

        # Add to workspace (or not)
        fileGrp = file_dict.pop('fileGrp')
        if dry_run:
            log.info('workspace.add_file(%s)' % file_dict)
        else:
            workspace.add_file(fileGrp, ignore=ignore, force=force, **file_dict)

    # save changes to disk; a dry run must leave the METS untouched
    if not dry_run:
        workspace.save_mets()
321
322
323
# ----------------------------------------------------------------------
324
# ocrd workspace find
325
# ----------------------------------------------------------------------
326
327
@workspace_cli.command('find')
@mets_find_options
@click.option('-k', '--output-field', help="Output field. Repeat for multiple fields, will be joined with tab",
        default=['url'],
        multiple=True,
        type=click.Choice([
            'url',
            'mimetype',
            'pageId',
            'ID',
            'fileGrp',
            'basename',
            'basename_without_extension',
            'local_filename',
        ]))
@click.option('--download', is_flag=True, help="Download found files to workspace and change location in METS file ")
@pass_workspace
def workspace_find(ctx, file_grp, mimetype, page_id, file_id, output_field, download):
    """
    Find files.

    (If any ``FILTER`` starts with ``//``, then its remainder
     will be interpreted as a regular expression.)
    """
    modified_mets = False
    ret = []
    # --download rewrites the METS, so honour --backup like the other
    # commands that save it.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    for f in workspace.mets.find_files(
            ID=file_id,
            fileGrp=file_grp,
            mimetype=mimetype,
            pageId=page_id,
        ):
        if download and not f.local_filename:
            workspace.download_file(f)
            modified_mets = True
        # For 'pageId' the file ID is stored as a placeholder; it is swapped
        # for the physical page in a single lookup below.
        ret.append([f.ID if field == 'pageId' else getattr(f, field) or ''
                    for field in output_field])
    if modified_mets:
        workspace.save_mets()
    if 'pageId' in output_field:
        # NOTE: a static analyzer flagged `idx` as possibly undefined; it is
        # assigned and used only inside this branch, so it is always bound.
        idx = output_field.index('pageId')
        fileIds = [fields[idx] for fields in ret]
        pages = workspace.mets.get_physical_pages(for_fileIds=fileIds)
        for fields, page in zip(ret, pages):
            fields[idx] = page or ''
    for fields in ret:
        print('\t'.join(fields))
375
376
# ----------------------------------------------------------------------
377
# ocrd workspace remove
378
# ----------------------------------------------------------------------
379
380
@workspace_cli.command('remove')
@click.option('-k', '--keep-file', help="Do not delete file from file system", default=False, is_flag=True)
@click.option('-f', '--force', help="Continue even if mets:file or file on file system does not exist", default=False, is_flag=True)
@click.argument('ID', nargs=-1)
@pass_workspace
def workspace_remove_file(ctx, id, force, keep_file):  # pylint: disable=redefined-builtin
    """
    Delete files (given by their ID attribute ``ID``).
    
    (If any ``ID`` starts with ``//``, then its remainder
     will be interpreted as a regular expression.)
    """
    mets_name = basename(ctx.mets_url)
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=mets_name, automatic_backup=ctx.automatic_backup)
    # Remove each requested ID in turn, then write the METS once.
    for file_id in id:
        workspace.remove_file(file_id, force=force, keep_file=keep_file)
    workspace.save_mets()
396
397
398
# ----------------------------------------------------------------------
399
# ocrd workspace rename-group
400
# ----------------------------------------------------------------------
401
402
@workspace_cli.command('rename-group')
@click.argument('OLD', nargs=1)
@click.argument('NEW', nargs=1)
@pass_workspace
def rename_group(ctx, old, new):
    """
    Rename fileGrp (USE attribute ``OLD`` to ``NEW``).
    """
    # The METS is modified and saved, so honour --backup like the other
    # mutating commands.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    workspace.rename_file_group(old, new)
    workspace.save_mets()
413
414
# ----------------------------------------------------------------------
415
# ocrd workspace remove-group
416
# ----------------------------------------------------------------------
417
418
@workspace_cli.command('remove-group')
@click.option('-r', '--recursive', help="Delete any files in the group before the group itself", default=False, is_flag=True)
@click.option('-f', '--force', help="Continue removing even if group or containing files not found in METS", default=False, is_flag=True)
@click.option('-k', '--keep-files', help="Do not delete files from file system", default=False, is_flag=True)
@click.argument('GROUP', nargs=-1)
@pass_workspace
def remove_group(ctx, group, recursive, force, keep_files):
    """
    Delete fileGrps (given by their USE attribute ``GROUP``).

    (If any ``GROUP`` starts with ``//``, then its remainder
     will be interpreted as a regular expression.)
    """
    # The METS is modified and saved, so honour --backup like
    # workspace_remove_file does.
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    for g in group:
        workspace.remove_file_group(g, recursive=recursive, force=force, keep_files=keep_files)
    workspace.save_mets()
435
436
# ----------------------------------------------------------------------
437
# ocrd workspace prune-files
438
# ----------------------------------------------------------------------
439
440
@workspace_cli.command('prune-files')
@click.option('-G', '--file-grp', help="fileGrp USE", metavar='FILTER')
@click.option('-m', '--mimetype', help="Media type to look for", metavar='FILTER')
@click.option('-g', '--page-id', help="Page ID", metavar='FILTER')
@click.option('-i', '--file-id', help="ID", metavar='FILTER')
@pass_workspace
def prune_files(ctx, file_grp, mimetype, page_id, file_id):
    """
    Removes mets:files that point to non-existing local files

    (If any ``FILTER`` starts with ``//``, then its remainder
     will be interpreted as a regular expression.)
    """
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    # Work from inside the workspace directory so that local_filename values
    # are checked relative to it.
    with pushd_popd(workspace.directory):
        for f in workspace.mets.find_files(
            ID=file_id,
            fileGrp=file_grp,
            mimetype=mimetype,
            pageId=page_id,
        ):
            try:
                if not f.local_filename or not exists(f.local_filename):
                    workspace.mets.remove_file(f.ID)
            except Exception as e:
                # '%s', not '%f': the argument is a file object, not a float,
                # and a float format would break the log call itself.
                ctx.log.exception("Error removing %s: %s", f, e)
                raise
        workspace.save_mets()
468
469
# ----------------------------------------------------------------------
470
# ocrd workspace list-group
471
# ----------------------------------------------------------------------
472
473
@workspace_cli.command('list-group')
@pass_workspace
def list_groups(ctx):
    """
    List fileGrp USE attributes
    """
    mets_name = basename(ctx.mets_url)
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=mets_name)
    # One group per output line.
    group_names = workspace.mets.file_groups
    print("\n".join(group_names))
481
482
# ----------------------------------------------------------------------
483
# ocrd workspace list-page
484
# ----------------------------------------------------------------------
485
486
@workspace_cli.command('list-page')
@pass_workspace
def list_pages(ctx):
    """
    List physical page IDs
    """
    mets_name = basename(ctx.mets_url)
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=mets_name)
    # One page ID per output line.
    page_ids = workspace.mets.physical_pages
    print("\n".join(page_ids))
494
495
# ----------------------------------------------------------------------
496
# ocrd workspace get-id
497
# ----------------------------------------------------------------------
498
499
@workspace_cli.command('get-id')
@pass_workspace
def get_id(ctx):
    """
    Get METS id if any
    """
    mets_name = basename(ctx.mets_url)
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=mets_name)
    identifier = workspace.mets.unique_identifier
    # Print nothing at all when the identifier is empty or unset.
    if not identifier:
        return
    print(identifier)
509
510
# ----------------------------------------------------------------------
511
# ocrd workspace set-id
512
# ----------------------------------------------------------------------
513
514
@workspace_cli.command('set-id')
@click.argument('ID')
@pass_workspace
def set_id(ctx, id):   # pylint: disable=redefined-builtin
    """
    Set METS ID.

    If one of the supported identifier mechanisms is used, will set this identifier.

    Otherwise will create a new <mods:identifier type="purl">{{ ID }}</mods:identifier>.
    """
    mets_name = basename(ctx.mets_url)
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=mets_name, automatic_backup=ctx.automatic_backup)
    # Assign through the METS property, then persist the change.
    workspace.mets.unique_identifier = id
    workspace.save_mets()
528
529
# ----------------------------------------------------------------------
530
# ocrd workspace merge
531
# ----------------------------------------------------------------------
532
533
@workspace_cli.command('merge')
@click.argument('METS_PATH')
@click.option('--copy-files/--no-copy-files', help="Copy files as well", default=True)
@click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp")
@mets_find_options
@pass_workspace
def merge(ctx, copy_files, filegrp_mapping, file_grp, file_id, page_id, mimetype, mets_path):   # pylint: disable=redefined-builtin
    """
    Merges this workspace with the workspace that contains ``METS_PATH``

    The ``--file-id``, ``--page-id``, ``--mimetype`` and ``--file-grp`` options have
    the same semantics as in ``ocrd workspace find``, see ``ocrd workspace find --help``
    for an explanation.
    """
    # NOTE(review): `file_grp` matches the keyword that the same
    # mets_find_options decorator passes to workspace_find; confirm against
    # the decorator's definition.
    mets_path = Path(mets_path)
    if filegrp_mapping:
        # The mapping arrives as a JSON string on the command line.
        filegrp_mapping = loads(filegrp_mapping)
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name))
    workspace.merge(
        other_workspace,
        copy_files=copy_files,
        fileGrp_mapping=filegrp_mapping,
        fileGrp=file_grp,
        ID=file_id,
        pageId=page_id,
        mimetype=mimetype,
    )
    workspace.save_mets()
562
563
# ----------------------------------------------------------------------
564
# ocrd workspace backup
565
# ----------------------------------------------------------------------
566
567
# Sub-group "ocrd workspace backup"; the add/list/restore/undo commands
# below attach to it. The body is intentionally just the help text.
@workspace_cli.group('backup')
@click.pass_context
def workspace_backup_cli(ctx): # pylint: disable=unused-argument
    """
    Backing and restoring workspaces - dev edition
    """
573
574
@workspace_backup_cli.command('add')
@pass_workspace
def workspace_backup_add(ctx):
    """
    Create a new backup
    """
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    # Delegate to the backup manager wrapped around this workspace.
    WorkspaceBackupManager(workspace).add()
582
583
@workspace_backup_cli.command('list')
@pass_workspace
def workspace_backup_list(ctx):
    """
    List backups
    """
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    manager = WorkspaceBackupManager(workspace)
    # Print one backup per line, in the order the manager returns them.
    for backup in manager.list():
        print(backup)
592
593
@workspace_backup_cli.command('restore')
@click.option('-f', '--choose-first', help="Restore first matching version if more than one", is_flag=True)
@click.argument('bak') #, type=click.Path(dir_okay=False, readable=True, resolve_path=True))
@pass_workspace
def workspace_backup_restore(ctx, choose_first, bak):
    """
    Restore backup BAK
    """
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    manager = WorkspaceBackupManager(workspace)
    # Pass BAK and the --choose-first flag straight through to the manager.
    manager.restore(bak, choose_first)
603
604
@workspace_backup_cli.command('undo')
@pass_workspace
def workspace_backup_undo(ctx):
    """
    Restore the last backup
    """
    workspace = Workspace(ctx.resolver, directory=ctx.directory, mets_basename=basename(ctx.mets_url), automatic_backup=ctx.automatic_backup)
    # Delegate to the backup manager wrapped around this workspace.
    WorkspaceBackupManager(workspace).undo()
612