Passed
Push — issue760-hugging-face-hub-inte... (d7be13...0c57bf)
by Juho, created 04:14

annif.cli (Rating: D)

Complexity

Total Complexity 59

Size/Duplication

Total Lines 736
Duplicated Lines 0 %

Importance

Changes 0
Metric  Value
eloc    485
dl      0
loc     736
rs      4.08
c       0
b       0
f       0
wmc     59

15 Functions

Rating  Name                 Duplication  Size  Complexity
A       run_hyperopt()       0            36    2
A       run_completion()     0            16    2
A       run_train()          0            42    3
C       run_optimize()       0            96    8
A       run_download()       0            58    3
A       run_load_vocab()     0            36    4
B       run_suggest()        0            42    6
A       run_learn()          0            20    1
B       run_upload()         0            65    6
A       run_list_vocabs()    0            28    4
A       run_list_projects()  0            39    3
B       run_index()          0            43    6
A       run_clear_project()  0            9     1
C       run_eval()           0            108   9
A       run_show_project()   0            18    1

How to fix: Complexity

Complex classes like annif.cli often do a lot of different things. To break such a class down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields or methods that share the same prefix or suffix.

Once you have determined which fields and methods belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
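
In a flat command module like annif.cli, the analogous move is to pull a group of related commands into their own file. As a minimal illustrative sketch (the module name annif/cli_hfh.py and the stub command bodies are hypothetical, not part of Annif), the Hugging Face Hub commands could live in a separate module and be registered on the main Click group with add_command():

# Hypothetical annif/cli_hfh.py -- illustrative sketch only, not actual Annif code.
# The Hugging Face Hub commands (upload/download) share one concern, which makes
# them a natural candidate for extraction out of annif.cli.
import click


@click.command("upload")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
def run_upload(project_ids_pattern, repo_id):
    """Upload selected projects to a Hugging Face Hub repository (stub body)."""
    click.echo(f"Would upload projects matching {project_ids_pattern} to {repo_id}")


@click.command("download")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
def run_download(project_ids_pattern, repo_id):
    """Download selected projects from a Hugging Face Hub repository (stub body)."""
    click.echo(f"Would download projects matching {project_ids_pattern} from {repo_id}")


# annif/cli.py would then only need to register the extracted commands:
#     from annif import cli_hfh
#     cli.add_command(cli_hfh.run_upload)
#     cli.add_command(cli_hfh.run_download)

Splitting along such lines leaves each command's behaviour unchanged while reducing the size and weighted method count of annif.cli itself.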

"""Definitions for command-line (Click) commands for invoking Annif
operations and printing the results to console."""

import collections
import importlib
import json
import os.path
import re
import sys

import click
import click_log
from flask.cli import FlaskGroup

import annif
import annif.corpus
import annif.parallel
import annif.project
import annif.registry
from annif import cli_util
from annif.exception import (
    NotInitializedException,
    NotSupportedException,
    OperationFailedException,
)
from annif.project import Access
from annif.util import metric_code

logger = annif.logger
click_log.basic_config(logger)


if len(sys.argv) > 1 and sys.argv[1] in ("run", "routes"):
    create_app = annif.create_app  # Use Flask with Connexion
else:
    # Connexion is not needed for most CLI commands, use plain Flask
    create_app = annif.create_flask_app

cli = FlaskGroup(create_app=create_app, add_version_option=False)
cli = click.version_option(message="%(version)s")(cli)


@cli.command("list-projects")
@cli_util.common_options
@click_log.simple_verbosity_option(logger, default="ERROR")
def run_list_projects():
    """
    List available projects.
    \f
    Show a list of currently defined projects. Projects are defined in a
    configuration file, normally called ``projects.cfg``. See `Project
    configuration
    <https://github.com/NatLibFi/Annif/wiki/Project-configuration>`_
    for details.
    """

    column_headings = (
        "Project ID",
        "Project Name",
        "Vocabulary ID",
        "Language",
        "Trained",
        "Modification time",
    )
    table = [
        (
            proj.project_id,
            proj.name,
            proj.vocab.vocab_id if proj.vocab_spec else "-",
            proj.language,
            str(proj.is_trained),
            cli_util.format_datetime(proj.modification_time),
        )
        for proj in annif.registry.get_projects(min_access=Access.private).values()
    ]
    template = cli_util.make_list_template(column_headings, *table)
    header = template.format(*column_headings)
    click.echo(header)
    click.echo("-" * len(header))
    for row in table:
        click.echo(template.format(*row))


@cli.command("show-project")
@cli_util.project_id
@cli_util.common_options
def run_show_project(project_id):
    """
    Show information about a project.
    """

    proj = cli_util.get_project(project_id)
    click.echo(f"Project ID:        {proj.project_id}")
    click.echo(f"Project Name:      {proj.name}")
    click.echo(f"Language:          {proj.language}")
    click.echo(f"Vocabulary:        {proj.vocab.vocab_id}")
    click.echo(f"Vocab language:    {proj.vocab_lang}")
    click.echo(f"Access:            {proj.access.name}")
    click.echo(f"Backend:           {proj.backend.name}")
    click.echo(f"Trained:           {proj.is_trained}")
    click.echo(f"Modification time: {cli_util.format_datetime(proj.modification_time)}")


@cli.command("clear")
@cli_util.project_id
@cli_util.common_options
def run_clear_project(project_id):
    """
    Initialize the project to its original, untrained state.
    """
    proj = cli_util.get_project(project_id)
    proj.remove_model_data()


@cli.command("list-vocabs")
@cli_util.common_options
@click_log.simple_verbosity_option(logger, default="ERROR")
def run_list_vocabs():
    """
    List available vocabularies.
    """

    column_headings = ("Vocabulary ID", "Languages", "Size", "Loaded")
    table = []
    for vocab in annif.registry.get_vocabs(min_access=Access.private).values():
        try:
            languages = ",".join(sorted(vocab.languages))
            size = len(vocab)
            loaded = True
        except NotInitializedException:
            languages = "-"
            size = "-"
            loaded = False
        row = (vocab.vocab_id, languages, str(size), str(loaded))
        table.append(row)

    template = cli_util.make_list_template(column_headings, *table)
    header = template.format(*column_headings)
    click.echo(header)
    click.echo("-" * len(header))
    for row in table:
        click.echo(template.format(*row))


@cli.command("load-vocab")
@click.argument("vocab_id", shell_complete=cli_util.complete_param)
@click.argument("subjectfile", type=click.Path(exists=True, dir_okay=False))
@click.option("--language", "-L", help="Language of subject file")
@click.option(
    "--force",
    "-f",
    default=False,
    is_flag=True,
    help="Replace existing vocabulary completely instead of updating it",
)
@cli_util.common_options
def run_load_vocab(vocab_id, language, force, subjectfile):
    """
    Load a vocabulary from a subject file.
    """
    vocab = cli_util.get_vocab(vocab_id)
    if annif.corpus.SubjectFileSKOS.is_rdf_file(subjectfile):
        # SKOS/RDF file supported by rdflib
        subjects = annif.corpus.SubjectFileSKOS(subjectfile)
        click.echo(f"Loading vocabulary from SKOS file {subjectfile}...")
    elif annif.corpus.SubjectFileCSV.is_csv_file(subjectfile):
        # CSV file
        subjects = annif.corpus.SubjectFileCSV(subjectfile)
        click.echo(f"Loading vocabulary from CSV file {subjectfile}...")
    else:
        # probably a TSV file - we need to know its language
        if not language:
            click.echo(
                "Please use --language option to set the language of a TSV vocabulary.",
                err=True,
            )
            sys.exit(1)
        click.echo(f"Loading vocabulary from TSV file {subjectfile}...")
        subjects = annif.corpus.SubjectFileTSV(subjectfile, language)
    vocab.load_vocabulary(subjects, force=force)


@cli.command("train")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--cached/--no-cached",
    "-c/-C",
    default=False,
    help="Reuse preprocessed training data from previous run",
)
@click.option(
    "--jobs",
    "-j",
    default=0,
    help="Number of parallel jobs (0 means choose automatically)",
)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_train(project_id, paths, cached, docs_limit, jobs, backend_param):
    """
    Train a project on a collection of documents.
    \f
    This will train the project using the documents from ``PATHS`` (directories
    or possibly gzipped TSV files) in a single batch operation. If ``--cached``
    is set, preprocessed training data from the previous run is reused instead
    of documents input; see `Reusing preprocessed training data
    <https://github.com/NatLibFi/Annif/wiki/
    Reusing-preprocessed-training-data>`_.
    """
    proj = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, proj)
    if cached:
        if len(paths) > 0:
            raise click.UsageError(
                "Corpus paths cannot be given when using --cached option."
            )
        documents = "cached"
    else:
        documents = cli_util.open_documents(
            paths, proj.subjects, proj.vocab_lang, docs_limit
        )
    proj.train(documents, backend_params, jobs)


@cli.command("learn")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_learn(project_id, paths, docs_limit, backend_param):
    """
    Further train an existing project on a collection of documents.
    \f
    Similar to the ``train`` command. This will continue training an already
    trained project using the documents given by ``PATHS`` in a single batch
    operation. Not supported by all backends.
    """
    proj = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, proj)
    documents = cli_util.open_documents(
        paths, proj.subjects, proj.vocab_lang, docs_limit
    )
    proj.learn(documents, backend_params)


@cli.command("suggest")
@cli_util.project_id
@click.argument(
    "paths", type=click.Path(dir_okay=False, exists=True, allow_dash=True), nargs=-1
)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option("--language", "-L", help="Language of subject labels")
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_suggest(
    project_id, paths, limit, threshold, language, backend_param, docs_limit
):
    """
    Suggest subjects for a single document from standard input or for one or more
    document file(s) given its/their path(s).
    \f
    This will read a text document from standard input and suggest subjects for
    it, or if given path(s) to file(s), suggest subjects for it/them.
    """
    project = cli_util.get_project(project_id)
    lang = language or project.vocab_lang
    if lang not in project.vocab.languages:
        raise click.BadParameter(f'language "{lang}" not supported by vocabulary')
    backend_params = cli_util.parse_backend_params(backend_param, project)

    if paths and not (len(paths) == 1 and paths[0] == "-"):
        docs = cli_util.open_text_documents(paths, docs_limit)
        results = project.suggest_corpus(docs, backend_params).filter(limit, threshold)
        for (
            suggestions,
            path,
        ) in zip(results, paths):
            click.echo(f"Suggestions for {path}")
            cli_util.show_hits(suggestions, project, lang)
    else:
        text = sys.stdin.read()
        suggestions = project.suggest([text], backend_params).filter(limit, threshold)[
            0
        ]
        cli_util.show_hits(suggestions, project, lang)


@cli.command("index")
@cli_util.project_id
@click.argument("directory", type=click.Path(exists=True, file_okay=False))
@click.option(
    "--suffix", "-s", default=".annif", help="File name suffix for result files"
)
@click.option(
    "--force/--no-force",
    "-f/-F",
    default=False,
    help="Force overwriting of existing result files",
)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option("--language", "-L", help="Language of subject labels")
@cli_util.backend_param_option
@cli_util.common_options
def run_index(
    project_id, directory, suffix, force, limit, threshold, language, backend_param
):
    """
    Index a directory with documents, suggesting subjects for each document.
    Write the results in TSV files with the given suffix (``.annif`` by
    default).
    """
    project = cli_util.get_project(project_id)
    lang = language or project.vocab_lang
    if lang not in project.vocab.languages:
        raise click.BadParameter(f'language "{lang}" not supported by vocabulary')
    backend_params = cli_util.parse_backend_params(backend_param, project)

    documents = annif.corpus.DocumentDirectory(directory, require_subjects=False)
    results = project.suggest_corpus(documents, backend_params).filter(limit, threshold)

    for (docfilename, _), suggestions in zip(documents, results):
        subjectfilename = re.sub(r"\.txt$", suffix, docfilename)
        if os.path.exists(subjectfilename) and not force:
            click.echo(
                "Not overwriting {} (use --force to override)".format(subjectfilename)
            )
            continue
        with open(subjectfilename, "w", encoding="utf-8") as subjfile:
            cli_util.show_hits(suggestions, project, lang, file=subjfile)


@cli.command("eval")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option(
    "--metric",
    "-m",
    default=[],
    multiple=True,
    help="Metric to calculate (default: all)",
)
@click.option(
    "--metrics-file",
    "-M",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file in order to write evaluation metrics in JSON format.
    File directory must exist, existing file will be overwritten.""",
)
@click.option(
    "--results-file",
    "-r",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file in order to write non-aggregated results per subject.
    File directory must exist, existing file will be overwritten.""",
)
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel jobs (0 means all CPUs)"
)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_eval(
    project_id,
    paths,
    limit,
    threshold,
    docs_limit,
    metric,
    metrics_file,
    results_file,
    jobs,
    backend_param,
):
    """
    Suggest subjects for documents and evaluate the results by comparing
    against a gold standard.
    \f
    With this command the documents from ``PATHS`` (directories or possibly
    gzipped TSV files) will be assigned subject suggestions and then
    statistical measures are calculated that quantify how well the suggested
    subjects match the gold-standard subjects in the documents.

    Normally the output is the list of the metrics calculated across documents.
    If ``--results-file <FILENAME>`` option is given, the metrics are
    calculated separately for each subject, and written to the given file.
    """

    project = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, project)

    import annif.eval

    eval_batch = annif.eval.EvaluationBatch(project.subjects)

    if results_file:
        try:
            print("", end="", file=results_file)
            click.echo(
                "Writing per subject evaluation results to {!s}".format(
                    results_file.name
                )
            )
        except Exception as e:
            raise NotSupportedException(
                "cannot open results-file for writing: " + str(e)
            )
    corpus = cli_util.open_documents(
        paths, project.subjects, project.vocab_lang, docs_limit
    )
    jobs, pool_class = annif.parallel.get_pool(jobs)

    project.initialize(parallel=True)
    psmap = annif.parallel.ProjectSuggestMap(
        project.registry, [project_id], backend_params, limit, threshold
    )

    with pool_class(jobs) as pool:
        for hit_sets, subject_sets in pool.imap_unordered(
            psmap.suggest_batch, corpus.doc_batches
        ):
            eval_batch.evaluate_many(hit_sets[project_id], subject_sets)

    template = "{0:<30}\t{1:{fmt_spec}}"
    metrics = eval_batch.results(
        metrics=metric, results_file=results_file, language=project.vocab_lang
    )
    for metric, score in metrics.items():
        if isinstance(score, int):
            fmt_spec = "d"
        elif isinstance(score, float):
            fmt_spec = ".04f"
        click.echo(template.format(metric + ":", score, fmt_spec=fmt_spec))
    if metrics_file:
        json.dump(
            {metric_code(mname): val for mname, val in metrics.items()},
            metrics_file,
            indent=2,
        )


FILTER_BATCH_MAX_LIMIT = 15
OPTIMIZE_METRICS = ["Precision (doc avg)", "Recall (doc avg)", "F1 score (doc avg)"]


@cli.command("optimize")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel jobs (0 means all CPUs)"
)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_optimize(project_id, paths, jobs, docs_limit, backend_param):
    """
    Suggest subjects for documents, testing multiple limits and thresholds.
    \f
    This command will use different limit (maximum number of subjects) and
    score threshold values when assigning subjects to each document given by
    ``PATHS`` and compare the results against the gold standard subjects in the
    documents. The output is a list of parameter combinations and their scores.
    From the output, you can determine the optimum limit and threshold
    parameters depending on which measure you want to target.
    """
    project = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, project)
    filter_params = cli_util.generate_filter_params(FILTER_BATCH_MAX_LIMIT)

    import annif.eval

    corpus = cli_util.open_documents(
        paths, project.subjects, project.vocab_lang, docs_limit
    )

    jobs, pool_class = annif.parallel.get_pool(jobs)

    project.initialize(parallel=True)
    psmap = annif.parallel.ProjectSuggestMap(
        project.registry,
        [project_id],
        backend_params,
        limit=FILTER_BATCH_MAX_LIMIT,
        threshold=0.0,
    )

    ndocs = 0
    suggestion_batches = []
    subject_set_batches = []
    with pool_class(jobs) as pool:
        for suggestion_batch, subject_sets in pool.imap_unordered(
            psmap.suggest_batch, corpus.doc_batches
        ):
            ndocs += len(suggestion_batch[project_id])
            suggestion_batches.append(suggestion_batch[project_id])
            subject_set_batches.append(subject_sets)

    from annif.suggestion import SuggestionResults

    orig_suggestion_results = SuggestionResults(suggestion_batches)

    click.echo("\t".join(("Limit", "Thresh.", "Prec.", "Rec.", "F1")))

    best_scores = collections.defaultdict(float)
    best_params = {}

    template = "{:d}\t{:.02f}\t{:.04f}\t{:.04f}\t{:.04f}"
    import annif.eval

    for limit, threshold in filter_params:
        eval_batch = annif.eval.EvaluationBatch(project.subjects)
        filtered_results = orig_suggestion_results.filter(limit, threshold)
        for batch, subject_sets in zip(filtered_results.batches, subject_set_batches):
            eval_batch.evaluate_many(batch, subject_sets)
        results = eval_batch.results(metrics=OPTIMIZE_METRICS)
        for metric, score in results.items():
            if score >= best_scores[metric]:
                best_scores[metric] = score
                best_params[metric] = (limit, threshold)
        click.echo(
            template.format(
                limit,
                threshold,
                results["Precision (doc avg)"],
                results["Recall (doc avg)"],
                results["F1 score (doc avg)"],
            )
        )

    click.echo()
    template2 = "Best {:>19}: {:.04f}\tLimit: {:d}\tThreshold: {:.02f}"
    for metric in OPTIMIZE_METRICS:
        click.echo(
            template2.format(
                metric,
                best_scores[metric],
                best_params[metric][0],
                best_params[metric][1],
            )
        )
    click.echo("Documents evaluated:\t{}".format(ndocs))


@cli.command("hyperopt")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option("--trials", "-T", default=10, help="Number of trials")
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel runs (0 means all CPUs)"
)
@click.option(
    "--metric", "-m", default="NDCG", help="Metric to optimize (default: NDCG)"
)
@click.option(
    "--results-file",
    "-r",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file path to write trial results as CSV.
    File directory must exist, existing file will be overwritten.""",
)
@cli_util.docs_limit_option
@cli_util.common_options
def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric, results_file):
    """
    Optimize the hyperparameters of a project using validation documents from
    ``PATHS``. Not supported by all backends. Output is a list of trial results
    and a report of the best performing parameters.
    """
    proj = cli_util.get_project(project_id)
    documents = cli_util.open_documents(
        paths, proj.subjects, proj.vocab_lang, docs_limit
    )
    click.echo(f"Looking for optimal hyperparameters using {trials} trials")
    rec = proj.hyperopt(documents, trials, jobs, metric, results_file)
    click.echo(f"Got best {metric} score {rec.score:.4f} with:")
    click.echo("---")
    for line in rec.lines:
        click.echo(line)
    click.echo("---")


@cli.command("upload")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
@click.option(
    "--token",
    help="""Authentication token, obtained from the Hugging Face Hub.
    Will default to the stored token.""",
)
@click.option(
    "--commit-message",
    help="""The summary / title / first line of the generated commit.""",
)
@cli_util.common_options
def run_upload(project_ids_pattern, repo_id, token, commit_message):
    """
    Upload selected projects and their vocabularies to a Hugging Face Hub repository.
    \f
    This command zips the project directories and vocabularies of the projects
    that match the given `project_ids_pattern` to archive files, and uploads the
    archives along with the project configurations to the specified Hugging Face
    Hub repository. An authentication token and commit message can be given with
    options.
    """
    from huggingface_hub import HfApi, preupload_lfs_files
    from huggingface_hub.utils import HfHubHTTPError, HFValidationError

    projects = cli_util.get_matching_projects(project_ids_pattern)
    click.echo(f"Uploading project(s): {', '.join([p.project_id for p in projects])}")

    commit_message = (
        commit_message
        if commit_message is not None
        else f"Upload project(s) {project_ids_pattern} with Annif"
    )

    project_dirs = {p.datadir for p in projects}
    vocab_dirs = {p.vocab.datadir for p in projects}
    data_dirs = project_dirs.union(vocab_dirs)

    fobjs, operations = [], []
    try:
        for data_dir in data_dirs:
            logger.debug(f"Archiving directory {data_dir}")
            fobj, operation = cli_util.prepare_datadir_commit(data_dir)
            logger.debug(f"Preuploading to {operation.path_in_repo}")
            preupload_lfs_files(repo_id, additions=[operation])
            fobjs.append(fobj)
            operations.append(operation)
        for project in projects:
            fobj, operation = cli_util.prepare_config_commit(project)
            fobjs.append(fobj)
            operations.append(operation)

        api = HfApi()
        api.create_commit(
            repo_id=repo_id,
            operations=operations,
            commit_message=commit_message,
            token=token,
        )
    except (HfHubHTTPError, HFValidationError) as err:
        raise OperationFailedException(str(err))
    finally:
        for fobj in fobjs:
            fobj.close()


@cli.command("download")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
@click.option(
    "--token",
    help="""Authentication token, obtained from the Hugging Face Hub.
    Will default to the stored token.""",
)
@click.option(
    "--revision",
    help="""
    An optional Git revision id which can be a branch name, a tag, or a commit
    hash.
    """,
)
@click.option(
    "--force",
    "-f",
    default=False,
    is_flag=True,
    help="Replace an existing project/vocabulary/config with the downloaded one",
)
@cli_util.common_options
def run_download(project_ids_pattern, repo_id, token, revision, force):
    """
    Download selected projects and their vocabularies from a Hugging Face Hub
    repository.
    \f
    This command downloads the project and vocabulary archives and the
    configuration files of the projects that match the given
    `project_ids_pattern` from the specified Hugging Face Hub repository and
    unzips the archives to `data/` directory and places the configuration files
    to `projects.d/` directory. An authentication token and revision can
    be given with options.
    """

    project_ids = cli_util.get_matching_project_ids_from_hf_hub(
        project_ids_pattern, repo_id, token, revision
    )
    click.echo(f"Downloading project(s): {', '.join(project_ids)}")

    vocab_ids = set()
    for project_id in project_ids:
        project_zip_cache_path = cli_util.download_from_hf_hub(
            f"projects/{project_id}.zip", repo_id, token, revision
        )
        cli_util.unzip_archive(project_zip_cache_path, force)
        config_file_cache_path = cli_util.download_from_hf_hub(
            f"{project_id}.cfg", repo_id, token, revision
        )
        vocab_ids.add(cli_util.get_vocab_id_from_config(config_file_cache_path))
        cli_util.copy_project_config(config_file_cache_path, force)

    for vocab_id in vocab_ids:
        vocab_zip_cache_path = cli_util.download_from_hf_hub(
            f"vocabs/{vocab_id}.zip", repo_id, token, revision
        )
        cli_util.unzip_archive(vocab_zip_cache_path, force)


@cli.command("completion")
@click.option("--bash", "shell", flag_value="bash")
@click.option("--zsh", "shell", flag_value="zsh")
@click.option("--fish", "shell", flag_value="fish")
def run_completion(shell):
    """Generate the script for tab-key autocompletion for the given shell. To enable the
    completion support in your current bash terminal session run\n
        source <(annif completion --bash)
    """

    if shell is None:
        raise click.UsageError("Shell not given, try --bash, --zsh or --fish")

    script = os.popen(f"_ANNIF_COMPLETE={shell}_source annif").read()
    click.echo(f"# Generated by Annif {importlib.metadata.version('annif')}")
    click.echo(script)


if __name__ == "__main__":
    cli()