Passed
Push — issue760-hugging-face-hub-inte... (2952f6...32cbb7) by Juho, created 03:13

annif.cli (rating: C)

Complexity
    Total Complexity: 57

Size/Duplication
    Total Lines: 706
    Duplicated Lines: 0 %

Importance
    Changes: 0

Metric  Value
eloc    461
dl      0
loc     706
rs      5.04
c       0
b       0
f       0
wmc     57

15 Functions

Rating  Name                 Duplication  Size  Complexity
A       run_hyperopt()       0            36    2
A       run_train()          0            42    3
C       run_optimize()       0            96    8
A       run_load_vocab()     0            36    4
B       run_suggest()        0            42    6
A       run_learn()          0            20    1
A       run_list_vocabs()    0            28    4
A       run_list_projects()  0            39    3
B       run_index()          0            43    6
A       run_clear_project()  0            9     1
C       run_eval()           0            108   9
A       run_show_project()   0            18    1
A       run_completion()     0            16    2
A       run_download()       0            57    3
A       run_upload()         0            40    4
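
If you want to sanity-check these per-function figures locally, a cyclomatic-complexity tool can produce comparable numbers. The sketch below uses the radon library; this is an assumption made for illustration, not necessarily the analyzer behind this report, so the exact values may differ.

# Rough local check of per-function complexity for annif/cli.py.
# Assumes the radon package is installed; radon is not necessarily the
# analyzer used by this report, so values may not match exactly.
from radon.complexity import cc_visit

with open("annif/cli.py", encoding="utf-8") as src:
    blocks = cc_visit(src.read())

for block in sorted(blocks, key=lambda b: b.complexity, reverse=True):
    print(f"{block.name:<25} complexity={block.complexity}")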

How to fix

Complexity

Complex modules like annif.cli often do many different things. To break such a module down, we need to identify a cohesive component within it. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.

Once you have determined which fields and methods belong together, you can apply the Extract Class refactoring; for a module such as this one, the equivalent is moving the cohesive group of functions into its own module. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster to apply.
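
As a minimal sketch of what such an extraction could look like here (module name and structure are hypothetical, not part of the Annif codebase), the Hugging Face Hub commands form a cohesive group that could move into their own module and be registered onto the main Click group, shrinking annif/cli.py without changing the CLI surface:

# Hypothetical annif/hfh_cli.py: a reduced sketch of moving the Hub-related
# commands out of annif/cli.py. Only the config upload is shown here; the
# real command also uploads project and vocabulary data directories.
import click

from annif import cli_util


@click.command("upload")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
@click.option("--token", help="Authentication token for the Hugging Face Hub.")
def run_upload(project_ids_pattern, repo_id, token):
    """Upload the configurations of matching projects to a Hub repository."""
    for project in cli_util.get_matching_projects(project_ids_pattern):
        cli_util.upload_config(
            project, repo_id, token, f"Upload {project.project_id} with Annif"
        )


# In annif/cli.py the main group would then only need to register the command:
#     from annif import hfh_cli
#     cli.add_command(hfh_cli.run_upload)

The same pattern would apply to run_download: each extracted module keeps its own option declarations, and the top-level group only registers the commands.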

"""Definitions for command-line (Click) commands for invoking Annif
operations and printing the results to console."""

import collections
import importlib
import json
import os.path
import re
import sys

import click
import click_log
from flask.cli import FlaskGroup

import annif
import annif.corpus
import annif.parallel
import annif.project
import annif.registry
from annif import cli_util
from annif.exception import NotInitializedException, NotSupportedException
from annif.project import Access
from annif.util import metric_code

logger = annif.logger
click_log.basic_config(logger)


if len(sys.argv) > 1 and sys.argv[1] in ("run", "routes"):
    create_app = annif.create_app  # Use Flask with Connexion
else:
    # Connexion is not needed for most CLI commands, use plain Flask
    create_app = annif.create_flask_app

cli = FlaskGroup(create_app=create_app, add_version_option=False)
cli = click.version_option(message="%(version)s")(cli)


@cli.command("list-projects")
@cli_util.common_options
@click_log.simple_verbosity_option(logger, default="ERROR")
def run_list_projects():
    """
    List available projects.
    \f
    Show a list of currently defined projects. Projects are defined in a
    configuration file, normally called ``projects.cfg``. See `Project
    configuration
    <https://github.com/NatLibFi/Annif/wiki/Project-configuration>`_
    for details.
    """

    column_headings = (
        "Project ID",
        "Project Name",
        "Vocabulary ID",
        "Language",
        "Trained",
        "Modification time",
    )
    table = [
        (
            proj.project_id,
            proj.name,
            proj.vocab.vocab_id if proj.vocab_spec else "-",
            proj.language,
            str(proj.is_trained),
            cli_util.format_datetime(proj.modification_time),
        )
        for proj in annif.registry.get_projects(min_access=Access.private).values()
    ]
    template = cli_util.make_list_template(column_headings, *table)
    header = template.format(*column_headings)
    click.echo(header)
    click.echo("-" * len(header))
    for row in table:
        click.echo(template.format(*row))


@cli.command("show-project")
@cli_util.project_id
@cli_util.common_options
def run_show_project(project_id):
    """
    Show information about a project.
    """

    proj = cli_util.get_project(project_id)
    click.echo(f"Project ID:        {proj.project_id}")
    click.echo(f"Project Name:      {proj.name}")
    click.echo(f"Language:          {proj.language}")
    click.echo(f"Vocabulary:        {proj.vocab.vocab_id}")
    click.echo(f"Vocab language:    {proj.vocab_lang}")
    click.echo(f"Access:            {proj.access.name}")
    click.echo(f"Backend:           {proj.backend.name}")
    click.echo(f"Trained:           {proj.is_trained}")
    click.echo(f"Modification time: {cli_util.format_datetime(proj.modification_time)}")


@cli.command("clear")
@cli_util.project_id
@cli_util.common_options
def run_clear_project(project_id):
    """
    Initialize the project to its original, untrained state.
    """
    proj = cli_util.get_project(project_id)
    proj.remove_model_data()


@cli.command("list-vocabs")
@cli_util.common_options
@click_log.simple_verbosity_option(logger, default="ERROR")
def run_list_vocabs():
    """
    List available vocabularies.
    """

    column_headings = ("Vocabulary ID", "Languages", "Size", "Loaded")
    table = []
    for vocab in annif.registry.get_vocabs(min_access=Access.private).values():
        try:
            languages = ",".join(sorted(vocab.languages))
            size = len(vocab)
            loaded = True
        except NotInitializedException:
            languages = "-"
            size = "-"
            loaded = False
        row = (vocab.vocab_id, languages, str(size), str(loaded))
        table.append(row)

    template = cli_util.make_list_template(column_headings, *table)
    header = template.format(*column_headings)
    click.echo(header)
    click.echo("-" * len(header))
    for row in table:
        click.echo(template.format(*row))


@cli.command("load-vocab")
@click.argument("vocab_id", shell_complete=cli_util.complete_param)
@click.argument("subjectfile", type=click.Path(exists=True, dir_okay=False))
@click.option("--language", "-L", help="Language of subject file")
@click.option(
    "--force",
    "-f",
    default=False,
    is_flag=True,
    help="Replace existing vocabulary completely instead of updating it",
)
@cli_util.common_options
def run_load_vocab(vocab_id, language, force, subjectfile):
    """
    Load a vocabulary from a subject file.
    """
    vocab = cli_util.get_vocab(vocab_id)
    if annif.corpus.SubjectFileSKOS.is_rdf_file(subjectfile):
        # SKOS/RDF file supported by rdflib
        subjects = annif.corpus.SubjectFileSKOS(subjectfile)
        click.echo(f"Loading vocabulary from SKOS file {subjectfile}...")
    elif annif.corpus.SubjectFileCSV.is_csv_file(subjectfile):
        # CSV file
        subjects = annif.corpus.SubjectFileCSV(subjectfile)
        click.echo(f"Loading vocabulary from CSV file {subjectfile}...")
    else:
        # probably a TSV file - we need to know its language
        if not language:
            click.echo(
                "Please use --language option to set the language of a TSV vocabulary.",
                err=True,
            )
            sys.exit(1)
        click.echo(f"Loading vocabulary from TSV file {subjectfile}...")
        subjects = annif.corpus.SubjectFileTSV(subjectfile, language)
    vocab.load_vocabulary(subjects, force=force)


@cli.command("train")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--cached/--no-cached",
    "-c/-C",
    default=False,
    help="Reuse preprocessed training data from previous run",
)
@click.option(
    "--jobs",
    "-j",
    default=0,
    help="Number of parallel jobs (0 means choose automatically)",
)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_train(project_id, paths, cached, docs_limit, jobs, backend_param):
    """
    Train a project on a collection of documents.
    \f
    This will train the project using the documents from ``PATHS`` (directories
    or possibly gzipped TSV files) in a single batch operation. If ``--cached``
    is set, preprocessed training data from the previous run is reused instead
    of documents input; see `Reusing preprocessed training data
    <https://github.com/NatLibFi/Annif/wiki/
    Reusing-preprocessed-training-data>`_.
    """
    proj = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, proj)
    if cached:
        if len(paths) > 0:
            raise click.UsageError(
                "Corpus paths cannot be given when using --cached option."
            )
        documents = "cached"
    else:
        documents = cli_util.open_documents(
            paths, proj.subjects, proj.vocab_lang, docs_limit
        )
    proj.train(documents, backend_params, jobs)


@cli.command("learn")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_learn(project_id, paths, docs_limit, backend_param):
    """
    Further train an existing project on a collection of documents.
    \f
    Similar to the ``train`` command. This will continue training an already
    trained project using the documents given by ``PATHS`` in a single batch
    operation. Not supported by all backends.
    """
    proj = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, proj)
    documents = cli_util.open_documents(
        paths, proj.subjects, proj.vocab_lang, docs_limit
    )
    proj.learn(documents, backend_params)


@cli.command("suggest")
@cli_util.project_id
@click.argument(
    "paths", type=click.Path(dir_okay=False, exists=True, allow_dash=True), nargs=-1
)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option("--language", "-L", help="Language of subject labels")
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_suggest(
    project_id, paths, limit, threshold, language, backend_param, docs_limit
):
    """
    Suggest subjects for a single document from standard input or for one or more
    document file(s) given its/their path(s).
    \f
    This will read a text document from standard input and suggest subjects for
    it, or if given path(s) to file(s), suggest subjects for it/them.
    """
    project = cli_util.get_project(project_id)
    lang = language or project.vocab_lang
    if lang not in project.vocab.languages:
        raise click.BadParameter(f'language "{lang}" not supported by vocabulary')
    backend_params = cli_util.parse_backend_params(backend_param, project)

    if paths and not (len(paths) == 1 and paths[0] == "-"):
        docs = cli_util.open_text_documents(paths, docs_limit)
        results = project.suggest_corpus(docs, backend_params).filter(limit, threshold)
        for (
            suggestions,
            path,
        ) in zip(results, paths):
            click.echo(f"Suggestions for {path}")
            cli_util.show_hits(suggestions, project, lang)
    else:
        text = sys.stdin.read()
        suggestions = project.suggest([text], backend_params).filter(limit, threshold)[
            0
        ]
        cli_util.show_hits(suggestions, project, lang)


@cli.command("index")
@cli_util.project_id
@click.argument("directory", type=click.Path(exists=True, file_okay=False))
@click.option(
    "--suffix", "-s", default=".annif", help="File name suffix for result files"
)
@click.option(
    "--force/--no-force",
    "-f/-F",
    default=False,
    help="Force overwriting of existing result files",
)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option("--language", "-L", help="Language of subject labels")
@cli_util.backend_param_option
@cli_util.common_options
def run_index(
    project_id, directory, suffix, force, limit, threshold, language, backend_param
):
    """
    Index a directory with documents, suggesting subjects for each document.
    Write the results in TSV files with the given suffix (``.annif`` by
    default).
    """
    project = cli_util.get_project(project_id)
    lang = language or project.vocab_lang
    if lang not in project.vocab.languages:
        raise click.BadParameter(f'language "{lang}" not supported by vocabulary')
    backend_params = cli_util.parse_backend_params(backend_param, project)

    documents = annif.corpus.DocumentDirectory(directory, require_subjects=False)
    results = project.suggest_corpus(documents, backend_params).filter(limit, threshold)

    for (docfilename, _), suggestions in zip(documents, results):
        subjectfilename = re.sub(r"\.txt$", suffix, docfilename)
        if os.path.exists(subjectfilename) and not force:
            click.echo(
                "Not overwriting {} (use --force to override)".format(subjectfilename)
            )
            continue
        with open(subjectfilename, "w", encoding="utf-8") as subjfile:
            cli_util.show_hits(suggestions, project, lang, file=subjfile)


@cli.command("eval")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option(
    "--metric",
    "-m",
    default=[],
    multiple=True,
    help="Metric to calculate (default: all)",
)
@click.option(
    "--metrics-file",
    "-M",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file in order to write evaluation metrics in JSON format.
    File directory must exist, existing file will be overwritten.""",
)
@click.option(
    "--results-file",
    "-r",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file in order to write non-aggregated results per subject.
    File directory must exist, existing file will be overwritten.""",
)
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel jobs (0 means all CPUs)"
)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_eval(
    project_id,
    paths,
    limit,
    threshold,
    docs_limit,
    metric,
    metrics_file,
    results_file,
    jobs,
    backend_param,
):
    """
    Suggest subjects for documents and evaluate the results by comparing
    against a gold standard.
    \f
    With this command the documents from ``PATHS`` (directories or possibly
    gzipped TSV files) will be assigned subject suggestions and then
    statistical measures are calculated that quantify how well the suggested
    subjects match the gold-standard subjects in the documents.

    Normally the output is the list of the metrics calculated across documents.
    If ``--results-file <FILENAME>`` option is given, the metrics are
    calculated separately for each subject, and written to the given file.
    """

    project = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, project)

    import annif.eval

    eval_batch = annif.eval.EvaluationBatch(project.subjects)

    if results_file:
        try:
            print("", end="", file=results_file)
            click.echo(
                "Writing per subject evaluation results to {!s}".format(
                    results_file.name
                )
            )
        except Exception as e:
            raise NotSupportedException(
                "cannot open results-file for writing: " + str(e)
            )
    corpus = cli_util.open_documents(
        paths, project.subjects, project.vocab_lang, docs_limit
    )
    jobs, pool_class = annif.parallel.get_pool(jobs)

    project.initialize(parallel=True)
    psmap = annif.parallel.ProjectSuggestMap(
        project.registry, [project_id], backend_params, limit, threshold
    )

    with pool_class(jobs) as pool:
        for hit_sets, subject_sets in pool.imap_unordered(
            psmap.suggest_batch, corpus.doc_batches
        ):
            eval_batch.evaluate_many(hit_sets[project_id], subject_sets)

    template = "{0:<30}\t{1:{fmt_spec}}"
    metrics = eval_batch.results(
        metrics=metric, results_file=results_file, language=project.vocab_lang
    )
    for metric, score in metrics.items():
        if isinstance(score, int):
            fmt_spec = "d"
        elif isinstance(score, float):
            fmt_spec = ".04f"
        click.echo(template.format(metric + ":", score, fmt_spec=fmt_spec))
    if metrics_file:
        json.dump(
            {metric_code(mname): val for mname, val in metrics.items()},
            metrics_file,
            indent=2,
        )


FILTER_BATCH_MAX_LIMIT = 15
OPTIMIZE_METRICS = ["Precision (doc avg)", "Recall (doc avg)", "F1 score (doc avg)"]


@cli.command("optimize")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel jobs (0 means all CPUs)"
)
@cli_util.docs_limit_option
@cli_util.backend_param_option
@cli_util.common_options
def run_optimize(project_id, paths, jobs, docs_limit, backend_param):
    """
    Suggest subjects for documents, testing multiple limits and thresholds.
    \f
    This command will use different limit (maximum number of subjects) and
    score threshold values when assigning subjects to each document given by
    ``PATHS`` and compare the results against the gold standard subjects in the
    documents. The output is a list of parameter combinations and their scores.
    From the output, you can determine the optimum limit and threshold
    parameters depending on which measure you want to target.
    """
    project = cli_util.get_project(project_id)
    backend_params = cli_util.parse_backend_params(backend_param, project)
    filter_params = cli_util.generate_filter_params(FILTER_BATCH_MAX_LIMIT)

    import annif.eval

    corpus = cli_util.open_documents(
        paths, project.subjects, project.vocab_lang, docs_limit
    )

    jobs, pool_class = annif.parallel.get_pool(jobs)

    project.initialize(parallel=True)
    psmap = annif.parallel.ProjectSuggestMap(
        project.registry,
        [project_id],
        backend_params,
        limit=FILTER_BATCH_MAX_LIMIT,
        threshold=0.0,
    )

    ndocs = 0
    suggestion_batches = []
    subject_set_batches = []
    with pool_class(jobs) as pool:
        for suggestion_batch, subject_sets in pool.imap_unordered(
            psmap.suggest_batch, corpus.doc_batches
        ):
            ndocs += len(suggestion_batch[project_id])
            suggestion_batches.append(suggestion_batch[project_id])
            subject_set_batches.append(subject_sets)

    from annif.suggestion import SuggestionResults

    orig_suggestion_results = SuggestionResults(suggestion_batches)

    click.echo("\t".join(("Limit", "Thresh.", "Prec.", "Rec.", "F1")))

    best_scores = collections.defaultdict(float)
    best_params = {}

    template = "{:d}\t{:.02f}\t{:.04f}\t{:.04f}\t{:.04f}"
    import annif.eval

    for limit, threshold in filter_params:
        eval_batch = annif.eval.EvaluationBatch(project.subjects)
        filtered_results = orig_suggestion_results.filter(limit, threshold)
        for batch, subject_sets in zip(filtered_results.batches, subject_set_batches):
            eval_batch.evaluate_many(batch, subject_sets)
        results = eval_batch.results(metrics=OPTIMIZE_METRICS)
        for metric, score in results.items():
            if score >= best_scores[metric]:
                best_scores[metric] = score
                best_params[metric] = (limit, threshold)
        click.echo(
            template.format(
                limit,
                threshold,
                results["Precision (doc avg)"],
                results["Recall (doc avg)"],
                results["F1 score (doc avg)"],
            )
        )

    click.echo()
    template2 = "Best {:>19}: {:.04f}\tLimit: {:d}\tThreshold: {:.02f}"
    for metric in OPTIMIZE_METRICS:
        click.echo(
            template2.format(
                metric,
                best_scores[metric],
                best_params[metric][0],
                best_params[metric][1],
            )
        )
    click.echo("Documents evaluated:\t{}".format(ndocs))


@cli.command("hyperopt")
@cli_util.project_id
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option("--trials", "-T", default=10, help="Number of trials")
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel runs (0 means all CPUs)"
)
@click.option(
    "--metric", "-m", default="NDCG", help="Metric to optimize (default: NDCG)"
)
@click.option(
    "--results-file",
    "-r",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file path to write trial results as CSV.
    File directory must exist, existing file will be overwritten.""",
)
@cli_util.docs_limit_option
@cli_util.common_options
def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric, results_file):
    """
    Optimize the hyperparameters of a project using validation documents from
    ``PATHS``. Not supported by all backends. Output is a list of trial results
    and a report of the best performing parameters.
    """
    proj = cli_util.get_project(project_id)
    documents = cli_util.open_documents(
        paths, proj.subjects, proj.vocab_lang, docs_limit
    )
    click.echo(f"Looking for optimal hyperparameters using {trials} trials")
    rec = proj.hyperopt(documents, trials, jobs, metric, results_file)
    click.echo(f"Got best {metric} score {rec.score:.4f} with:")
    click.echo("---")
    for line in rec.lines:
        click.echo(line)
    click.echo("---")


@cli.command("upload")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
@click.option(
    "--token",
    help="""Authentication token, obtained from the Hugging Face Hub.
    Will default to the stored token.""",
)
@click.option(
    "--commit-message",
    help="""The summary / title / first line of the generated commit.""",
)
@cli_util.common_options
def run_upload(project_ids_pattern, repo_id, token, commit_message):
    """
    Upload selected projects and their vocabularies to a Hugging Face Hub repository
    \f
    This command zips the project directories and vocabularies of the projects
    that match the given `project_ids_pattern`, and uploads the archives along
    with the projects configuration to the specified Hugging Face Hub repository.
    An authentication token and commit message can be given with options.
    """
    projects = cli_util.get_matching_projects(project_ids_pattern)
    click.echo(f"Uploading project(s): {', '.join([p.project_id for p in projects])}")

    commit_message = (
        commit_message
        if commit_message is not None
        else f"Upload project(s) {project_ids_pattern} with Annif"
    )

    project_dirs = {p.datadir for p in projects}
    vocab_dirs = {p.vocab.datadir for p in projects}
    data_dirs = project_dirs.union(vocab_dirs)

    for data_dir in data_dirs:
        cli_util.upload_datadir(data_dir, repo_id, token, commit_message)

    for project in projects:
        cli_util.upload_config(project, repo_id, token, commit_message)


@cli.command("download")
@click.argument("project_ids_pattern")
@click.argument("repo_id")
@click.option(
    "--token",
    help="""Authentication token, obtained from the Hugging Face Hub.
    Will default to the stored token.""",
)
@click.option(
    "--revision",
    help="""
    An optional Git revision id which can be a branch name, a tag, or a commit
    hash.
    """,
)
@click.option(
    "--force",
    "-f",
    default=False,
    is_flag=True,
    help="Replace an existing project/vocabulary/config with the downloaded one",
)
@cli_util.common_options
def run_download(project_ids_pattern, repo_id, token, revision, force):
    """
    Download selected projects and their vocabularies from a Hugging Face Hub repository
    \f
    This command downloads the project and vocabulary archives and the
    configuration files of the projects that match the given
    `project_ids_pattern` from the specified Hugging Face Hub repository and
    unzips the archives to `data/` directory and places the configuration files
    to `projects.d/` directory. An authentication token and revision can
    be given with options.
    """

    project_ids = cli_util.get_matching_project_ids_from_hf_hub(
        project_ids_pattern, repo_id, token, revision
    )
    click.echo(f"Downloading project(s): {', '.join(project_ids)}")

    vocab_ids = set()
    for project_id in project_ids:
        project_zip_local_cache_path = cli_util.download_from_hf_hub(
            f"projects/{project_id}.zip", repo_id, token, revision
        )
        cli_util.unzip(project_zip_local_cache_path, force)
        local_config_cache_path = cli_util.download_from_hf_hub(
            f"{project_id}.cfg", repo_id, token, revision
        )
        vocab_ids.add(cli_util.get_vocab_id(local_config_cache_path))
        cli_util.copy_project_config(local_config_cache_path, force)

    for vocab_id in vocab_ids:
        vocab_zip_local_cache_path = cli_util.download_from_hf_hub(
            f"vocabs/{vocab_id}.zip", repo_id, token, revision
        )
        cli_util.unzip(vocab_zip_local_cache_path, force)


@cli.command("completion")
@click.option("--bash", "shell", flag_value="bash")
@click.option("--zsh", "shell", flag_value="zsh")
@click.option("--fish", "shell", flag_value="fish")
def run_completion(shell):
    """Generate the script for tab-key autocompletion for the given shell. To enable the
    completion support in your current bash terminal session run\n
        source <(annif completion --bash)
    """

    if shell is None:
        raise click.UsageError("Shell not given, try --bash, --zsh or --fish")

    script = os.popen(f"_ANNIF_COMPLETE={shell}_source annif").read()
    click.echo(f"# Generated by Annif {importlib.metadata.version('annif')}")
    click.echo(script)


if __name__ == "__main__":
    cli()