Passed
Pull Request — master (#663)
by Juho
02:48
created

annif.cli.show_hits()   A

Complexity

Conditions 2

Size

Total Lines 9
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 8
nop 4
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
"""Definitions for command-line (Click) commands for invoking Annif
2
operations and printing the results to console."""
3
4
5
import collections
6
import json
7
import os.path
8
import re
9
import sys
10
11
import click
12
import click_log
13
from flask import current_app
14
from flask.cli import FlaskGroup, ScriptInfo
15
16
import annif
17
import annif.corpus
18
import annif.parallel
19
import annif.project
20
import annif.registry
21
from annif.exception import (
22
    ConfigurationException,
23
    NotInitializedException,
24
    NotSupportedException,
25
)
26
from annif.project import Access
27
from annif.suggestion import ListSuggestionResult, SuggestionFilter
28
from annif.util import metric_code
29
30
# Share Annif's package-level logger and hand it to click_log for setup.
logger = annif.logger
click_log.basic_config(logger)

# Root Click group for all commands in this module. The FlaskGroup is created
# with add_version_option=False and then wrapped with Click's own
# version option, which prints only the bare version string.
cli = click.version_option(message="%(version)s")(
    FlaskGroup(create_app=annif.create_app, add_version_option=False)
)
35
36
37
def get_project(project_id):
    """Look up a project by its ID, terminating the process if that fails.

    The project is requested from ``annif.registry`` with ``Access.private``
    as the minimum access level. When the registry raises ``ValueError``, an
    error message is written to stderr and the process exits with status 1.
    """
    try:
        project = annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        message = "No projects found with id '{0}'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
45
46
47
def get_vocab(vocab_id):
    """Look up a vocabulary by its ID, terminating the process if that fails.

    The vocabulary is requested from ``annif.registry`` with
    ``Access.private`` as the minimum access level. When the registry raises
    ``ValueError``, an error message is written to stderr and the process
    exits with status 1.
    """
    try:
        vocab = annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        message = f"No vocabularies found with the id '{vocab_id}'."
        click.echo(message, err=True)
        sys.exit(1)
    return vocab
56
57
58
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Open a document corpus from a sequence of pathnames.

    Each path is either a directory (opened as a ``DocumentDirectory`` that
    requires subjects, using ``vocab_lang``) or a file (opened as a
    ``DocumentFile``). No paths at all yields a corpus read from the null
    device, after logging a warning; several paths are merged into a
    ``CombinedCorpus``. When ``docs_limit`` is not ``None`` the result is
    wrapped in a ``LimitingDocumentCorpus``.
    """

    def _load(path):
        """Open one path as a corpus, choosing the class by path type."""
        if not os.path.isdir(path):
            return annif.corpus.DocumentFile(path, subject_index)
        return annif.corpus.DocumentDirectory(
            path, subject_index, vocab_lang, require_subjects=True
        )

    path_count = len(paths)
    if path_count == 0:
        logger.warning("Reading empty file")
        corpus = _load(os.path.devnull)
    elif path_count == 1:
        corpus = _load(paths[0])
    else:
        corpus = annif.corpus.CombinedCorpus([_load(path) for path in paths])

    if docs_limit is None:
        return corpus
    return annif.corpus.LimitingDocumentCorpus(corpus, docs_limit)
84
85
86
def open_text_documents(paths, docs_limit):
    """Read plain-text files into a ``DocumentList`` of subject-less documents.

    Only the first ``docs_limit`` paths are read (all of them when
    ``docs_limit`` is ``None``). Files are decoded as UTF-8 with an optional
    BOM, and undecodable bytes are replaced rather than raising.
    """

    def _read(path):
        """Load one file as a Document without a subject set."""
        with open(path, errors="replace", encoding="utf-8-sig") as docfile:
            return annif.corpus.Document(text=docfile.read(), subject_set=None)

    return annif.corpus.DocumentList([_read(path) for path in paths[:docs_limit]])
92
93
94
def show_hits(hits, project, lang, file=None):
    """Print one tab-separated line per hit via ``click.echo``.

    Each line holds the subject URI in angle brackets, the subject label in
    ``lang``, the notation (only when it is non-empty) and the hit score.
    Output goes to ``file`` when given, otherwise to click's default stream.
    """
    for hit in hits.as_list():
        subject = project.subjects[hit.subject_id]
        # Empty/None label or notation values are left out of the line.
        columns = [
            part for part in (subject.labels[lang], subject.notation) if part
        ]
        click.echo(
            "<{}>\t{}\t{}".format(subject.uri, "\t".join(columns), hit.score),
            file=file,
        )
103
104
105
def parse_backend_params(backend_param, project):
    """Turn ``--backend-param`` values into a nested ``{backend: {key: value}}``
    mapping.

    Every item is expected in the form ``<backend>.<parameter>=<value>``; it is
    split on the first ``.`` and then on the first ``=``. Each backend name is
    checked with ``validate_backend_params`` before the value is stored.
    Values are kept as strings.
    """
    parsed = collections.defaultdict(dict)
    for raw_option in backend_param:
        backend, assignment = raw_option.split(".", 1)
        name, value = assignment.split("=", 1)
        validate_backend_params(backend, raw_option, project)
        parsed[backend][name] = value
    return parsed
115
116
117
def validate_backend_params(backend, beparam, project):
    """Ensure a ``-b`` option targets the backend configured for the project.

    Raises ``ConfigurationException`` when ``backend`` differs from
    ``project.config["backend"]``; otherwise returns ``None``.
    """
    configured = project.config["backend"]
    if backend == configured:
        return
    raise ConfigurationException(
        'The backend {} in CLI option "-b {}" not matching the project'
        " backend {}.".format(backend, beparam, configured)
    )
123
124
125
# Upper bound for the "limit" (maximum number of subjects) values used when
# building filter batches for the optimize command.
BATCH_MAX_LIMIT = 15
126
127
128
def generate_filter_batches(subjects):
    """Build one (filter, evaluation batch) pair per limit/threshold combination.

    Limits run from 1 to ``BATCH_MAX_LIMIT`` inclusive and thresholds are the
    twenty values ``i * 0.05`` for ``i`` in ``range(20)``. The result is an
    ``OrderedDict`` keyed by ``(limit, threshold)`` whose values are
    ``(SuggestionFilter, EvaluationBatch)`` tuples.
    """
    # Imported inside the function rather than at module level.
    import annif.eval

    thresholds = [step * 0.05 for step in range(20)]
    batches = collections.OrderedDict()
    for limit in range(1, BATCH_MAX_LIMIT + 1):
        for threshold in thresholds:
            batches[(limit, threshold)] = (
                SuggestionFilter(subjects, limit, threshold),
                annif.eval.EvaluationBatch(subjects),
            )
    return batches
138
139
140
def set_project_config_file_path(ctx, param, value):
    """Click option callback that stores the ``--projects`` path in the app
    config.

    The Flask app is loaded through the context's ``ScriptInfo`` object in
    every case; ``PROJECTS_CONFIG_PATH`` is only overwritten when a value was
    actually given.
    """
    script_info = ctx.ensure_object(ScriptInfo)
    app = script_info.load_app()
    with app.app_context():
        if not value:
            return
        current_app.config["PROJECTS_CONFIG_PATH"] = value
145
146
147
def common_options(f):
    """Decorator adding the options shared by all CLI commands.

    Applies the eager ``-p/--projects`` option (handled entirely by its
    callback, so no value is passed to the command) and then click_log's
    verbosity option for the shared logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    with_projects = projects_option(f)
    verbosity_option = click_log.simple_verbosity_option(logger)
    return verbosity_option(with_projects)
159
160
161
def backend_param_option(f):
    """Decorator adding the repeatable ``-b/--backend-param`` option, which
    lets a command override backend parameters from the config file."""
    decorate = click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help="Override backend parameter of the config file. "
        "Syntax: `-b <backend>.<parameter>=<value>`.",
    )
    return decorate(f)
170
171
172
@cli.command("list-projects")
@common_options
@click_log.simple_verbosity_option(logger, default="ERROR")
def run_list_projects():
    """
    List available projects.
    \f
    Show a list of currently defined projects. Projects are defined in a
    configuration file, normally called ``projects.cfg``. See `Project
    configuration
    <https://github.com/NatLibFi/Annif/wiki/Project-configuration>`_
    for details.
    """

    # Fixed-width, left-aligned columns: ID, name, language, trained flag.
    row_format = "{0: <25}{1: <45}{2: <10}{3: <7}"
    heading = row_format.format("Project ID", "Project Name", "Language", "Trained")
    click.echo(heading)
    click.echo("-" * len(heading))

    projects = annif.registry.get_projects(min_access=Access.private)
    for project in projects.values():
        row = row_format.format(
            project.project_id,
            project.name,
            project.language,
            str(project.is_trained),
        )
        click.echo(row)
196
197
198
@cli.command("show-project")
@click.argument("project_id")
@common_options
def run_show_project(project_id):
    """
    Show information about a project.
    """

    project = get_project(project_id)
    # Each attribute is read right before its line is printed, so a failing
    # attribute lookup leaves the preceding lines already on the console.
    click.echo(f"Project ID:        {project.project_id}")
    click.echo(f"Project Name:      {project.name}")
    click.echo(f"Language:          {project.language}")
    click.echo(f"Vocabulary:        {project.vocab.vocab_id}")
    click.echo(f"Vocab language:    {project.vocab_lang}")
    click.echo(f"Access:            {project.access.name}")
    click.echo(f"Trained:           {project.is_trained}")
    click.echo(f"Modification time: {project.modification_time}")
215
216
217
@cli.command("clear")
@click.argument("project_id")
@common_options
def run_clear_project(project_id):
    """
    Initialize the project to its original, untrained state.
    """
    # Look the project up (exits on an unknown ID) and drop its model data.
    get_project(project_id).remove_model_data()
226
227
228
@cli.command("list-vocabs")
@common_options
@click_log.simple_verbosity_option(logger, default="ERROR")
def run_list_vocabs():
    """
    List available vocabularies.
    """

    row_format = "{0: <20}{1: <20}{2: >10}  {3: <6}"
    heading = row_format.format("Vocabulary ID", "Languages", "Size", "Loaded")
    click.echo(heading)
    click.echo("-" * len(heading))

    vocabs = annif.registry.get_vocabs(min_access=Access.private)
    for vocab in vocabs.values():
        try:
            languages = ",".join(sorted(vocab.languages))
            size = len(vocab)
        except NotInitializedException:
            # Vocabulary not initialized: show placeholders instead.
            languages, size, loaded = "-", "-", False
        else:
            loaded = True
        click.echo(row_format.format(vocab.vocab_id, languages, size, str(loaded)))
250
251
252
@cli.command("load-vocab")
@click.argument("vocab_id")
@click.argument("subjectfile", type=click.Path(exists=True, dir_okay=False))
@click.option("--language", "-L", help="Language of subject file")
@click.option(
    "--force",
    "-f",
    default=False,
    is_flag=True,
    help="Replace existing vocabulary completely " + "instead of updating it",
)
@common_options
def run_load_vocab(vocab_id, language, force, subjectfile):
    """
    Load a vocabulary from a subject file.
    """
    vocab = get_vocab(vocab_id)

    # Pick the reader by file type: RDF first, then CSV, otherwise assume TSV.
    if annif.corpus.SubjectFileSKOS.is_rdf_file(subjectfile):
        subjects = annif.corpus.SubjectFileSKOS(subjectfile)
        click.echo(f"Loading vocabulary from SKOS file {subjectfile}...")
    elif annif.corpus.SubjectFileCSV.is_csv_file(subjectfile):
        subjects = annif.corpus.SubjectFileCSV(subjectfile)
        click.echo(f"Loading vocabulary from CSV file {subjectfile}...")
    elif language:
        click.echo(f"Loading vocabulary from TSV file {subjectfile}...")
        subjects = annif.corpus.SubjectFileTSV(subjectfile, language)
    else:
        # The TSV reader is given a language, so one must be supplied here.
        click.echo(
            "Please use --language option to set the language of "
            + "a TSV vocabulary.",
            err=True,
        )
        sys.exit(1)

    vocab.load_vocabulary(subjects, force=force)
289
290
291
@cli.command("train")
@click.argument("project_id")
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--cached/--no-cached",
    "-c/-C",
    default=False,
    help="Reuse preprocessed training data from previous run",
)
@click.option(
    "--docs-limit",
    "-d",
    default=None,
    type=click.IntRange(0, None),
    help="Maximum number of documents to use",
)
@click.option(
    "--jobs",
    "-j",
    default=0,
    help="Number of parallel jobs (0 means choose automatically)",
)
@backend_param_option
@common_options
def run_train(project_id, paths, cached, docs_limit, jobs, backend_param):
    """
    Train a project on a collection of documents.
    \f
    This will train the project using the documents from ``PATHS`` (directories
    or possibly gzipped TSV files) in a single batch operation. If ``--cached``
    is set, preprocessed training data from the previous run is reused instead
    of documents input; see `Reusing preprocessed training data
    <https://github.com/NatLibFi/Annif/wiki/
    Reusing-preprocessed-training-data>`_.
    """
    project = get_project(project_id)
    params = parse_backend_params(backend_param, project)

    # --cached and explicit corpus paths are mutually exclusive.
    if cached and len(paths) > 0:
        raise click.UsageError(
            "Corpus paths cannot be given when using --cached option."
        )

    # With --cached the literal string "cached" is passed in place of a corpus.
    documents = (
        "cached"
        if cached
        else open_documents(paths, project.subjects, project.vocab_lang, docs_limit)
    )
    project.train(documents, params, jobs)
337
338
339
@cli.command("learn")
@click.argument("project_id")
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--docs-limit",
    "-d",
    default=None,
    type=click.IntRange(0, None),
    help="Maximum number of documents to use",
)
@backend_param_option
@common_options
def run_learn(project_id, paths, docs_limit, backend_param):
    """
    Further train an existing project on a collection of documents.
    \f
    Similar to the ``train`` command. This will continue training an already
    trained project using the documents given by ``PATHS`` in a single batch
    operation. Not supported by all backends.
    """
    project = get_project(project_id)
    params = parse_backend_params(backend_param, project)
    corpus = open_documents(paths, project.subjects, project.vocab_lang, docs_limit)
    project.learn(corpus, params)
363
364
365
@cli.command("suggest")
@click.argument("project_id")
@click.argument("paths", type=click.Path(dir_okay=False, exists=True), nargs=-1)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option("--language", "-L", help="Language of subject labels")
@click.option(
    "--docs-limit",
    "-d",
    default=None,
    type=click.IntRange(0, None),
    help="Maximum number of documents to use",
)
@backend_param_option
@common_options
def run_suggest(
    project_id, paths, limit, threshold, language, backend_param, docs_limit
):
    """
    Suggest subjects for a single document from standard input or for one or more
    document file(s) given its/their path(s).
    \f
    This will read a text document from standard input and suggest subjects for
    it, or if given path(s) to file(s), suggest subjects for it/them.
    """
    project = get_project(project_id)
    # Labels are shown in the requested language, defaulting to the project's
    # vocabulary language; reject languages the vocabulary does not have.
    lang = language or project.vocab_lang
    if lang not in project.vocab.languages:
        raise click.BadParameter(f'language "{lang}" not supported by vocabulary')
    backend_params = parse_backend_params(backend_param, project)
    hit_filter = SuggestionFilter(project.subjects, limit, threshold)

    # Choose the input source from the arguments, not from whether stdin is a
    # terminal: explicitly given PATHS must be honoured even when stdin is
    # redirected (pipes, cron, CI, test runners), and standard input is read
    # only when no PATHS were given, as the help text above describes.
    if paths:
        docs = open_text_documents(paths, docs_limit)
        subject_sets = project.suggest_batch(docs, backend_params)
        # zip() stops at the shorter sequence, so with --docs-limit only the
        # paths that were actually read are reported.
        for subjects, path in zip(subject_sets, paths):
            click.echo(f"Suggestions for {path}")
            show_hits(hit_filter(subjects), project, lang)
    else:
        text = sys.stdin.read()
        hits = hit_filter(project.suggest(text, backend_params))
        show_hits(hits, project, lang)
412
413
414
@cli.command("index")
@click.argument("project_id")
@click.argument("directory", type=click.Path(exists=True, file_okay=False))
@click.option(
    "--suffix", "-s", default=".annif", help="File name suffix for result files"
)
@click.option(
    "--force/--no-force",
    "-f/-F",
    default=False,
    help="Force overwriting of existing result files",
)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option("--language", "-L", help="Language of subject labels")
@backend_param_option
@common_options
def run_index(
    project_id, directory, suffix, force, limit, threshold, language, backend_param
):
    """
    Index a directory with documents, suggesting subjects for each document.
    Write the results in TSV files with the given suffix (``.annif`` by
    default).
    """
    project = get_project(project_id)
    # Labels are written in the requested language, defaulting to the
    # project's vocabulary language.
    lang = language or project.vocab_lang
    if lang not in project.vocab.languages:
        raise click.BadParameter(f'language "{lang}" not supported by vocabulary')
    backend_params = parse_backend_params(backend_param, project)
    hit_filter = SuggestionFilter(project.subjects, limit, threshold)

    documents = annif.corpus.DocumentDirectory(
        directory, None, None, require_subjects=False
    )
    subject_sets = project.suggest_batch(documents, backend_params)

    for (docfilename, _subjectfn), subjects in zip(documents, subject_sets):
        # A callable replacement inserts the user-supplied suffix literally;
        # a plain string would have its backslashes processed as escapes or
        # group references by re.sub.
        subjectfilename = re.sub(r"\.txt$", lambda _match: suffix, docfilename)
        if os.path.exists(subjectfilename) and not force:
            click.echo(
                "Not overwriting {} (use --force to override)".format(subjectfilename)
            )
            continue
        hits = hit_filter(subjects)
        with open(subjectfilename, "w", encoding="utf-8") as subjfile:
            show_hits(hits, project, lang, file=subjfile)
461
462
463
@cli.command("eval")
@click.argument("project_id")
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option("--limit", "-l", default=10, help="Maximum number of subjects")
@click.option("--threshold", "-t", default=0.0, help="Minimum score threshold")
@click.option(
    "--docs-limit",
    "-d",
    default=None,
    type=click.IntRange(0, None),
    help="Maximum number of documents to use",
)
@click.option(
    "--metric",
    "-m",
    default=[],
    multiple=True,
    help="Metric to calculate (default: all)",
)
@click.option(
    "--metrics-file",
    "-M",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file in order to write evaluation metrics in JSON format.
    File directory must exist, existing file will be overwritten.""",
)
@click.option(
    "--results-file",
    "-r",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file in order to write non-aggregated results per subject.
    File directory must exist, existing file will be overwritten.""",
)
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel jobs (0 means all CPUs)"
)
@backend_param_option
@common_options
def run_eval(
    project_id,
    paths,
    limit,
    threshold,
    docs_limit,
    metric,
    metrics_file,
    results_file,
    jobs,
    backend_param,
):
    """
    Suggest subjects for documents and evaluate the results by comparing
    against a gold standard.
    \f
    With this command the documents from ``PATHS`` (directories or possibly
    gzipped TSV files) will be assigned subject suggestions and then
    statistical measures are calculated that quantify how well the suggested
    subjects match the gold-standard subjects in the documents.

    Normally the output is the list of the metrics calculated across documents.
    If ``--results-file <FILENAME>`` option is given, the metrics are
    calculated separately for each subject, and written to the given file.
    """

    project = get_project(project_id)
    backend_params = parse_backend_params(backend_param, project)

    # Imported inside the function rather than at module level.
    import annif.eval

    eval_batch = annif.eval.EvaluationBatch(project.subjects)

    if results_file:
        try:
            # The option is declared lazy, so write an empty string now to
            # surface an unwritable path before any evaluation work is done.
            print("", end="", file=results_file)
        except Exception as e:
            # Chain the original error so its traceback is kept as the cause.
            raise NotSupportedException(
                "cannot open results-file for writing: " + str(e)
            ) from e
        click.echo(
            "Writing per subject evaluation results to {!s}".format(
                results_file.name
            )
        )
    docs = open_documents(paths, project.subjects, project.vocab_lang, docs_limit)

    jobs, pool_class = annif.parallel.get_pool(jobs)

    project.initialize(parallel=True)
    psmap = annif.parallel.ProjectSuggestMap(
        project.registry, [project_id], backend_params, limit, threshold
    )

    with pool_class(jobs) as pool:
        # Results arrive in arbitrary order; each one is fed to the batch.
        for hits, subject_set in pool.imap_unordered(psmap.suggest, docs.documents):
            eval_batch.evaluate(hits[project_id], subject_set)

    template = "{0:<30}\t{1}"
    metrics = eval_batch.results(
        metrics=metric, results_file=results_file, language=project.vocab_lang
    )
    # Distinct loop name so the ``metric`` option value is not overwritten.
    for metric_name, score in metrics.items():
        click.echo(template.format(metric_name + ":", score))
    if metrics_file:
        json.dump(
            {metric_code(mname): val for mname, val in metrics.items()},
            metrics_file,
            indent=2,
        )
571
572
573
@cli.command("optimize")
@click.argument("project_id")
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--docs-limit",
    "-d",
    default=None,
    type=click.IntRange(0, None),
    help="Maximum number of documents to use",
)
@backend_param_option
@common_options
def run_optimize(project_id, paths, docs_limit, backend_param):
    """
    Suggest subjects for documents, testing multiple limits and thresholds.
    \f
    This command will use different limit (maximum number of subjects) and
    score threshold values when assigning subjects to each document given by
    ``PATHS`` and compare the results against the gold standard subjects in the
    documents. The output is a list of parameter combinations and their scores.
    From the output, you can determine the optimum limit and threshold
    parameters depending on which measure you want to target.
    """
    project = get_project(project_id)
    backend_params = parse_backend_params(backend_param, project)

    filter_batches = generate_filter_batches(project.subjects)

    ndocs = 0
    docs = open_documents(paths, project.subjects, project.vocab_lang, docs_limit)
    for doc in docs.documents:
        raw_hits = project.suggest(doc.text, backend_params)
        # Pre-trim to the largest limit any filter batch uses.
        hits = raw_hits.filter(project.subjects, limit=BATCH_MAX_LIMIT)
        assert isinstance(hits, ListSuggestionResult), (
            "Optimize should only be done with ListSuggestionResult "
            + "as it would be very slow with VectorSuggestionResult."
        )
        for hit_filter, batch in filter_batches.values():
            batch.evaluate(hit_filter(hits), doc.subject_set)
        ndocs += 1

    click.echo("\t".join(("Limit", "Thresh.", "Prec.", "Rec.", "F1")))

    # Defined before the loop: the summary after the loop iterates over it, so
    # it must exist regardless of how many batches are processed.
    metrics = ["Precision (doc avg)", "Recall (doc avg)", "F1 score (doc avg)"]
    best_scores = collections.defaultdict(float)
    best_params = {}

    template = "{:d}\t{:.02f}\t{:.04f}\t{:.04f}\t{:.04f}"
    # Store the batches in a queue that gets consumed along the way
    # This way GC will have a chance to reclaim the memory
    # (a deque pops from the left in O(1), unlike list.pop(0))
    filter_batches = collections.deque(filter_batches.items())
    while filter_batches:
        params, (_hit_filter, batch) = filter_batches.popleft()
        results = batch.results(metrics=metrics)
        for metric, score in results.items():
            # ">=" means a later combination wins ties.
            if score >= best_scores[metric]:
                best_scores[metric] = score
                best_params[metric] = params
        click.echo(
            template.format(
                params[0],
                params[1],
                results["Precision (doc avg)"],
                results["Recall (doc avg)"],
                results["F1 score (doc avg)"],
            )
        )

    click.echo()
    template2 = "Best {:>19}: {:.04f}\tLimit: {:d}\tThreshold: {:.02f}"
    for metric in metrics:
        click.echo(
            template2.format(
                metric,
                best_scores[metric],
                best_params[metric][0],
                best_params[metric][1],
            )
        )
    click.echo("Documents evaluated:\t{}".format(ndocs))
653
654
655
@cli.command("hyperopt")
@click.argument("project_id")
@click.argument("paths", type=click.Path(exists=True), nargs=-1)
@click.option(
    "--docs-limit",
    "-d",
    default=None,
    type=click.IntRange(0, None),
    help="Maximum number of documents to use",
)
@click.option("--trials", "-T", default=10, help="Number of trials")
@click.option(
    "--jobs", "-j", default=1, help="Number of parallel runs (0 means all CPUs)"
)
@click.option(
    "--metric", "-m", default="NDCG", help="Metric to optimize (default: NDCG)"
)
@click.option(
    "--results-file",
    "-r",
    type=click.File("w", encoding="utf-8", errors="ignore", lazy=True),
    help="""Specify file path to write trial results as CSV.
    File directory must exist, existing file will be overwritten.""",
)
@common_options
def run_hyperopt(project_id, paths, docs_limit, trials, jobs, metric, results_file):
    """
    Optimize the hyperparameters of a project using validation documents from
    ``PATHS``. Not supported by all backends. Output is a list of trial results
    and a report of the best performing parameters.
    """
    project = get_project(project_id)
    corpus = open_documents(paths, project.subjects, project.vocab_lang, docs_limit)

    click.echo(f"Looking for optimal hyperparameters using {trials} trials")
    recommendation = project.hyperopt(corpus, trials, jobs, metric, results_file)

    # Report the best score, then the recommendation lines between "---" rules.
    click.echo(f"Got best {metric} score {recommendation.score:.4f} with:")
    click.echo("---")
    for report_line in recommendation.lines:
        click.echo(report_line)
    click.echo("---")
695
696
697
if __name__ == "__main__":
    # Invoke the Click command group when this module is executed directly.
    cli()
699