Passed
Branch dev-release (a75e90)
by Konstantinos
02:13
created

report_topics   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 54
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 43
dl 0
loc 54
rs 10
c 0
b 0
f 0
wmc 3

1 Function

Rating   Name   Duplication   Size   Complexity  
B main() 0 44 3
1
#!/usr/bin/env python
2
3
import click
4
from topic_modeling_toolkit.reporting import TopicsHandler
5
6
7
@click.command()
@click.option('--dataset', '-d', required=True, prompt="Which i the dataset the model was trained on? (input dataset string label)",
              help="The dataset that was used to train the model on.")
@click.option('--model-label', '-m-l', required=True, prompt="Which model do you want to query for its topics? (input model label)",
              help="The model label to use searching for stored experimental results.")
@click.option('--topics-set', '-t-s', default='domain', show_default=True, type=click.Choice(["background", "domain"]),
              help="Common lexis should be collected in 'background' topics. 'Domain' topics should be free of common lexis.")
@click.option('--tokens-type', '-t-t', default='top-tokens', show_default=True,
              help="'top-tokens' is a list sorted on p(w|t). 'kernel' is a list sorted on p(t|w); should be accompanied by "
                   "threshold, ie 'kernel60' -> 0.60, 'kernel25' -> 0.25")
@click.option('--sort', '-s', default='name', show_default=True,
              help="Reports back the list of topics sorted on the metric. 'name': alphabetically by name, 'coh': by kernel "
                   "coherence, 'con': by kernel contrast, 'pur': by kernel purity. The last 3 options require a threshold similar to the "
                   "'tokens-type' arguments. Example syntaxes are: 'coh-80', 'con-25', 'pur-90'.")
@click.option('--columns', '-c', default=10, show_default=True,
              help="The number of columns (each corresponding to a topic's tokens group) to include per row'")
@click.option('--number-of-tokens', '-nb-tokens', default=15, show_default=True,
              help="The maximum number of tokens to show per topic. If requested background tokens to report then this "
                   "argument correspond to the total amount of bg tokens to show.")
@click.option('--show_metrics/--no-show_metrics', show_default=True, default=True,
              help="Whether to print kernel coherence, contrast and purity for each individual topic. It requires a kernel "
                   "definition (threshold) to be inputted from '--tokens-type' or '--sort', else it has no effect.")
@click.option('--show_title/--no-show_title', show_default=True, default=False,
              help="Whether to print a title on top of the table of topics ")
def main(dataset, model_label, topics_set, tokens_type, sort, columns, number_of_tokens, show_metrics, show_title):
    """Print the topics of a stored model as a formatted table.

    The collections directory is read from the COLLECTIONS_DIR environment
    variable, which must be set.
    """
    # Function-scope import: the original body called os.getenv without `os`
    # ever being imported, which raised NameError as soon as the command ran.
    import os

    collections_dir = os.getenv('COLLECTIONS_DIR')
    if not collections_dir:
        # An unset or empty variable is rejected up front with an actionable message.
        raise RuntimeError("Please set the COLLECTIONS_DIR environment variable with the path to a directory containing collections/datasets")

    topic_handler = TopicsHandler(collections_dir)
    model_reference = [dataset, model_label]

    if topics_set == 'background':
        # Background reporting takes no tokens-type/sort arguments.
        b = topic_handler.pformat_background(model_reference,
                                             columns=columns,
                                             nb_tokens=number_of_tokens,
                                             show_title=show_title)
    else:
        # Positional order is kept exactly as the original call passed it.
        b = topic_handler.pformat(model_reference,
                                  topics_set,
                                  tokens_type,
                                  sort,
                                  number_of_tokens,
                                  columns,
                                  topic_info=show_metrics,
                                  show_title=show_title)
    print(b)
51
52
# Script entry point: invoke the click command only when this file is run
# directly, not when it is imported as a module.
if __name__ == '__main__':
53
    main()
54