Passed
Push — dev ( 4c4a29...2dcfbf )
by Konstantinos
02:21 queued 10s
created

report_datasets   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 40
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 27
dl 0
loc 40
rs 10
c 0
b 0
f 0
wmc 6

2 Functions

Rating   Name   Duplication   Size   Complexity  
A get_cli_arguments() 0 5 1
A main() 0 22 5
1
#!/usr/bin/env python
2
3
import os
4
import argparse
5
from topic_modeling_toolkit.reporting import DatasetReporter
6
7
8
def get_cli_arguments(argv=None):
    """Parse the command-line options of the dataset-reporting script.

    Args:
        argv: optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            what the original zero-argument call did.

    Returns:
        argparse.Namespace with two attributes:
        ``details`` (bool; False unless ``--details``/``-d`` is given) and
        ``dataset_label`` (str; None unless ``--select-dataset``/``-s`` is given).
    """
    parser = argparse.ArgumentParser(
        description='Reports on topic-modeling datasets',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--details', '-d', dest='details', default=False, action='store_true',
        help='Switch to show details about the datasets',
    )
    # This option takes a value (the dataset label); it is not a boolean switch.
    parser.add_argument(
        '--select-dataset', '-s', dest='dataset_label',
        help='Label of a specific dataset to show information about; omit to report on all datasets.',
    )
    return parser.parse_args(argv)
13
14
def main():
    """Print information about the datasets found under COLLECTIONS_DIR.

    Reads the collections directory from the COLLECTIONS_DIR environment
    variable, builds a DatasetReporter on it and prints the strings returned
    by its ``get_infos`` method. When a dataset label is selected on the
    command line, only the first returned string containing that label is
    printed; otherwise all strings are printed, joined by newlines.

    Raises:
        RuntimeError: if COLLECTIONS_DIR is unset or empty.
    """
    import sys

    # Parse first so that `--help` and usage errors work even when
    # COLLECTIONS_DIR has not been exported yet.
    args = get_cli_arguments()

    collections_dir = os.getenv('COLLECTIONS_DIR')
    if not collections_dir:
        raise RuntimeError(
            "Please set the COLLECTIONS_DIR environment variable with the path to a directory containing collections/datasets")

    reporter = DatasetReporter(collections_dir)
    dataset_infos = reporter.get_infos(details=args.details, selection=args.dataset_label)

    if not args.dataset_label:
        print('\n'.join(dataset_infos))
        return

    # Only the first entry mentioning the requested label is shown.
    for info in dataset_infos:
        if args.dataset_label in info:
            print(info)
            return

    # Nothing matched: say so on stderr instead of exiting silently
    # (stdout stays empty, as before).
    sys.stderr.write("No dataset matching '{}' was found\n".format(args.dataset_label))
36
37
38
# Script entry point: run the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
40