Total Complexity | 6 |
Total Lines | 40 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | #!/usr/bin/env python |
||
2 | |||
3 | import os |
||
4 | import argparse |
||
5 | from topic_modeling_toolkit.reporting import DatasetReporter |
||
6 | |||
7 | |||
def get_cli_arguments(argv=None):
    """Build the command-line parser and parse the given arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace with two attributes:
        ``details`` (bool; False unless ``--details``/``-d`` is passed) and
        ``dataset_label`` (value of ``--select-dataset``/``-s``; None when
        the option is omitted).
    """
    parser = argparse.ArgumentParser(
        description='Reports on topic-modeling datasets',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--details', '-d',
        dest='details',
        default=False,
        action='store_true',
        help='Switch to show details about the datasets',
    )
    parser.add_argument(
        '--select-dataset', '-s',
        dest='dataset_label',
        help='Whether to show information about a specific dataset only.',
    )
    return parser.parse_args(argv)
||
13 | |||
def main():
    """Print a report about the datasets found under COLLECTIONS_DIR.

    Reads the collections directory from the COLLECTIONS_DIR environment
    variable, asks a DatasetReporter for the dataset descriptions and prints
    them. When a dataset label is given on the command line, only the first
    description containing that label is printed; otherwise all descriptions
    are printed, separated by newlines.

    Raises:
        RuntimeError: If COLLECTIONS_DIR is unset or empty.
    """
    # Parse the command line first so that `--help` and usage errors are
    # reported even when COLLECTIONS_DIR has not been configured yet.
    args = get_cli_arguments()

    collections_dir = os.getenv('COLLECTIONS_DIR')
    if not collections_dir:
        raise RuntimeError(
            "Please set the COLLECTIONS_DIR environment variable with the path to a directory containing collections/datasets")

    dt_rprt = DatasetReporter(collections_dir)
    multiline_datasets_strings = dt_rprt.get_infos(details=args.details, selection=args.dataset_label)

    if args.dataset_label:
        # Show only the first description that mentions the requested label;
        # nothing is printed when no description matches.
        for line in multiline_datasets_strings:
            if args.dataset_label in line:
                print(line)
                break
    else:
        print('\n'.join(multiline_datasets_strings))
||
36 | |||
37 | |||
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
||
40 |