train.main() - Code Metrics - Inspection of "Finalize README" - boromir674/topic-modeling-toolkit - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Branch — dev-release (a75e90)

by Konstantinos

created 2019-09-28 20:16 UTC

train.main() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines	38
Code Lines	26

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
eloc	26
nop	0
dl	0
loc	38
rs	9.256
c	0
b	0
f	0

#!/usr/bin/env python

import os
import sys
import argparse

from topic_modeling_toolkit.patm import TrainerFactory, Experiment


def get_cl_arguments():
    """Build the command line interface of ``train.py`` and parse ``sys.argv``.

    If the script is invoked without any command line argument, the help
    message is printed and the process exits with status code 1.

    Returns:
        argparse.Namespace: the parsed arguments, with attributes
        ``collection``, ``config``, ``label``, ``reg_config``, ``save`` and
        ``new_batches``.
    """
    parser = argparse.ArgumentParser(
        prog='train.py',
        description='Trains an artm topic model and stores \'evaluation\' scores',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('collection', help='the name for the collection to train on')
    parser.add_argument('config', help='the .cfg file to use for constructing and training the topic_model')
    # nargs='?' makes the positional optional; without it argparse treats the
    # argument as required and the declared default is never used.
    parser.add_argument('label', metavar='id', nargs='?', default='def',
                        help='a unique identifier used for a newly created model')
    parser.add_argument('--reg-config', '--r-c', dest='reg_config',
                        help='the .cfg file containing initialization parameters for the active regularizers')
    parser.add_argument('--save', default=True, action='store_true',
                        help='saves the state of the model and experimental results after the training iterations finish')
    # '--save' is on by default, so on its own it could never be switched off;
    # '--no-save' writes False to the same destination. The default is
    # suppressed here so that '--save' stays the single source of the default
    # value (and of the default shown in the help message).
    parser.add_argument('--no-save', dest='save', default=argparse.SUPPRESS, action='store_false',
                        help='skips saving the state of the model and experimental results')
    # parser.add_argument('--load', default=False, action='store_true', help='restores the model state and progress of tracked entities from disk')
    parser.add_argument('--new-batches', '--n-b', default=False, dest='new_batches', action='store_true',
                        help='whether to force the creation of new batches, regardless of finding batches already existing')
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    return parser.parse_args()

def main():
    """Train a topic model on a collection and, if requested, save the outcome.

    The collection is looked up under the directory named by the
    ``COLLECTIONS_DIR`` environment variable. A trainer and an experiment are
    created for it, a model is built from the given configuration file(s),
    the model is trained and finally the experiment is saved when
    ``args.save`` is truthy.

    Raises:
        RuntimeError: if ``COLLECTIONS_DIR`` is unset or empty.
    """
    cli = get_cl_arguments()

    datasets_root = os.getenv('COLLECTIONS_DIR')
    if not datasets_root:
        raise RuntimeError(
            "Please set the COLLECTIONS_DIR environment variable to the directory containing collections/datasets")
    collection_dir = os.path.join(datasets_root, cli.collection)

    trainer = TrainerFactory().create_trainer(
        collection_dir, exploit_ideology_labels=True, force_new_batches=cli.new_batches)
    tracker = Experiment(collection_dir)
    # when the trainer trains, the experiment object keeps track of evaluation metrics
    trainer.register(tracker)

    # NOTE: restoring a previously saved experiment (a '--load' option) is
    # currently disabled, so a fresh model is always created here.
    model = trainer.model_factory.create_model(
        cli.label, cli.config, reg_cfg=cli.reg_config, show_progress_bars=False)
    specs = trainer.model_factory.create_train_specs()
    tracker.init_empty_trackables(model)

    print("Initialized Model:")
    print(model.pformat_regularizers)
    print(model.pformat_modalities)

    trainer.train(model, specs, effects=True, cache_theta=True)

    report = 'Iterated {} times through the collection and {} times over each document: total phi updates = {}'
    print(report.format(specs.collection_passes,
                        model.document_passes,
                        specs.collection_passes * model.document_passes))

    if not cli.save:
        return
    tracker.save_experiment(save_phi=True)
    print("Saved results and model '{}'".format(cli.label))


# Run the training procedure only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

1			#!/usr/bin/env python
2
3			import os
4			import sys
5			import argparse
6
7			from topic_modeling_toolkit.patm import TrainerFactory, Experiment
8
9
10			def get_cl_arguments():
11			parser = argparse.ArgumentParser(prog='train.py', description='Trains an artm topic model and stores \'evaluation\' scores', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
12			parser.add_argument('collection', help='the name for the collection to train on')
13			parser.add_argument('config', help='the .cfg file to use for constructing and training the topic_model')
14			parser.add_argument('label', metavar='id', default='def', help='a unique identifier used for a newly created model')
15			parser.add_argument('--reg-config', '--r-c', dest='reg_config', help='the .cfg file containing initialization parameters for the active regularizers')
16			parser.add_argument('--save', default=True, action='store_true', help='saves the state of the model and experimental results after the training iterations finish')
17			# parser.add_argument('--load', default=False, action='store_true', help='restores the model state and progress of tracked entities from disk')
18			parser.add_argument('--new-batches', '--n-b', default=False, dest='new_batches', action='store_true', help='whether to force the creation of new batches, regardless of finding batches already existing')
19			if len(sys.argv) == 1:
20			parser.print_help()
21			sys.exit(1)
22			return parser.parse_args()
23
24			def main():
25			args = get_cl_arguments()
26			collections_dir = os.getenv('COLLECTIONS_DIR')
27			if not collections_dir:
28			raise RuntimeError(
29			"Please set the COLLECTIONS_DIR environment variable to the directory containing collections/datasets")
30			root_dir = os.path.join(collections_dir, args.collection)
31			model_trainer = TrainerFactory().create_trainer(root_dir, exploit_ideology_labels=True,
32			force_new_batches=args.new_batches)
33			experiment = Experiment(root_dir)
34			model_trainer.register(
35			experiment) # when the model_trainer trains, the experiment object keeps track of evaluation metrics
36
37			# if args.load:
38			# topic_model = experiment.load_experiment(args.label)
39			# print '\nLoaded experiment and model state'
40			# settings = cfg2model_settings(args.config)
41			# train_specs = TrainSpecs(15, [], [])
42			# else:
43			topic_model = model_trainer.model_factory.create_model(args.label, args.config, reg_cfg=args.reg_config,
44			show_progress_bars=False)
45			train_specs = model_trainer.model_factory.create_train_specs()
46			experiment.init_empty_trackables(topic_model)
47			# static_reg_specs = {} # regularizers' parameters that should be kept constant during data fitting (model training)
48			# import pprint
49			# pprint.pprint({k: dict(v, **{setting_name: setting_value for setting_name, setting_value in {'target topics': (lambda x: 'all' if len(x) == 0 else '[{}]'.format(', '.join(x)))(topic_model.get_reg_obj(topic_model.get_reg_name(k)).topic_names), 'mods': getattr(topic_model.get_reg_obj(topic_model.get_reg_name(k)), 'class_ids', None)}.items()}) for k, v in self.static_regularization_specs.items()})
50			# pprint.pprint(tm.modalities_dictionary)
51			print("Initialized Model:")
52			print(topic_model.pformat_regularizers)
53			print(topic_model.pformat_modalities)
54			model_trainer.train(topic_model, train_specs, effects=True, cache_theta=True)
55			print('Iterated {} times through the collection and {} times over each document: total phi updates = {}'.
56			format(train_specs.collection_passes, topic_model.document_passes,
57			train_specs.collection_passes * topic_model.document_passes))
58
59			if args.save:
60			experiment.save_experiment(save_phi=True)
61			print("Saved results and model '{}'".format(args.label))
62
63
64			if __name__ == '__main__':
65			main()

boromir674 / topic-modeling-toolkit

Branch — dev-release (a75e90)

train.main() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like