tests.regressions_test - Code Metrics - boromir674/topic-modeling-toolkit - Measure and Improve Code Quality continuously with Scrutinizer

tests.regressions_test A
last analyzed 2022-01-13 13:56 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	109
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	62
dl	0
loc	109
rs	10
c	0
b	0
f	0
wmc	11

7 Functions

Rating	Name	Size	Complexity
A	megadata_dir()	3	1
A	datasets()	30	2
A	test_sanity()	8	3
A	test_divergence_distances_computer()	5	2
A	regression_model_path()	13	1
A	known_expected()	5	1
A	pr()	5	1

import os
import pytest
from topic_modeling_toolkit.reporting import PsiReporter
from topic_modeling_toolkit.patm import Experiment, TrainerFactory


@pytest.fixture(scope='session')
def megadata_dir(unittests_data_dir):
    return os.path.join(unittests_data_dir, 'megadata')


@pytest.fixture(scope='session')
def regression_model_path(megadata_dir):
    trainer = TrainerFactory().create_trainer(megadata_dir, exploit_ideology_labels=True, force_new_batches=False)
    experiment = Experiment(megadata_dir)
    topic_model = trainer.model_factory.create_model('candidate', os.path.join(MODULE_DIR, 'regression.cfg'), reg_cfg=os.path.join(MODULE_DIR, 'test-regularizers.cfg'),

                                                     show_progress_bars=False)
    train_specs = trainer.model_factory.create_train_specs()
    trainer.register(experiment)
    experiment.init_empty_trackables(topic_model)
    trainer.train(topic_model, train_specs, effects=False, cache_theta=True)
    experiment.save_experiment(save_phi=True)
    # CHANGE THIS SO THAT YOU DO NOT OVERRIDE the previously persisted model phi matrix object and results json
    return 'candidate.phi'


@pytest.fixture(scope='module')
def pr(megadata_dir):
    pr = PsiReporter()
    pr.dataset = megadata_dir
    return pr


@pytest.fixture(scope='module', params=[3])
def datasets(megadata_dir, pr, request):
    """Dataset and models trained on it parametrized on the number of document classes defined"""
    data = {3: {'dataset': megadata_dir,
                'models': [
                    {
                        'label': 'sgo_1.phi',
                        'expected-string': 'liberal_Class           70.8 71.9\n'
                                           'centre_Class       70.8      65.1\n'
                                           'conservative_Class 71.9 65.1     \n',
                        'expected-matrix': [[0, 70.8, 71.9],
                                            [70.8, 0, 65.1],
                                            [71.9, 65.1, 0]]
                    },
                    {
                        'label': 'candidate.phi',
                        'expected-string': 'liberal_Class           34.9 35.0\n'
                                           'centre_Class       34.9      29.2\n'
                                           'conservative_Class 35.0 29.2     \n',
                        'expected-matrix': [[0, 34.9, 35],
                                            [34.9, 0, 29.2],
                                            [35, 29.2, 0]]
                     }
                ]
            }}
    subdict = data[request.param]
    for model_data in subdict['models']:
        model_data['reported-distances'] = [[float('{:.1f}'.format(k)) for k in z] for z in pr.values([model_data['label']], topics_set='domain')[0]]
        model_data['reported-string'] = pr.pformat([model_data['label']], topics_set='domain', show_model_name=False, show_class_names=True)
    return subdict

# Insert fixture 'regression_model_path', which is the path to a newly trained model, with settings in regression.cfg, to test if it places the ideology classes relatively in correct ordering on the political spectrum
def test_sanity(pr, datasets):
    pr.dataset = datasets['dataset']
    for d in datasets['models']:
        # array = [[float('{:.1f}'.format(k)) for k in z] for z in pr.values([d['label']], topics_set='domain')[0]]
        for i, row in enumerate(d['reported-distances']):
            assert row[:i] == sorted(row[:i], reverse=True)
            assert row[i] == 0
            assert row[i:] == sorted(row[i:], reverse=False)

#
# @pytest.fixture(scope='module', params=[3])
# def best_models(request):
#     """Models achieving best separation between the p(c|t) distributions and their exected Psi-matrix related results"""
#     return {3: [{'string': 'liberal_Class           70.8 71.9\n'
#                           'centre_Class       70.8      65.1\n'
#                           'conservative_Class 71.9 65.1     \n',
#                 'label': 'sgo_1.phi',
#                 'data': [[0, 70.8, 71.9],
#                          [70.8, 0, 65.1],
#                          [71.9, 65.1, 0]]},
#                 # {'string': '',
#                 #  'label': 'candidate',
#                 #  'data': [[]]}
#                 ],
#             }[request.param]
# #
# # @pytest.fixture(scope='module')
# # def
#
# def sane_separation(data):
# [[float('{:.1f}'.format(y)) for y in x] for x in pr.values([request.param], topics_set='domain')[0]]
@pytest.fixture(scope='module',)
def known_expected(datasets):
    d = datasets.copy()
    d['models'] = [x for x in d['models'] if 'expected-string' in x and 'expected-matrix' in x]
    return d


def test_divergence_distances_computer(pr, known_expected): # , regression_model_path):
    pr.dataset = known_expected['dataset']
    for d in known_expected['models']:
        assert d['reported-string'] == d['expected-string']
        assert d['reported-distances'] == d['expected-matrix']


# def test_loading(datasets, megadata_dir):
#     new_exp_obj = Experiment(megadata_dir)
#     trainer = TrainerFactory().create_trainer(megadata_dir)
#     trainer.register(new_exp_obj)
#     for d in datasets['models']:
#         loaded_model = new_exp_obj.load_experiment(d['label'].replace('.phi', ''))
#
#         assert loaded_model.regularizer_wrappers
#     return loaded_model, new_exp_obj
# @pytest.mark.parametrize("model_phi_path", [
#     'sgo_1.phi',
#     'candidate.phi'
# ])


# # T.O.D.O. implement a function that takes a KL distances matrix (see 'best_seperation_performance' fixture) and return a quantitative measure of quality
# # def fitness(data):
# #     pass


1			import os
2			import pytest
3			from topic_modeling_toolkit.reporting import PsiReporter
4			from topic_modeling_toolkit.patm import Experiment, TrainerFactory
5
6
7			@pytest.fixture(scope='session')
8			def megadata_dir(unittests_data_dir):
9			return os.path.join(unittests_data_dir, 'megadata')
10
11
12			@pytest.fixture(scope='session')
13			def regression_model_path(megadata_dir):
14			trainer = TrainerFactory().create_trainer(megadata_dir, exploit_ideology_labels=True, force_new_batches=False)
15			experiment = Experiment(megadata_dir)
16			topic_model = trainer.model_factory.create_model('candidate', os.path.join(MODULE_DIR, 'regression.cfg'), reg_cfg=os.path.join(MODULE_DIR, 'test-regularizers.cfg'),
			0 ignored issues – show Comprehensibility Best Practice introduced 2019-09-23 04:06 UTC by Report Bug Copy Issue Report The variable `MODULE_DIR` does not seem to be defined. Loading history...
17			show_progress_bars=False)
18			train_specs = trainer.model_factory.create_train_specs()
19			trainer.register(experiment)
20			experiment.init_empty_trackables(topic_model)
21			trainer.train(topic_model, train_specs, effects=False, cache_theta=True)
22			experiment.save_experiment(save_phi=True)
23			# CHANGE THIS SO THAT YOU DO NOT OVERRIDE the previously persisted model phi matrix object and results json
24			return 'candidate.phi'
25
26
27			@pytest.fixture(scope='module')
28			def pr(megadata_dir):
29			pr = PsiReporter()
30			pr.dataset = megadata_dir
31			return pr
32
33
34			@pytest.fixture(scope='module', params=[3])
35			def datasets(megadata_dir, pr, request):
36			"""Dataset and models trained on it parametrized on the number of document classes defined"""
37			data = {3: {'dataset': megadata_dir,
38			'models': [
39			{
40			'label': 'sgo_1.phi',
41			'expected-string': 'liberal_Class 70.8 71.9\n'
42			'centre_Class 70.8 65.1\n'
43			'conservative_Class 71.9 65.1 \n',
44			'expected-matrix': [[0, 70.8, 71.9],
45			[70.8, 0, 65.1],
46			[71.9, 65.1, 0]]
47			},
48			{
49			'label': 'candidate.phi',
50			'expected-string': 'liberal_Class 34.9 35.0\n'
51			'centre_Class 34.9 29.2\n'
52			'conservative_Class 35.0 29.2 \n',
53			'expected-matrix': [[0, 34.9, 35],
54			[34.9, 0, 29.2],
55			[35, 29.2, 0]]
56			}
57			]
58			}}
59			subdict = data[request.param]
60			for model_data in subdict['models']:
61			model_data['reported-distances'] = [[float('{:.1f}'.format(k)) for k in z] for z in pr.values([model_data['label']], topics_set='domain')[0]]
62			model_data['reported-string'] = pr.pformat([model_data['label']], topics_set='domain', show_model_name=False, show_class_names=True)
63			return subdict
64
65			# Insert fixture 'regression_model_path', which is the path to a newly trained model, with settings in regression.cfg, to test if it places the ideology classes relatively in correct ordering on the political spectrum
66			def test_sanity(pr, datasets):
67			pr.dataset = datasets['dataset']
68			for d in datasets['models']:
69			# array = [[float('{:.1f}'.format(k)) for k in z] for z in pr.values([d['label']], topics_set='domain')[0]]
70			for i, row in enumerate(d['reported-distances']):
71			assert row[:i] == sorted(row[:i], reverse=True)
72			assert row[i] == 0
73			assert row[i:] == sorted(row[i:], reverse=False)
74
75			#
76			# @pytest.fixture(scope='module', params=[3])
77			# def best_models(request):
78			# """Models achieving best separation between the p(c\|t) distributions and their exected Psi-matrix related results"""
79			# return {3: [{'string': 'liberal_Class 70.8 71.9\n'
80			# 'centre_Class 70.8 65.1\n'
81			# 'conservative_Class 71.9 65.1 \n',
82			# 'label': 'sgo_1.phi',
83			# 'data': [[0, 70.8, 71.9],
84			# [70.8, 0, 65.1],
85			# [71.9, 65.1, 0]]},
86			# # {'string': '',
87			# # 'label': 'candidate',
88			# # 'data': [[]]}
89			# ],
90			# }[request.param]
91			# #
92			# # @pytest.fixture(scope='module')
93			# # def
94			#
95			# def sane_separation(data):
96			# [[float('{:.1f}'.format(y)) for y in x] for x in pr.values([request.param], topics_set='domain')[0]]
97			@pytest.fixture(scope='module',)
98			def known_expected(datasets):
99			d = datasets.copy()
100			d['models'] = [x for x in d['models'] if 'expected-string' in x and 'expected-matrix' in x]
101			return d
102
103
104			def test_divergence_distances_computer(pr, known_expected): # , regression_model_path):
105			pr.dataset = known_expected['dataset']
106			for d in known_expected['models']:
107			assert d['reported-string'] == d['expected-string']
108			assert d['reported-distances'] == d['expected-matrix']
109
110
111			# def test_loading(datasets, megadata_dir):
112			# new_exp_obj = Experiment(megadata_dir)
113			# trainer = TrainerFactory().create_trainer(megadata_dir)
114			# trainer.register(new_exp_obj)
115			# for d in datasets['models']:
116			# loaded_model = new_exp_obj.load_experiment(d['label'].replace('.phi', ''))
117			#
118			# assert loaded_model.regularizer_wrappers
119			# return loaded_model, new_exp_obj
120			# @pytest.mark.parametrize("model_phi_path", [
121			# 'sgo_1.phi',
122			# 'candidate.phi'
123			# ])
124
125
126			# # T.O.D.O. implement a function that takes a KL distances matrix (see 'best_seperation_performance' fixture) and return a quantitative measure of quality
127			# # def fitness(data):
128			# # pass
129

boromir674 / topic-modeling-toolkit

tests.regressions_test A last analyzed 2022-01-13 13:56 UTC

Complexity

Size/Duplication

Importance

7 Functions

Duplication Side-by-Side

Filter issues like

tests.regressions_test A
last analyzed 2022-01-13 13:56 UTC