import os
import re
import warnings
from glob import glob
from functools import reduce

import attr

from topic_modeling_toolkit.results import ExperimentalResults

from .fitness import FitnessFunction


KERNEL_SUB_ENTITIES = ('coherence', 'contrast', 'purity', 'size')

MAX_DECIMALS = 2  # this constant should agree with patm.modeling.experiment.Experiment.MAX_DECIMALS

def _get_kernel_sub_hash(entity):
    """Build the COLUMNS_HASH entry for a single kernel sub-metric ('kernel-<entity>')."""
    assert entity in KERNEL_SUB_ENTITIES
    return {'kernel-' + entity: {
        'scalar-extractor': lambda x, y: getattr(getattr(x.tracked, 'kernel' + y[2:]).average, entity).last if hasattr(x.tracked, 'kernel' + y[2:]) else None,
        'list-extractor': lambda x, y: getattr(getattr(x.tracked, 'kernel' + y[2:]).average, entity).all if hasattr(x.tracked, 'kernel' + y[2:]) else None,
        'column-title': lambda x: 'k' + entity[:3:2] + '.' + str(x)[2:],
        'to-string': '{:.4f}',
        'definitions': lambda x: ['kernel-{}-{}'.format(entity, y) for y in x.tracked.kernel_thresholds]}}
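
# Illustrative sketch (kept as comments so nothing runs at import time): _get_kernel_sub_hash('coherence')
# produces one entry keyed 'kernel-coherence'. Its extractors expect a threshold definition such as
# '0.80' (mapped to the tracked attribute 'kernel80') and its column title renders as 'kch.80'.
# The '0.80' threshold is hypothetical; real values come from ExperimentalResults.tracked.kernel_thresholds.
#
#   sub_hash = _get_kernel_sub_hash('coherence')
#   sub_hash['kernel-coherence']['column-title']('0.80')   # -> 'kch.80'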

COLUMNS_HASH = {
    'nb-topics': {
        'scalar-extractor': lambda x: x.scalars.nb_topics,
        'column-title': lambda: 'tpcs'},
    'collection-passes': {
        'scalar-extractor': lambda x: x.scalars.dataset_iterations,
        'column-title': lambda: 'col-i'},
    'document-passes': {
        'scalar-extractor': lambda x: x.scalars.document_passes,
        'column-title': lambda: 'doc-i'},
    'total-phi-updates': {
        'scalar-extractor': lambda x: x.scalars.dataset_iterations * x.scalars.document_passes,
        'column-title': lambda: 'phi-u'},
    'perplexity': {
        'scalar-extractor': lambda x: x.tracked.perplexity.last,
        'list-extractor': lambda x: x.tracked.perplexity.all,
        'column-title': lambda: 'prpl',
        'to-string': '{:.1f}'},
    'top-tokens-coherence': {
        'scalar-extractor': lambda x, y: getattr(x.tracked, 'top' + str(y)).average_coherence.last if hasattr(x.tracked, 'top' + str(y)) else None,
        'list-extractor': lambda x, y: getattr(x.tracked, 'top' + str(y)).average_coherence.all if hasattr(x.tracked, 'top' + str(y)) else None,
        'column-title': lambda x: 'top' + str(x) + 'ch',
        'to-string': '{:.4f}',
        'definitions': lambda x: ['top-tokens-coherence-' + str(y) for y in x.tracked.top_tokens_cardinalities]},
    'sparsity-phi': {
        'scalar-extractor': lambda x, y: getattr(x.tracked, 'sparsity_phi_' + y).last if hasattr(x.tracked, 'sparsity_phi_' + y) else None,
        'list-extractor': lambda x, y: getattr(x.tracked, 'sparsity_phi_' + y).all if hasattr(x.tracked, 'sparsity_phi_' + y) else None,
        'column-title': lambda y: 'spp@' + y,
        'to-string': '{:.2f}',
        'definitions': lambda x: ['sparsity-phi-{}'.format(y) for y in x.tracked.modalities_initials]},
    'sparsity-theta': {
        'scalar-extractor': lambda x: x.tracked.sparsity_theta.last,
        'list-extractor': lambda x: x.tracked.sparsity_theta.all,
        'column-title': lambda: 'spt',
        'to-string': '{:.2f}'},
    'background-tokens-ratio': {
        'scalar-extractor': lambda x, y: getattr(x.tracked, 'background_tokens_ratio_' + str(y)[2:]).last if hasattr(x.tracked, 'background_tokens_ratio_' + str(y)[2:]) else None,
        'list-extractor': lambda x, y: getattr(x.tracked, 'background_tokens_ratio_' + str(y)[2:]).all if hasattr(x.tracked, 'background_tokens_ratio_' + str(y)[2:]) else None,
        'column-title': lambda x: 'btr.' + str(x)[2:],
        'to-string': '{:.2f}',
        'definitions': lambda x: ['background-tokens-ratio-{}'.format(y[:4]) for y in x.tracked.background_tokens_thresholds]},
    'regularizers': {
        'scalar-extractor': lambda x: '[{}]'.format(', '.join(map(regularizers_format, x.regularizers))),
        'column-title': lambda: 'regs'},
    'kernel-size': {
        'scalar-extractor': lambda x, y: getattr(x.tracked, 'kernel' + y[2:]).average.size.last if hasattr(x.tracked, 'kernel' + y[2:]) else None,
        'list-extractor': lambda x, y: getattr(x.tracked, 'kernel' + y[2:]).average.size.all if hasattr(x.tracked, 'kernel' + y[2:]) else None,
        'column-title': lambda x: 'k' + 'size'[:3:2] + '.' + str(x)[2:],
        'to-string': '{:.1f}',
        'definitions': lambda x: ['kernel-size-{}'.format(y) for y in x.tracked.kernel_thresholds]}
}


def regularizers_format(reg_def_string):
    return reg_def_string
    # try:
    #     return '-'.join(re.findall(r"(?:^|-)(\w{1,4})\w*", reg_def_string[:reg_def_string.index("|")])) + reg_def_string[reg_def_string.index("|"):]
    # except ValueError as e:
    #     print(e)
    #     return reg_def_string


COLUMNS_HASH = reduce(lambda x, y: dict(y, **x), [COLUMNS_HASH] + [_get_kernel_sub_hash(z) for z in KERNEL_SUB_ENTITIES])
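
# A minimal sketch of the merge semantics above (names are illustrative): dict(y, **x) keeps the
# accumulated dict's entries on key collisions, so the hand-written 'kernel-size' entry in
# COLUMNS_HASH takes precedence over the one generated by _get_kernel_sub_hash('size').
#
#   base = {'kernel-size': 'explicit'}
#   extra = {'kernel-size': 'generated', 'kernel-purity': 'generated'}
#   dict(extra, **base)   # -> {'kernel-size': 'explicit', 'kernel-purity': 'generated'}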


class ResultsHandler(object):
    _list_selector_hash = {str: lambda x: x[0] if x[1] == 'all' else None,
                           range: lambda x: [x[0][_] for _ in x[1]],
                           int: lambda x: x[0][:x[1]],
                           list: lambda x: [x[0][_] for _ in x[1]]}
    _QUANTITY_2_EXTRACTOR_KEY = {'last': 'scalar', 'all': 'list'}
    DYNAMIC_COLUMNS = ['kernel-size', 'kernel-coherence', 'kernel-contrast', 'kernel-purity', 'top-tokens-coherence', 'sparsity-phi', 'background-tokens-ratio']
    DEFAULT_COLUMNS = ['nb-topics', 'collection-passes', 'document-passes', 'total-phi-updates', 'perplexity'] + \
        DYNAMIC_COLUMNS[:-1] + ['sparsity-theta'] + [DYNAMIC_COLUMNS[-1]] + ['regularizers']

    def __init__(self, collections_root_path, results_dir_name='results'):
        self._collections_root = collections_root_path
        self._results_dir_name = results_dir_name
        self._results_hash = {}
        self._fitness_function_hash = {}
        self._list_selector = None

    def get_experimental_results(self, collection_name, sort='', selection='all'):
        """
        Call this method to get a list of experimental result objects from topic models trained on the given collection.\n
        :param str collection_name: the name of the collection directory under the collections root
        :param str sort: metric to sort by; if empty, the experimental results are ordered alphabetically by their json path
        :param str or range or int or list selection: how to select a subset of the experimental results from the given collection\n
        - if selection == 'all', returns every experimental results object "extracted" from the jsons
        - if type(selection) == range, returns a "slice" of the experimental results based on the range
        - if type(selection) == int, returns the first n experimental results
        - if selection is a list of ints, they are treated as indices to sample the experimental results with
        - if selection is a list of strings, they are treated as model labels to select by
        :return: the ExperimentalResults objects
        :rtype: list
        """
        result_paths = glob('{}/*.json'.format(os.path.join(self._collections_root, collection_name, self._results_dir_name)))
        if type(selection) == list and all(type(x) == str for x in selection):  # the input list contains model labels
            e = self._get_experimental_results([_ for _ in result_paths if re.search(r'/(?:{})\.json'.format('|'.join(selection)), _)])
            if len(e) != len(selection):
                raise ValueError("Found {} experimental results for {} requested model labels.\nFound labels: {}\nRequested labels: {}".format(
                    len(e), len(selection), [_.scalars.model_label for _ in e], selection))
            return e
        self._list_selector = lambda y: ResultsHandler._list_selector_hash[type(selection)]([y, selection])
        r = self._get_experimental_results(result_paths, metric_sorter=self._get_metric(sort))

        assert len(result_paths) == len(r)
        return self._list_selector(r)
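
    # Usage sketch (the root path and collection name are hypothetical, not part of this module):
    #
    #   handler = ResultsHandler('/data/collections')
    #   results = handler.get_experimental_results('my_collection', sort='perplexity', selection=5)
    #   labels = [r.scalars.model_label for r in results]
    #
    # With a non-empty sort, the results are ordered by the corresponding single-metric
    # FitnessFunction (best first, as that function defines it) before the selection is applied.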

    def _get_experimental_results(self, results_paths, metric_sorter=None):
        # print('_get_experimental_results.metric_sorter: {}'.format(metric_sorter))
        if metric_sorter is None:
            return [self._process_result_path(_) for _ in sorted(results_paths)]
        return metric_sorter([self._process_result_path(x) for x in results_paths])
        # sorted([self._process_result_path(x) for x in results_paths], key=metric_sorter, reverse=True)
        # sorter = ResultsSorter.from_function(metric_sorter)
        # return sorter([self._process_result_path(x) for x in results_paths])

        # if metric_sorter:
        #     assert hasattr(metric_sorter, '__call__')
        #     print(' Metric function:', metric_sorter.__name__)

    def _process_result_path(self, result_path):
        if result_path not in self._results_hash:
            self._results_hash[result_path] = ExperimentalResults.create_from_json_file(result_path)
        return self._results_hash[result_path]

    def _get_metric(self, metric):
        if not metric:
            return None
        if metric not in self._fitness_function_hash:
            self._fitness_function_hash[metric] = FitnessFunction.single_metric(metric)
        return MetricSorter(metric, lambda x: self._fitness_function_hash[metric].compute([ResultsHandler.extract(x, metric, 'last')]))

    @staticmethod
    def get_titles(column_definitions):
        return [ResultsHandler.get_abbreviation(x) for x in column_definitions]

    @staticmethod
    def get_abbreviation(definition):
        tokens, parameters = ResultsHandler._parse_column_definition(definition)
        return COLUMNS_HASH['-'.join(tokens)]['column-title'](*parameters)

    @staticmethod
    def stringnify(column, value):
        """
        :param str column: key or definition; example values: 'perplexity', 'kernel-coherence', 'kernel-coherence-0.80'
        :param value: the value to render with the column's 'to-string' format
        :return: the formatted value
        :rtype: str
        """
        return COLUMNS_HASH.get(column, COLUMNS_HASH[ResultsHandler._get_hash_key(column)]).get('to-string', '{}').format(value)
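
    # Formatting sketch (the numeric values are made up):
    #
    #   ResultsHandler.stringnify('perplexity', 1234.5678)            # -> '1234.6'
    #   ResultsHandler.stringnify('kernel-coherence-0.80', 0.73219)   # -> '0.7322'
    #
    # A definition carrying a threshold falls back to its token-only hash key via _get_hash_key,
    # so 'kernel-coherence-0.80' and 'kernel-coherence' share the same 'to-string' template.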

    # @staticmethod
    # def get_tau_trajectory(exp_results, matrix_name):
    #     """
    #     :param results.experimental_results.ExperimentalResults exp_results:
    #     :param matrix_name:
    #     :return:
    #     """
    #     return getattr(exp_results.tracked.tau_trajectories, matrix_name).all

    @staticmethod
    def extract(exp_results, column_definition, quantity):
        """
        Call this method to query the given experimental results object about a specific metric. Supports requesting
        either the last value or all the values tracked along the training process.
        :param results.experimental_results.ExperimentalResults exp_results:
        :param str column_definition:
        :param str quantity: must be one of {'last', 'all'}
        :return:
        """
        tokens, parameters = ResultsHandler._parse_column_definition(column_definition)
        return COLUMNS_HASH['-'.join(tokens)][ResultsHandler._QUANTITY_2_EXTRACTOR_KEY[quantity] + '-extractor'](*([exp_results] + parameters))
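
    # Illustrative calls (exp_results stands for any ExperimentalResults instance and the
    # '0.80' threshold is hypothetical):
    #
    #   ResultsHandler.extract(exp_results, 'perplexity', 'last')            # final tracked value
    #   ResultsHandler.extract(exp_results, 'kernel-coherence-0.80', 'all')  # whole trajectory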

    @staticmethod
    def get_all_columns(exp_results, requested_entities):
        return reduce(lambda i, j: i + j,
                      [COLUMNS_HASH[x]['definitions'](exp_results) if x in ResultsHandler.DYNAMIC_COLUMNS else [x] for x in requested_entities])
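
    # For instance, requesting ['perplexity', 'kernel-coherence'] expands the dynamic entry into
    # one definition per tracked kernel threshold (assuming, hypothetically, thresholds 0.60 and 0.80):
    #
    #   ResultsHandler.get_all_columns(exp_results, ['perplexity', 'kernel-coherence'])
    #   # -> ['perplexity', 'kernel-coherence-0.60', 'kernel-coherence-0.80']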

    ###### UTILITY FUNCTIONS ######
    @staticmethod
    def _parse_column_definition(definition):
        return [list([_f for _f in y if _f]) for y in zip(*[(x, None) if ResultsHandler._is_token(x) else (None, x) for x in definition.split('-')])]
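
    # Parsing sketch: numeric, single-character and '@'-prefixed elements are treated as
    # parameters, the rest as tokens naming the COLUMNS_HASH entry.
    #
    #   ResultsHandler._parse_column_definition('kernel-coherence-0.80')
    #   # -> [['kernel', 'coherence'], ['0.80']]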

    @staticmethod
    def _get_hash_key(column_definition):
        return '-'.join([_ for _ in column_definition.split('-') if ResultsHandler._is_token(_)])

    @staticmethod
    def _is_token(definition_element):
        try:
            _ = float(definition_element)
            return False
        except ValueError:
            if definition_element[0] == '@' or len(definition_element) == 1:
                return False
            return True

    @staticmethod
    def _label_selection(labels, experimental_results_list):
        """Returns the indices of the input labels based on the input experimental results list"""
        model_labels = [x.scalars.model_label for x in experimental_results_list]
        return [model_labels.index(l) for l in labels if l in model_labels]


##########################
def _build_sorter(instance, attribute, value):
    if not hasattr(value, '__call__'):
        raise TypeError("The second constructor argument must be a callable object")
    # this sorts from 'bigger' to 'smaller' (independently of how the <, > operators have been defined)
    instance.experimental_result_sorter = lambda exp_res_objs_list: sorted(exp_res_objs_list, key=value, reverse=True)


@attr.s(cmp=True, repr=True, str=True)
class MetricSorter(object):
    name = attr.ib(init=True, converter=str, cmp=True, repr=True)
    experimental_result_sorter = attr.ib(init=True, validator=_build_sorter, cmp=True, repr=True)

    @classmethod
    def from_function(cls, function):
        return cls(function.__name__, function)

    def __call__(self, *args, **kwargs):
        return self.experimental_result_sorter(args[0])
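
# A minimal usage sketch (the fitness lambda is illustrative and assumes lower perplexity is better):
#
#   sorter = MetricSorter('perplexity', lambda exp: -exp.tracked.perplexity.last)
#   ordered = sorter(list_of_experimental_results)   # lowest-perplexity results first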
############################


if __name__ == '__main__':
    ms = ModelSelector(collection_results_dir_path='/data/thesis/data/collections/dd/results')