Completed
Push — appveyor ( 280314...2c0e2c )
by Konstantinos
02:09
created

DistroReporter._set_state()   A

Complexity

Conditions 5

Size

Total Lines 10
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 10
nop 5
dl 0
loc 10
rs 9.3333
c 0
b 0
f 0
1
import numpy as np
2
3
4
class DistroReporter(object):
    """Print per-cluster discrete value distributions as side-by-side text columns.

    The clustering object handed to :meth:`print_distros` is used through
    ``len(clustering)``, ``clustering[index]`` and
    ``clustering.gen_clusters(indices)``. Every cluster it provides is used
    through ``cluster.id``, ``len(cluster)`` and ``cluster.freqs[variable]``,
    a mapping from value label to frequency.
    """

    def __init__(self):
        self.cl = None  # clustering being reported on
        self.var = ''  # name of the variable whose distribution is shown
        self.sl = None  # indices of the selected clusters

        self.ordered_value_labels = []  # per column: labels by descending frequency
        self.max_nb_rows = 0  # number of body rows (longest column)
        self.max_label_len = 0  # replaced by a per-column list in _set_state
        self.generators = []  # per column: generator of formatted cells

    def print_distros(self, clustering, variable, selected_clusters='all', prec=3):
        """
        Prints the discrete distribution of the values the input variable takes for every selected cluster. Frequencies are shown in descending order.

        :param clustering: the Clustering to select from
        :type clustering: clustering.Cluster
        :param variable: the field name of interest
        :type variable: str
        :param selected_clusters: can be a list of indices pointing to Cluster objects in the Clustering structure. Can take the 'all' value to indicate selecting every cluster
        :type selected_clusters: list or str
        :param prec: the precision of the frequencies to visualize; the number of decimal digits to show
        :type prec: int
        """
        self._set_state(clustering, variable, selected_clusters, prec)
        rows = [
            ' | '.join(str(next(column)) for column in self.generators) + '\n'
            for _ in range(self.max_nb_rows)
        ]
        header = ' - '.join(
            self._header_cell(ind, cluster, prec)
            for ind, cluster in enumerate(self.cl.gen_clusters(self.sl))
        ) + '\n'
        print(header + ''.join(rows))

    def _column_width(self, ind, prec):
        """Return the character width of a body cell in column ``ind``.

        A cell is the padded label, one space, and the formatted frequency.
        The ``+ 3`` covers that space plus a leading digit and a decimal
        point, so the width is exact for frequencies below 10 printed with
        ``prec >= 1``.
        """
        return self.max_label_len[ind] + prec + 3

    def _header_cell(self, ind, cluster, prec):
        """Return the ``id:.. len:..`` title of column ``ind`` padded to the column width.

        The padding is derived from the rendered title, so ids of any length
        stay aligned with the body. A title longer than the column is
        returned unpadded.
        """
        title = 'id:{} len:{}'.format(cluster.id, len(cluster))
        return title.ljust(self._column_width(ind, prec))

    def _set_state(self, clustering, variable, selected_clusters, prec):
        """Store the inputs and precompute labels, column sizes and cell generators.

        :param clustering: the Clustering to select from
        :param variable: the field name of interest
        :param selected_clusters: indices of the clusters to show, or 'all'
        :param prec: the number of decimal digits to show for each frequency
        """
        self.cl = clustering
        self.var = variable
        # The isinstance guard keeps array-like selections from being compared to a string.
        if isinstance(selected_clusters, str) and selected_clusters == 'all':
            selected_clusters = range(len(self.cl))
        self.sl = selected_clusters

        self.ordered_value_labels = []
        self.max_label_len = []
        for cluster in self.cl.gen_clusters(self.sl):
            distro = cluster.freqs[self.var]
            self.ordered_value_labels.append(
                sorted(distro, key=lambda label, distro=distro: distro[label], reverse=True)
            )
            # default=0 keeps a cluster without any value for the variable from raising.
            self.max_label_len.append(max(map(len, distro), default=0))
        # default=0 covers an empty selection: no columns, no rows.
        self.max_nb_rows = max(map(len, self.ordered_value_labels), default=0)
        self.generators = [self._get_generator(ind, prec) for ind in range(len(self.sl))]

    def _gen_entries(self, ind, prec):
        """Yield exactly ``self.max_nb_rows`` cells for column ``ind``.

        One formatted ``label frequency`` cell is yielded per value label, in
        the precomputed order, followed by blank cells of the same width until
        the column is as long as the longest one.
        """
        labels = self.ordered_value_labels[ind]
        label_width = self.max_label_len[ind]
        freqs = self.cl[self.sl[ind]].freqs[self.var]
        for label in labels:
            yield '{0} {1}{3:.{2}f}'.format(label, ' ' * (label_width - len(label)), prec, freqs[label])
        blank = ' ' * self._column_width(ind, prec)
        # Counting from len(labels) also pads a column that has no labels at all.
        for _ in range(self.max_nb_rows - len(labels)):
            yield blank

    def _get_generator(self, ind, prec):
        """Return the cell generator for column ``ind``."""
        return self._gen_entries(ind, prec)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable _ does not seem to be defined.
Loading history...
56