Passed
Push — datapoints-package ( a11eff )
by Konstantinos
02:48
created

so_magic.clustering.helpers.DistroReporter._set_state()   A

Complexity

Conditions 5

Size

Total Lines 10
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 10
nop 5
dl 0
loc 10
rs 9.3333
c 0
b 0
f 0
1
# import numpy as np
2
3
4
# class DistroReporter(object):
5
6
#     def __init__(self):
7
#         self.cl = None
8
#         self.var = ''
9
#         self.sl = None
10
11
#         self.ordered_value_labels = []
12
#         self.max_nb_rows = 0
13
#         self.max_label_len = 0
14
#         self.generators = []
15
16
#     def print_distros(self, clustering, variable, selected_clusters='all', prec=3):
17
#         """
18
#         Prints the discrete distribution of the values the input variable takes for every selected cluster. Frequencies are shown in descending order.\n
19
#         :param clustering: the Clustering to select from
20
#         :type clustering: clustering.Cluster
21
#         :param variable: the field name of interest
22
#         :type variable: str
23
#         :param selected_clusters: can be a list of indices pointing to Cluster objects in the Clustering structure. Can take the 'all' value to indicate selecting every cluster
24
#         :type selected_clusters: list or str
25
#         :param prec: the precision of the frequencies to visualize; the number of decimal digits to show
26
#         :type prec: int
27
#         """
28
#         self._set_state(clustering, variable, selected_clusters, prec)
29
#         body = ''
30
#         for i in range(self.max_nb_rows):
31
#             body += ' | '.join(str(self.generators[j].__next__()) for j in range(len(self.sl))) + '\n'
32
#         header = ' - '.join('id:{} len:{}'.format(cl.id, len(cl)) + ' '*(prec + self.max_label_len[i] - len(str(len(cl))) - 6) for i, cl in enumerate(self.cl.gen_clusters(self.sl))) + '\n'
33
#         print(header + body)
34
35
#     def _set_state(self, clustering, variable, selected_clusters, prec):
36
#         self.cl = clustering
37
#         self.var = variable
38
#         if selected_clusters == 'all':
39
#             selected_clusters = range(len(self.cl))
40
#         self.sl = selected_clusters
41
#         self.ordered_value_labels = [sorted(cl.freqs[self.var], key=lambda x: cl.freqs[self.var][x], reverse=True) for cl in self.cl.gen_clusters(self.sl)]
42
#         self.max_nb_rows = max(map(lambda x: len(x), self.ordered_value_labels))
43
#         self.max_label_len = [max(map(lambda x: len(x), cl.freqs[variable])) for cl in self.cl.gen_clusters(self.sl)]
44
#         self.generators = [self._get_generator(i, prec) for i in range(len(self.sl))]
45
46
#     def _gen_entries(self, ind, prec):
47
#         i = 0
48
#         for i, el in enumerate(self.ordered_value_labels[ind]):
49
#             yield '{0} {1}{3:.{2}f}'.format(el, ' '*(self.max_label_len[ind] - len(el)), prec, self.cl[self.sl[ind]].freqs[self.var][el])
50
#         while i < self.max_nb_rows - 1:
51
#             yield ' ' * (self.max_label_len[ind] + prec + 3)
52
#             i += 1
53
54
#     def _get_generator(self, ind, prec):
55
#         return (_ for _ in self._gen_entries(ind, prec))
56