|
1
|
|
|
# import numpy as np |
|
2
|
|
|
|
|
3
|
|
|
|
|
4
|
|
|
# class DistroReporter(object): |
|
5
|
|
|
|
|
6
|
|
|
# def __init__(self): |
|
7
|
|
|
# self.cl = None |
|
8
|
|
|
# self.var = '' |
|
9
|
|
|
# self.sl = None |
|
10
|
|
|
|
|
11
|
|
|
# self.ordered_value_labels = [] |
|
12
|
|
|
# self.max_nb_rows = 0 |
|
13
|
|
|
# self.max_label_len = 0 |
|
14
|
|
|
# self.generators = [] |
|
15
|
|
|
|
|
16
|
|
|
# def print_distros(self, clustering, variable, selected_clusters='all', prec=3): |
|
17
|
|
|
# """ |
|
18
|
|
|
# Prints the discrete distribution of the values the input variable takes for every selected cluster. Frequencies are shown in descending order.\n |
|
19
|
|
|
# :param clustering: the Clustering to select from |
|
20
|
|
|
# :type clustering: clustering.Cluster |
|
21
|
|
|
# :param variable: the field name of interest |
|
22
|
|
|
# :type variable: str |
|
23
|
|
|
# :param selected_clusters: can be a list of indices pointing to Cluster objects in the Clustering structure. Can take the 'all' value to indicate selecting every cluster |
|
24
|
|
|
# :type selected_clusters: list or str |
|
25
|
|
|
# :param prec: the precision of the frequencies to visualize; the number of decimal digits to show |
|
26
|
|
|
# :type prec: int |
|
27
|
|
|
# """ |
|
28
|
|
|
# self._set_state(clustering, variable, selected_clusters, prec) |
|
29
|
|
|
# body = '' |
|
30
|
|
|
# for i in range(self.max_nb_rows): |
|
31
|
|
|
# body += ' | '.join(str(self.generators[j].__next__()) for j in range(len(self.sl))) + '\n' |
|
32
|
|
|
# header = ' - '.join('id:{} len:{}'.format(cl.id, len(cl)) + ' '*(prec + self.max_label_len[i] - len(str(len(cl))) - 6) for i, cl in enumerate(self.cl.gen_clusters(self.sl))) + '\n' |
|
33
|
|
|
# print(header + body) |
|
34
|
|
|
|
|
35
|
|
|
# def _set_state(self, clustering, variable, selected_clusters, prec): |
|
36
|
|
|
# self.cl = clustering |
|
37
|
|
|
# self.var = variable |
|
38
|
|
|
# if selected_clusters == 'all': |
|
39
|
|
|
# selected_clusters = range(len(self.cl)) |
|
40
|
|
|
# self.sl = selected_clusters |
|
41
|
|
|
# self.ordered_value_labels = [sorted(cl.freqs[self.var], key=lambda x: cl.freqs[self.var][x], reverse=True) for cl in self.cl.gen_clusters(self.sl)] |
|
42
|
|
|
# self.max_nb_rows = max(map(lambda x: len(x), self.ordered_value_labels)) |
|
43
|
|
|
# self.max_label_len = [max(map(lambda x: len(x), cl.freqs[variable])) for cl in self.cl.gen_clusters(self.sl)] |
|
44
|
|
|
# self.generators = [self._get_generator(i, prec) for i in range(len(self.sl))] |
|
45
|
|
|
|
|
46
|
|
|
# def _gen_entries(self, ind, prec): |
|
47
|
|
|
# i = 0 |
|
48
|
|
|
# for i, el in enumerate(self.ordered_value_labels[ind]): |
|
49
|
|
|
# yield '{0} {1}{3:.{2}f}'.format(el, ' '*(self.max_label_len[ind] - len(el)), prec, self.cl[self.sl[ind]].freqs[self.var][el]) |
|
50
|
|
|
# while i < self.max_nb_rows - 1: |
|
51
|
|
|
# yield ' ' * (self.max_label_len[ind] + prec + 3) |
|
52
|
|
|
# i += 1 |
|
53
|
|
|
|
|
54
|
|
|
# def _get_generator(self, ind, prec): |
|
55
|
|
|
# return (_ for _ in self._gen_entries(ind, prec)) |
|
56
|
|
|
|