Passed
Push — datapoints-package (a11eff)
by Konstantinos
created 02:48

so_magic.clustering.clustering.ReportingClustering._get_rows()    Rating: A

Complexity

Conditions 3

Size

Total Lines 9
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric   Value
cc       3
eloc     9
nop      3
dl       0
loc      9
rs       9.95
c        0
b        0
f        0

# from sklearn.neighbors import DistanceMetric  # assumed import for DistanceMetric, used by distance() below
# import attr

# from .computing import ClusterDistroComputer


# @attr.s
# class BaseClustering:
#     """Items grouped into clusters/subgroups (e.g. based on proximity or similarity)."""
#     clusters = attr.ib(init=True)
#     id = attr.ib(init=True)

#     def __iter__(self):
#         return iter([(cluster.id, cluster) for cluster in self.clusters])

#     def __len__(self):
#         return len(self.clusters)

#     def __getitem__(self, item):
#         return self.clusters[item]

#     def members_n_assigned_clusters(self):
#         """Generate tuples of cluster members and their assigned cluster index."""
#         for i, cl in enumerate(self.clusters):
#             for member_id in iter(cl):
#                 yield member_id, i


# @attr.s
# class DatapointsCluster(BaseClustering):
#     """
#     Provide a method that returns the datapoints of a cluster, and a method that returns,
#     for a given attribute, the attribute value of every datapoint.
#     """
#     datapoints_extractor = attr.ib(init=True)  # call(cluster) -> datapoints
#     attributes_extractor = attr.ib(init=True)  # call(datapoints, attribute) -> iterable of attribute values, one per datapoint

#     distro_computer = attr.ib(init=False, default=attr.Factory(lambda self: ClusterDistroComputer.from_extractors(
#         self.datapoints_extractor, self.attributes_extractor), takes_self=True))
#     members = attr.ib(init=False, default=attr.Factory(dict))  # cache of items already located in the clustering, so we do not search for them again


# @attr.s
# class ReportingClustering(DatapointsCluster):
#     """
#     An instance of this class encapsulates the behaviour of a clustering: a set of clusters estimated on some data.
#     """

#     pre = 2  # default number of decimal digits for reported probabilities

#     def __str__(self):
#         body, max_lens = self._get_rows(threshold=10, prob_precision=self.pre)
#         header = self._get_header(max_lens, self.pre, list(range(len(self))))
#         return header + body

#     def cluster_of(self, item):
#         h = hash(item)
#         return self.members.get(h, self._find_cluster(h))

#     def _find_cluster(self, item):
#         """Seek through the clusters for the given item and cache the id of the cluster it belongs to."""
#         for cluster in self.clusters:
#             if item in cluster.members:
#                 self.members[item] = cluster.id
#                 return self.members[item]

#     def gen_clusters(self, selected):
#         """
#         Generate the Cluster objects corresponding to the given indices.
#         :param selected: the indices of the clusters to select
#         :type selected: list
#         :return: the selected clusters
#         :rtype: Cluster
#         """
#         for i in selected:
#             yield self[i]

#     def get_closest(self, an_id, n, metric='euclidean'):
#         """Find the n closest vectors (within the same cluster) to the vector corresponding to the input id."""
#         return sorted(map(lambda x: distance(self.id2vec[an_id], x, metric=metric),
#                           [_ for _ in self[self._find_cluster(an_id)]]))[:n]

#     def compute_stats1(self, cluster, attributes):
#         self._stats = self.distro_computer(cluster, attributes)

#     def print_clusters(self, selected_clusters='all', threshold=10, prec=2):
#         if selected_clusters == 'all':
#             selected_clusters = range(len(self))
#         body, max_lens = self._get_rows(threshold=threshold, prob_precision=prec)
#         header = self._get_header(max_lens, prec, selected_clusters)
#         # header = ' - '.join('id:{} len:{}'.format(i, len(self[i])) + ' ' * (3-9 + prec + max_lens[i] - len(str(len(self[i])))) for i in selected_clusters) + '\n'
#         print(header + body)

#     def print_map(self):
#         print(self.map_buffer)

#     def _get_header(self, max_lens, prec, selected_clusters):
#         """Build the one-line header: one column per selected cluster, reporting its id and size, padded to the column width."""
#         assert len(max_lens) == len(selected_clusters)
#         return ' - '.join(
#             'id:{} len:{}'.format(cl.id, len(cl)) + ' ' * (prec + max_lens[i] - len(str(len(cl))) - 6) for i, cl in
#             enumerate(self.gen_clusters(selected_clusters))) + '\n'

#     def _get_rows(self, threshold=10, prob_precision=3):
#         """Build the table body: one column per cluster, each row pairing one of the cluster's most common
#         tokens with its in-cluster relative frequency. Also return the maximum token length per cluster."""
#         max_token_lens = [max(map(lambda x: len(x[0]), cl.grams.most_common(threshold))) for cl in self.clusters]
#         b = ''
#         for i in range(threshold):
#             b += ' | '.join('{} '.format(cl.grams.most_common(threshold)[i][0]) + ' ' * (
#                         max_token_lens[j] - len(cl.grams.most_common(threshold)[i][0])) +
#                             "{1:.{0}f}".format(prob_precision, cl.grams.most_common(threshold)[i][1] / len(cl)) for
#                             j, cl in enumerate(self.clusters)) + '\n'
#         return b, max_token_lens


# def distance(vec1, vec2, metric='euclidean'):
#     return DistanceMetric.get_metric(metric).pairwise([vec1, vec2])[0][1]
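
Since the whole module above is commented out, the quickest way to see what the reporting output would look like is a small, self-contained sketch. FakeCluster and render below are hypothetical stand-ins (not part of the so_magic package) that mirror the _get_header/_get_rows logic: one column per cluster, each row pairing a frequent token with its relative frequency inside that cluster.

from collections import Counter

# Hypothetical stand-in for a cluster: the listing above assumes each cluster
# exposes an `id`, a `grams` Counter of tokens, and a length.
class FakeCluster:
    def __init__(self, id_, tokens):
        self.id = id_
        self.grams = Counter(tokens)

    def __len__(self):
        return sum(self.grams.values())


def render(clusters, threshold=3, precision=2):
    # Mirror the _get_header/_get_rows logic: one column per cluster, each row
    # pairing a frequent token with its in-cluster relative frequency.
    tops = [cl.grams.most_common(threshold) for cl in clusters]
    widths = [max(len(token) for token, _ in top) for top in tops]
    header = ' - '.join(
        'id:{} len:{}'.format(cl.id, len(cl)).ljust(widths[j] + precision + 4)
        for j, cl in enumerate(clusters)) + '\n'
    body = ''
    for i in range(threshold):
        body += ' | '.join(
            '{} {:.{}f}'.format(tops[j][i][0].ljust(widths[j]),
                                tops[j][i][1] / len(cl), precision)
            for j, cl in enumerate(clusters)) + '\n'
    return header + body


if __name__ == '__main__':
    c0 = FakeCluster(0, ['alpha', 'alpha', 'beta', 'gamma'])
    c1 = FakeCluster(1, ['delta', 'delta', 'delta', 'epsilon', 'zeta'])
    print(render([c0, c1]))

The real ReportingClustering assembles the same kind of table from its clusters attribute, with pre controlling how many decimal digits of each probability are printed.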