Passed
Push — dev ( 3b058b...d73523 )
by Konstantinos
01:35
created

so_magic.clustering.factory   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 31
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 24
dl 0
loc 31
rs 10
c 0
b 0
f 0
wmc 3

2 Methods

Rating   Name   Duplication   Size   Complexity  
A ClusteringFactory.from_som() 0 17 2
A ClusteringFactory.inferred() 0 3 1
1
from .clustering import ReportingClustering
2
from .cluster import SOMCluster
3
4
import attr
5
6
@attr.s
class ClusteringFactory(object):
    """Factory that builds clustering objects out of trained models.

    The only implemented construction path is :meth:`from_som`;
    :meth:`inferred` is an empty placeholder.
    """
    # TODO: commented-out attribute kept from the original implementation:
    # algorithms = attr.ib(init=True)

    @classmethod
    def inferred(cls, x):
        """Placeholder alternate constructor; not implemented, returns None."""
        pass

    def from_som(self, dataset, som, nb_clusters, **kwargs):
        """Cluster the neurons of a self-organizing map and group the datapoints.

        Calls ``som.cluster`` and then assigns each datapoint to the cluster of
        the neuron it is attributed to (its best-matching unit, ``som.bmus``).

        Args:
            dataset: object exposing an indexable ``datapoints``; row ``i`` of
                ``som.bmus`` is taken to correspond to ``dataset.datapoints[i]``.
                The datapoints are stored in sets, so they must be hashable.
            som: object exposing ``cluster(nb_clusters, random_state=...)``,
                ``bmus`` (per the original comment: an array of shape
                [nb_datapoints, 2] holding neuron coordinates) and ``clusters``
                (indexable by those two coordinates, yielding a cluster id).
            nb_clusters: number of clusters requested; cluster ids are expected
                to lie in ``range(nb_clusters)``.
            **kwargs: only ``random_state`` is read (defaults to ``None``).

        Returns:
            A ``ReportingClustering`` wrapping one ``SOMCluster`` per cluster id
            (in ascending id order), labelled ``str(dataset) + '-' + str(som)``.

        Raises:
            KeyError: if ``som.clusters`` yields an id outside
                ``range(nb_clusters)``.
        """
        # cluster id => members set mapping.
        # Each id needs its OWN set: dict.fromkeys(keys, set()) would make all
        # ids share a single set object, so every cluster would end up holding
        # every datapoint.
        id2members = {cluster_id: set() for cluster_id in range(nb_clusters)}

        som.cluster(nb_clusters, random_state=kwargs.get('random_state'))
        # som.cluster(algorithm=self.algorithms[algorithm](nb_clusters, kwargs.get('random_state', None)))

        # Each row of som.bmus holds the coordinates of the neuron the
        # datapoint gets attributed to (closest distance).
        for i, arr in enumerate(som.bmus):
            attributed_cluster = som.clusters[arr[0], arr[1]]  # >= 0
            id2members[attributed_cluster].add(dataset.datapoints[i])

        def ex1(a_cluster):
            """Return the members of a cluster as a list."""
            return list(a_cluster)

        def ex2(datapoints, attribute):
            """Look up an attribute on the datapoints by its string name."""
            return datapoints[str(attribute)]

        return ReportingClustering(
            [SOMCluster(cluster_members) for cluster_members in id2members.values()],
            str(dataset) + '-' + str(som),
            ex1,
            ex2,
        )
32