Completed
Push — appveyor ( 280314...2c0e2c )
by Konstantinos
02:09
created

so_magic.som.self_organising_map   A

Complexity

Total Complexity 18

Size/Duplication

Total Lines 86
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 60
dl 0
loc 86
rs 10
c 0
b 0
f 0
wmc 18

13 Methods

Rating   Name   Duplication   Size   Complexity  
A SomTrainer.infer_map() 0 10 2
A SelfOrganizingMap.__getattr__() 0 4 2
A SelfOrganizingMap.get_map_id() 0 5 2
A SelfOrganizingMap.grid_type() 0 3 1
A SelfOrganizingMap.project() 0 3 1
A SelfOrganizingMap.type() 0 3 1
A SelfOrganizingMap.height() 0 3 1
A SelfOrganizingMap.cluster() 0 2 1
A SelfOrganizingMap.nb_clusters() 0 3 1
A SelfOrganizingMap.visual_umatrix() 0 7 2
A SelfOrganizingMap.neurons_coordinates() 0 6 2
A SelfOrganizingMap.datapoint_coordinates() 0 4 1
A SelfOrganizingMap.width() 0 3 1
1
import attr
2
import numpy as np
3
import somoclu
4
from sklearn.cluster import KMeans
5
import logging
6
7
logger = logging.getLogger(__name__)
8
9
10
class NoFeatureVectorsError(Exception):
    """Raised when a dataset given to the trainer has no `feature_vectors` attribute."""


class SomTrainer:
    """Builds and trains `somoclu.Somoclu` maps from datasets that expose feature vectors."""

    def infer_map(self, nb_cols, nb_rows, dataset, **kwargs):
        """Infer a self-organizing map from dataset.

        Args:
            nb_cols: passed as the first positional argument to `somoclu.Somoclu`.
            nb_rows: passed as the second positional argument to `somoclu.Somoclu`.
            dataset: object exposing a `feature_vectors` attribute; its content is
                converted to a float32 numpy array and used as the training data.
            **kwargs: forwarded untouched to `somoclu.Somoclu`. Options listed by the
                original author: initialcodebook = None, kerneltype = 0,
                maptype = 'planar', gridtype = 'rectangular', compactsupport = False,
                neighborhood = 'gaussian', std_coeff = 0.5, initialization = None.

        Returns:
            The `somoclu.Somoclu` object after its `train` method has been called.

        Raises:
            NoFeatureVectorsError: if `dataset` has no `feature_vectors` attribute.
        """
        # Fail early with a domain-specific error, before any somoclu object is built.
        if not hasattr(dataset, 'feature_vectors'):
            raise NoFeatureVectorsError("Attempted to train a Som model, but did not find feature vectors in the dataset.")
        som = somoclu.Somoclu(nb_cols, nb_rows, **kwargs)
        som.train(data=np.array(dataset.feature_vectors, dtype=np.float32))
        return som
22
23
24
@attr.s
class SelfOrganizingMap:
    """Wrapper around a self-organizing map object, paired with the name of a dataset.

    Attributes that are not defined on this class are looked up on the wrapped
    `som` object (see `__getattr__`).
    """
    # Wrapped map object; presumably a trained somoclu.Somoclu instance -- TODO confirm.
    som = attr.ib(init=True)
    # Name of the dataset; used as the first component of the map id.
    dataset_name = attr.ib(init=True)

    @property
    def height(self):
        """Return the wrapped map's `_n_rows` value."""
        return self.som._n_rows

    @property
    def width(self):
        """Return the wrapped map's `_n_columns` value."""
        return self.som._n_columns

    @property
    def type(self):
        """Return the wrapped map's `_map_type` value."""
        return self.som._map_type

    @property
    def grid_type(self):
        """Return the wrapped map's `_grid_type` value."""
        return self.som._grid_type

    def __getattr__(self, item):
        """Delegate lookups of unknown attributes to the wrapped `som` object.

        A handful of public-looking names are translated to the underscore-prefixed
        attribute of the wrapped object (e.g. `n_rows` -> `_n_rows`).

        Raises:
            AttributeError: if the wrapped object lacks the attribute, or if `som`
                itself has not been set on this instance.
        """
        if item == 'som':
            # Only reached when `som` is missing from the instance (e.g. an object created
            # without running __init__, as copy/pickle do). Without this guard the
            # `self.som` access below would call __getattr__ again and recurse forever.
            raise AttributeError(item)
        if item in ('n_rows', 'n_columns', 'initialization', 'map_type', 'grid_type'):
            item = f'_{item}'
        return getattr(self.som, item)

    def get_map_id(self):
        """Build a string identifier for this map.

        The id is the underscore-joined string form of the dataset name, number of
        rows, number of columns, initialization, map type and grid type. When the
        map has been clustered, a `_cl<nb_clusters>` suffix is appended.

        Returns:
            str: the map identifier.
        """
        # Values such as n_rows / n_columns are not strings, so convert before joining.
        map_id = '_'.join(
            str(getattr(self, attribute))
            for attribute in ['dataset_name', 'n_rows', 'n_columns', 'initialization', 'map_type', 'grid_type']
        )
        # Explicit None check: truth-testing a multi-element numpy array raises ValueError.
        if self.som.clusters is not None:
            return f'{map_id}_cl{self.nb_clusters}'
        return map_id

    @property
    def nb_clusters(self):
        """Return the number of clusters, or 0 when the map has not been clustered.

        Cluster ids start at 0, so the count is the largest id plus one.
        """
        clusters = self.som.clusters
        if clusters is None:
            return 0
        return int(np.max(clusters)) + 1

    def neurons_coordinates(self):
        """Group datapoints by the cluster of the neuron they are attributed to.

        NOTE(review): the previous body referenced a `dataset` object and an
        `id2members` mapping that were never defined. This class holds no dataset,
        so members are recorded by their datapoint index (row position in
        `som.bmus`) rather than by a dataset-provided id.

        Returns:
            dict: maps each cluster id (int) to the set of datapoint indices whose
            best-matching neuron belongs to that cluster.
        """
        id2members = {}
        # Each row of bmus holds the coordinates of the neuron the datapoint is attributed to.
        for i, arr in enumerate(self.som.bmus):
            # NOTE(review): confirm whether bmus rows are (row, column) or (column, row);
            # the index order below is kept exactly as originally written.
            attributed_cluster = int(self.som.clusters[arr[0], arr[1]])  # >= 0
            id2members.setdefault(attributed_cluster, set()).add(i)
        return id2members

    def datapoint_coordinates(self, index):
        """Return the best-matching unit (bmu) coordinates of the datapoint at the given index.

        The bmu is the neuron on the som grid that is closest to the datapoint after
        it has been projected to the 2D space.

        Args:
            index: position of the datapoint in `som.bmus`.

        Returns:
            tuple: the first and second entries of the datapoint's row in `som.bmus`.
        """
        bmu = self.som.bmus[index]
        return bmu[0], bmu[1]

    def project(self, datapoint):
        """Compute the coordinates of a (potentially unseen) datapoint.

        It is assumed that the codebook has been computed already.

        TODO: not implemented yet; currently does nothing and returns None.
        """
        pass

    def cluster(self, nb_clusters, random_state=None):
        """Cluster the wrapped map with KMeans.

        Calls `som.cluster`, handing it a `KMeans` estimator configured with the
        given number of clusters and random state. Returns None.

        Args:
            nb_clusters: value passed to `KMeans` as `n_clusters`.
            random_state: value passed to `KMeans` as `random_state`.
        """
        self.som.cluster(algorithm=KMeans(n_clusters=nb_clusters, random_state=random_state))

    @property
    def visual_umatrix(self):
        """Return a text rendering of the cluster-id grid.

        One line is produced per row of `som.umatrix`; each line holds the cluster ids
        of that row of `som.clusters`, right-aligned to a common width and separated
        by single spaces. Every line, including the last, ends with a newline.
        """
        clusters = self.som.clusters
        # i.e. a clustering of 11 clusters with ids 0, 1, .., 10 has a width of 2
        width = len(str(np.max(clusters)))
        lines = [
            ' '.join(str(cluster_id).rjust(width) for cluster_id in clusters[j, :])
            for j in range(self.som.umatrix.shape[0])
        ]
        return ''.join(line + '\n' for line in lines)
86