Completed
Push — 0.8.dev ( 7407bb...c11acd )
by Andrei
53s
created

bsas   A

Complexity

Total Complexity 13

Size/Duplication

Total Lines 156
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
dl 0
loc 156
rs 10
c 3
b 0
f 0
wmc 13

7 Methods

Rating   Name   Duplication   Size   Complexity  
A get_cluster_encoding() 0 11 1
A get_clusters() 0 9 1
A __init__() 0 23 1
A get_representatives() 0 9 1
B process() 0 24 4
A _update_representative() 0 13 2
A _find_nearest_cluster() 0 19 3
1
"""!
2
3
@brief Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
4
@details Implementation based on book:
5
         - Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2018
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
29
from pyclustering.cluster import cluster_visualizer;
30
from pyclustering.cluster.encoder import type_encoding;
31
32
from pyclustering.utils.metric import type_metric, distance_metric;
33
34
35
class bsas_visualizer:
36
    """!
37
    @brief Visualizer of BSAS algorithm's results.
38
    @details BSAS visualizer provides visualization services that are specific for BSAS algorithm.
39
40
    """
41
42
    @staticmethod
43
    def show_clusters(sample, clusters, representatives, **kwargs):
44
        """!
45
        @brief Display BSAS clustering results.
46
47
        @param[in] sample (list): Dataset that was used for clustering.
48
        @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
49
        @param[in] representatives (array_like): Allocated representatives correspond to clusters.
50
51
        @return (figure) Figure where clusters were drawn.
52
53
        """
54
55
        figure = kwargs.get('figure', None);
56
        display = kwargs.get('display', True);
57
        offset = kwargs.get('offset', 0);
58
59
        visualizer = cluster_visualizer();
60
        visualizer.append_clusters(clusters, sample, canvas=offset);
61
62
        for cluster_index in range(len(clusters)):
63
            visualizer.append_cluster_attribute(offset, cluster_index, [representatives[cluster_index]], '*', 10);
64
65
        return visualizer.show(figure=figure, display=display);
66
67
68
class bsas:
69
    """!
0 ignored issues
show
Bug introduced by
A suspicious escape sequence \l was found. Did you maybe forget to add an r prefix?

Escape sequences in Python are generally interpreted according to rules similar to standard C. Only if strings are prefixed with r or R are they interpreted as regular expressions.

The escape sequence that was used indicates that you might have intended to write a regular expression.

Learn more about the available escape sequences. in the Python documentation.

Loading history...
70
    @brief Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
71
    @details Algorithm has two mandatory parameters: maximum allowable number of clusters and threshold
72
              of dissimilarity or in other words maximum distance between points. Distance metric also can
73
              be specified using 'metric' parameters, by default 'Manhattan' distance is used.
74
              BSAS using following rule for updating cluster representative:
75
76
    \f[
77
    \vec{m}_{C_{k}}^{new}=\frac{ \left ( n_{C_{k}^{new}} - 1 \right )\vec{m}_{C_{k}}^{old} + \vec{x} }{n_{C_{k}^{new}}}
78
    \f]
79
80
    Clustering results of this algorithm depends on objects order in input data.
81
82
    Example:
83
    @code
84
        # Read data sample from 'Simple02.data'.
85
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2);
86
87
        # Prepare algorithm's parameters.
88
        max_clusters = 2;
89
        threshold = 1.0;
90
91
        # Create instance of BSAS algorithm.
92
        bsas_instance = bsas(sample, max_clusters, threshold);
93
        bsas_instance.process();
94
95
        # Get clustering results.
96
        clusters = bsas_instance.get_clusters();
97
        representatives = bsas_instance.get_representatives();
98
    @endcode
99
100
    @see pyclustering.cluster.mbsas, pyclustering.cluster.ttsas
101
102
    """
103
104
    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
105
        """!
106
        @brief Creates classical BSAS algorithm.
107
108
        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
109
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
110
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
111
        @param[in] ccore (bool): If True than DLL CCORE (C++ solution) will be used for solving.
112
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').
113
114
        Keyword Args:
115
            metric (distance_metric): Metric that is used for distance calculation between two points.
116
117
        """
118
119
        self._data = data;
120
        self._amount = maximum_clusters;
121
        self._threshold = threshold;
122
        self._ccore = ccore;
123
        self._metric = kwargs.get('metric', distance_metric(type_metric.MANHATTAN));
124
125
        self._clusters = [];
126
        self._representatives = [];
127
128
129
    def process(self):
130
        """!
131
        @brief Performs cluster analysis in line with rules of BSAS algorithm.
132
133
        @remark Results of clustering can be obtained using corresponding get methods.
134
135
        @see get_clusters()
136
        @see get_medians()
137
138
        """
139
140
        self._clusters.append([0]);
141
        self._representatives.append(self._data[0]);
142
143
        for i in range(1, len(self._data)):
144
            point = self._data[i];
145
            index_cluster, distance = self._find_nearest_cluster(point);
146
147
            if (distance > self._threshold) and (len(self._clusters) < self._amount):
148
                self._representatives.append(point);
149
                self._clusters.append([i]);
150
            else:
151
                self._clusters[index_cluster].append(i);
152
                self._update_representative(index_cluster, point);
153
154
155
    def get_clusters(self):
156
        """!
157
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
158
159
        @see process()
160
        @see get_representatives()
161
162
        """
163
        return self._clusters;
164
165
166
    def get_representatives(self):
167
        """!
168
        @brief Returns list of representatives of allocated clusters.
169
170
        @see process()
171
        @see get_clusters()
172
173
        """
174
        return self._representatives;
175
176
177
    def get_cluster_encoding(self):
178
        """!
179
        @brief Returns clustering result representation type that indicate how clusters are encoded.
180
181
        @return (type_encoding) Clustering result representation.
182
183
        @see get_clusters()
184
185
        """
186
187
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
188
189
190
    def _find_nearest_cluster(self, point):
191
        """!
192
        @brief Find nearest cluster to the specified point.
193
194
        @param[in] point (list): Point from dataset.
195
196
        @return (uint, double) Index of nearest cluster and distance to it.
197
198
        """
199
        index_cluster = -1;
200
        nearest_distance = float('inf');
201
202
        for index in range(len(self._representatives)):
203
            distance = self._metric(point, self._representatives[index]);
204
            if distance < nearest_distance:
205
                index_cluster = index;
206
                nearest_distance = distance;
207
208
        return index_cluster, nearest_distance;
209
210
211
    def _update_representative(self, index_cluster, point):
212
        """!
213
        @brief Update cluster representative in line with new cluster size and added point to it.
214
215
        @param[in] index_cluster (uint): Index of cluster whose representative should be updated.
216
        @param[in] point (list): Point that was added to cluster.
217
218
        """
219
        length = len(self._clusters[index_cluster]);
220
        rep = self._representatives[index_cluster];
221
222
        for dimension in range(len(rep)):
223
            rep[dimension] = ( (length - 1) * rep[dimension] + point[dimension] ) / length;