Completed
Push — master ( 0ab444...af3192 )
by Andrei
01:25
created

bsas   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 174
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
dl 0
loc 174
rs 10
c 3
b 0
f 0
wmc 17

9 Methods

Rating   Name   Duplication   Size   Complexity  
A get_cluster_encoding() 0 11 1
A get_clusters() 0 9 1
B __init__() 0 26 2
A _update_representative() 0 13 2
A get_representatives() 0 9 1
A __process_by_ccore() 0 3 1
A _find_nearest_cluster() 0 19 3
A process() 0 15 2
A __prcess_by_python() 0 14 4
1
"""!
2
3
@brief Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
4
@details Implementation based on book:
5
         - Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2018
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
29
from pyclustering.core.wrapper import ccore_library;
30
from pyclustering.core.bsas_wrapper import bsas as bsas_wrapper;
31
from pyclustering.core.metric_wrapper import metric_wrapper;
32
33
from pyclustering.cluster import cluster_visualizer;
34
from pyclustering.cluster.encoder import type_encoding;
35
36
from pyclustering.utils.metric import type_metric, distance_metric;
37
38
39
class bsas_visualizer:
40
    """!
41
    @brief Visualizer of BSAS algorithm's results.
42
    @details BSAS visualizer provides visualization services that are specific for BSAS algorithm.
43
44
    """
45
46
    @staticmethod
47
    def show_clusters(sample, clusters, representatives, **kwargs):
48
        """!
49
        @brief Display BSAS clustering results.
50
51
        @param[in] sample (list): Dataset that was used for clustering.
52
        @param[in] clusters (array_like): Clusters that were allocated by the algorithm.
53
        @param[in] representatives (array_like): Allocated representatives correspond to clusters.
54
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'figure', 'display').
55
56
        Keyword Args:
57
            figure (figure): If 'None' then new is figure is created, otherwise specified figure is used for visualization.
58
            display (bool): If 'True' then figure will be shown by the method, otherwise it should be shown manually using matplotlib function 'plt.show()'.
59
            offset (uint): Specify axes index on the figure where results should be drawn (only if argument 'figure' is specified).
60
61
        @return (figure) Figure where clusters were drawn.
62
63
        """
64
65
        figure = kwargs.get('figure', None);
66
        display = kwargs.get('display', True);
67
        offset = kwargs.get('offset', 0);
68
69
        visualizer = cluster_visualizer();
70
        visualizer.append_clusters(clusters, sample, canvas=offset);
71
72
        for cluster_index in range(len(clusters)):
73
            visualizer.append_cluster_attribute(offset, cluster_index, [representatives[cluster_index]], '*', 10);
74
75
        return visualizer.show(figure=figure, display=display);
76
77
78
class bsas:
79
    """!
0 ignored issues
show
Bug introduced by
A suspicious escape sequence \l was found. Did you maybe forget to add an r prefix?

Escape sequences in Python are generally interpreted according to rules similar to standard C. Only if strings are prefixed with r or R are they interpreted as regular expressions.

The escape sequence that was used indicates that you might have intended to write a regular expression.

Learn more about the available escape sequences. in the Python documentation.

Loading history...
80
    @brief Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
81
    @details Algorithm has two mandatory parameters: maximum allowable number of clusters and threshold
82
              of dissimilarity or in other words maximum distance between points. Distance metric also can
83
              be specified using 'metric' parameters, by default 'Manhattan' distance is used.
84
              BSAS using following rule for updating cluster representative:
85
86
    \f[
87
    \vec{m}_{C_{k}}^{new}=\frac{ \left ( n_{C_{k}^{new}} - 1 \right )\vec{m}_{C_{k}}^{old} + \vec{x} }{n_{C_{k}^{new}}}
88
    \f]
89
90
    Clustering results of this algorithm depends on objects order in input data.
91
92
    Example:
93
    @code
94
        # Read data sample from 'Simple02.data'.
95
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2);
96
97
        # Prepare algorithm's parameters.
98
        max_clusters = 2;
99
        threshold = 1.0;
100
101
        # Create instance of BSAS algorithm.
102
        bsas_instance = bsas(sample, max_clusters, threshold);
103
        bsas_instance.process();
104
105
        # Get clustering results.
106
        clusters = bsas_instance.get_clusters();
107
        representatives = bsas_instance.get_representatives();
108
109
        # Display results.
110
        bsas_visualizer.show_clusters(sample, clusters, representatives);
111
    @endcode
112
113
    @see pyclustering.cluster.mbsas, pyclustering.cluster.ttsas
114
115
    """
116
117
    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
118
        """!
119
        @brief Creates classical BSAS algorithm.
120
121
        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
122
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
123
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
124
        @param[in] ccore (bool): If True than DLL CCORE (C++ solution) will be used for solving.
125
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').
126
127
        Keyword Args:
128
            metric (distance_metric): Metric that is used for distance calculation between two points.
129
130
        """
131
132
        self._data = data;
133
        self._amount = maximum_clusters;
134
        self._threshold = threshold;
135
        self._metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN));
136
        self._ccore = ccore and self._metric.get_type() != type_metric.USER_DEFINED;
137
138
        self._clusters = [];
139
        self._representatives = [];
140
141
        if self._ccore is True:
142
            self._ccore = ccore_library.workable();
143
144
145
    def process(self):
146
        """!
147
        @brief Performs cluster analysis in line with rules of BSAS algorithm.
148
149
        @remark Results of clustering can be obtained using corresponding get methods.
150
151
        @see get_clusters()
152
        @see get_representatives()
153
154
        """
155
156
        if self._ccore is True:
157
            self.__process_by_ccore();
158
        else:
159
            self.__prcess_by_python();
160
161
162
    def __process_by_ccore(self):
163
        ccore_metric = metric_wrapper.create_instance(self._metric);
164
        self._clusters, self._representatives = bsas_wrapper(self._data, self._amount, self._threshold, ccore_metric.get_pointer());
165
166
167
    def __prcess_by_python(self):
168
        self._clusters.append([0]);
169
        self._representatives.append(self._data[0]);
170
171
        for i in range(1, len(self._data)):
172
            point = self._data[i];
173
            index_cluster, distance = self._find_nearest_cluster(point);
174
175
            if (distance > self._threshold) and (len(self._clusters) < self._amount):
176
                self._representatives.append(point);
177
                self._clusters.append([i]);
178
            else:
179
                self._clusters[index_cluster].append(i);
180
                self._update_representative(index_cluster, point);
181
182
183
    def get_clusters(self):
184
        """!
185
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
186
187
        @see process()
188
        @see get_representatives()
189
190
        """
191
        return self._clusters;
192
193
194
    def get_representatives(self):
195
        """!
196
        @brief Returns list of representatives of allocated clusters.
197
198
        @see process()
199
        @see get_clusters()
200
201
        """
202
        return self._representatives;
203
204
205
    def get_cluster_encoding(self):
206
        """!
207
        @brief Returns clustering result representation type that indicate how clusters are encoded.
208
209
        @return (type_encoding) Clustering result representation.
210
211
        @see get_clusters()
212
213
        """
214
215
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
216
217
218
    def _find_nearest_cluster(self, point):
219
        """!
220
        @brief Find nearest cluster to the specified point.
221
222
        @param[in] point (list): Point from dataset.
223
224
        @return (uint, double) Index of nearest cluster and distance to it.
225
226
        """
227
        index_cluster = -1;
228
        nearest_distance = float('inf');
229
230
        for index in range(len(self._representatives)):
231
            distance = self._metric(point, self._representatives[index]);
232
            if distance < nearest_distance:
233
                index_cluster = index;
234
                nearest_distance = distance;
235
236
        return index_cluster, nearest_distance;
237
238
239
    def _update_representative(self, index_cluster, point):
240
        """!
241
        @brief Update cluster representative in line with new cluster size and added point to it.
242
243
        @param[in] index_cluster (uint): Index of cluster whose representative should be updated.
244
        @param[in] point (list): Point that was added to cluster.
245
246
        """
247
        length = len(self._clusters[index_cluster]);
248
        rep = self._representatives[index_cluster];
249
250
        for dimension in range(len(rep)):
251
            rep[dimension] = ( (length - 1) * rep[dimension] + point[dimension] ) / length;