bsas.__find_nearest_cluster() - Code Metrics - Inspection of "#398: BSAS algorithm implementation, unit-tests an..." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.8.dev ( 0ab444...033ef3 )

by Andrei

created 2018-04-18 19:09 UTC

bsas.__find_nearest_cluster() A

↳ Parent: bsas

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	19
rs	9.4285
c	0
b	0
f	0

"""!

@brief Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
@details Implementation based on book:
         - Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.

@authors Andrei Novikov ([email protected])
@date 2014-2018
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


from pyclustering.cluster.encoder import type_encoding;

from pyclustering.utils.metric import type_metric, distance_metric;


class bsas:
    """!

    @brief Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
    @details Algorithm has two mandatory parameters: maximum allowable number of clusters and threshold
              of dissimilarity or in other words maximum distance between points. Distance metric also can
              be specified using 'metric' parameters. BSAS using following rule for updating cluster representative:

    \f[
    \vec{m}_{C_{k}}^{new}=\frac{ \left ( n_{C_{k}^{new}} - 1 \right )\vec{m}_{C_{k}}^{old} + \vec{x} }{n_{C_{k}^{new}}}
    \f]

    @see pyclustering.cluster.mbsas, pyclustering.cluster.ttsas

    """

    def __init__(self,  data, maximum_clusters, threshold, ccore=True, **kwargs):
        """!
        @brief Creates classical BSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
        @param[in] ccore (bool): If True than DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        Keyword Args:
            metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self.__data = data;
        self.__amount = maximum_clusters;
        self.__threshold = threshold;
        self.__ccore = ccore;
        self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN));

        self.__clusters = [];
        self.__representatives = [];


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of BSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_medians()

        """

        self.__clusters.append([0]);
        self.__representatives.append(self.__data[0]);

        for i in range(1, len(self.__data)):
            point = self.__data[i];
            index_cluster, distance = self.__find_nearest_cluster(point);

            if (distance > self.__threshold) and (len(self.__clusters) < self.__amount):
                self.__representatives.append(point);
                self.__clusters.append([i]);
            else:
                self.__clusters[index_cluster].append(i);
                self.__update_representative(index_cluster, point);


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.

        @see process()
        @see get_representatives()

        """
        return self.__clusters;


    def get_representatives(self):
        """!
        @brief Returns list of representatives of allocated clusters.

        @see process()
        @see get_clusters()

        """
        return self.__representatives;


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;


    def __find_nearest_cluster(self, point):
        """!
        @brief Find nearest cluster to the specified point.

        @param[in] point (list): Point from dataset.

        @return (uint, double) Index of nearest cluster and distance to it.

        """
        index_cluster = -1;
        nearest_distance = float('inf');

        for index in range(len(self.__representatives)):
            distance = self.__metric(point, self.__representatives[index]);
            if distance < nearest_distance:
                index_cluster = index;
                nearest_distance = distance;

        return index_cluster, nearest_distance;


    def __update_representative(self, index_cluster, point):
        """!
        @brief Update cluster representative in line with new cluster size and added point to it.

        @param[in] index_cluster (uint): Index of cluster whose representative should be updated.
        @param[in] point (list): Point that was added to cluster.

        """
        length = len(self.__clusters[index_cluster]);
        rep = self.__representatives[index_cluster];

        for dimension in range(len(rep)):
            rep[dimension] = ( (length - 1) * rep[dimension] + point[dimension] ) / length;

1			"""!
2
3			@brief Cluster analysis algorithm: BSAS (Basic Sequential Algorithmic Scheme).
4			@details Implementation based on book:
5			- Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.
6
7			@authors Andrei Novikov ([email protected])
8			@date 2014-2018
9			@copyright GNU Public License
10
11			@cond GNU_PUBLIC_LICENSE
12			PyClustering is free software: you can redistribute it and/or modify
13			it under the terms of the GNU General Public License as published by
14			the Free Software Foundation, either version 3 of the License, or
15			(at your option) any later version.
16
17			PyClustering is distributed in the hope that it will be useful,
18			but WITHOUT ANY WARRANTY; without even the implied warranty of
19			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20			GNU General Public License for more details.
21
22			You should have received a copy of the GNU General Public License
23			along with this program. If not, see <http://www.gnu.org/licenses/>.
24			@endcond
25
26			"""
27
28
29			from pyclustering.cluster.encoder import type_encoding;
30
31			from pyclustering.utils.metric import type_metric, distance_metric;
32
33
34			class bsas:
35			"""!
			0 ignored issues – show Bug introduced 2018-04-18 19:10 UTC by Report Bug Copy Issue Report A suspicious escape sequence `\l` was found. Did you maybe forget to add an `r` prefix? Escape sequences in Python are generally interpreted according to rules similar to standard C. Only if strings are prefixed with `r` or `R` are they interpreted as regular expressions. The escape sequence that was used indicates that you might have intended to write a regular expression. Learn more about the available escape sequences. in the Python documentation. Loading history...
36			@brief Class represents BSAS clustering algorithm - basic sequential algorithmic scheme.
37			@details Algorithm has two mandatory parameters: maximum allowable number of clusters and threshold
38			of dissimilarity or in other words maximum distance between points. Distance metric also can
39			be specified using 'metric' parameters. BSAS using following rule for updating cluster representative:
40
41			\f[
42			\vec{m}_{C_{k}}^{new}=\frac{ \left ( n_{C_{k}^{new}} - 1 \right )\vec{m}_{C_{k}}^{old} + \vec{x} }{n_{C_{k}^{new}}}
43			\f]
44
45			@see pyclustering.cluster.mbsas, pyclustering.cluster.ttsas
46
47			"""
48
49			def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
50			"""!
51			@brief Creates classical BSAS algorithm.
52
53			@param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
54			@param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
55			@param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
56			@param[in] ccore (bool): If True than DLL CCORE (C++ solution) will be used for solving.
57			@param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').
58
59			Keyword Args:
60			metric (distance_metric): Metric that is used for distance calculation between two points.
61
62			"""
63
64			self.__data = data;
65			self.__amount = maximum_clusters;
66			self.__threshold = threshold;
67			self.__ccore = ccore;
68			self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN));
69
70			self.__clusters = [];
71			self.__representatives = [];
72
73
74			def process(self):
75			"""!
76			@brief Performs cluster analysis in line with rules of BSAS algorithm.
77
78			@remark Results of clustering can be obtained using corresponding get methods.
79
80			@see get_clusters()
81			@see get_medians()
82
83			"""
84
85			self.__clusters.append([0]);
86			self.__representatives.append(self.__data[0]);
87
88			for i in range(1, len(self.__data)):
89			point = self.__data[i];
90			index_cluster, distance = self.__find_nearest_cluster(point);
91
92			if (distance > self.__threshold) and (len(self.__clusters) < self.__amount):
93			self.__representatives.append(point);
94			self.__clusters.append([i]);
95			else:
96			self.__clusters[index_cluster].append(i);
97			self.__update_representative(index_cluster, point);
98
99
100			def get_clusters(self):
101			"""!
102			@brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
103
104			@see process()
105			@see get_representatives()
106
107			"""
108			return self.__clusters;
109
110
111			def get_representatives(self):
112			"""!
113			@brief Returns list of representatives of allocated clusters.
114
115			@see process()
116			@see get_clusters()
117
118			"""
119			return self.__representatives;
120
121
122			def get_cluster_encoding(self):
123			"""!
124			@brief Returns clustering result representation type that indicate how clusters are encoded.
125
126			@return (type_encoding) Clustering result representation.
127
128			@see get_clusters()
129
130			"""
131
132			return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
133
134
135			def __find_nearest_cluster(self, point):
136			"""!
137			@brief Find nearest cluster to the specified point.
138
139			@param[in] point (list): Point from dataset.
140
141			@return (uint, double) Index of nearest cluster and distance to it.
142
143			"""
144			index_cluster = -1;
145			nearest_distance = float('inf');
146
147			for index in range(len(self.__representatives)):
148			distance = self.__metric(point, self.__representatives[index]);
149			if distance < nearest_distance:
150			index_cluster = index;
151			nearest_distance = distance;
152
153			return index_cluster, nearest_distance;
154
155
156			def __update_representative(self, index_cluster, point):
157			"""!
158			@brief Update cluster representative in line with new cluster size and added point to it.
159
160			@param[in] index_cluster (uint): Index of cluster whose representative should be updated.
161			@param[in] point (list): Point that was added to cluster.
162
163			"""
164			length = len(self.__clusters[index_cluster]);
165			rep = self.__representatives[index_cluster];
166
167			for dimension in range(len(rep)):
168			rep[dimension] = ( (length - 1) * rep[dimension] + point[dimension] ) / length;

annoviko / pyclustering

Push — 0.8.dev ( 0ab444...033ef3 )

bsas.__find_nearest_cluster() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like