dbscan.__neighbor_indexes_points() - Code Metrics - Inspection of "[pyclustering.cluster] Interface alignment." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( af3192...a7eabf )

by Andrei

created 2018-04-25 10:40 UTC

dbscan.__neighbor_indexes_points() A

↳ Parent: dbscan

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	11
rs	9.4285
c	0
b	0
f	0

"""!

@brief Cluster analysis algorithm: DBSCAN.
@details Implementation based on article:
         - M.Ester, H.Kriegel, J.Sander, X.Xu. A density-based algorithm for discovering clusters in large spatial databases with noise. 1996.

@authors Andrei Novikov ([email protected])
@date 2014-2018
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


from enum import IntEnum;


from pyclustering.container.kdtree import kdtree;

from pyclustering.cluster.encoder import type_encoding;

from pyclustering.core.wrapper import ccore_library;

from pyclustering.utils import get_argument;


import pyclustering.core.dbscan_wrapper as wrapper;


class dbscan:
    """!
    @brief Class represents clustering algorithm DBSCAN.
    @details When the input is a sequence of points, the pure-Python implementation searches neighbors via a KD-tree.
             When the input is a distance matrix, neighbors are found by scanning the corresponding matrix row.

             CCORE option can be used to delegate processing to the pyclustering core (C/C++ shared library).

    Example:
    @code
        # sample for cluster analysis (represented by list)
        sample = read_sample(path_to_sample)

        # create object that uses CCORE for processing
        dbscan_instance = dbscan(sample, 0.5, 3, True)

        # cluster analysis
        dbscan_instance.process()

        # obtain results of clustering
        clusters = dbscan_instance.get_clusters()
        noise = dbscan_instance.get_noise()
    @endcode

    """

    def __init__(self, data, eps, neighbors, ccore = True, **kwargs):
        """!
        @brief Constructor of clustering algorithm DBSCAN.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
                    When 'data_type' is 'distance_matrix', each row holds distances from one object to every object.
        @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them is not greater than the radius.
        @param[in] neighbors (uint): Minimum number of neighbors (the point itself is not counted) that a point must have within
                    the radius to be treated as a core point.
        @param[in] ccore (bool): If True then CCORE (C++ solution) is used for solving the problem, provided that the library is workable.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').

        <b>Keyword Args:</b><br>
            - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').

        @exception TypeError is raised when unknown 'data_type' is specified.

        """

        self.__pointer_data = data
        self.__kdtree = None
        self.__eps = eps
        # NOTE: despite its name this holds the squared radius; it is not read anywhere in this class
        # and is kept only so the set of instance attributes stays the same.
        self.__sqrt_eps = eps * eps
        self.__neighbors = neighbors

        self.__visited = [False] * len(self.__pointer_data)
        self.__belong = [False] * len(self.__pointer_data)

        self.__data_type = kwargs.get('data_type', 'points')

        self.__clusters = []
        self.__noise = []

        self.__neighbor_searcher = self.__create_neighbor_searcher(self.__data_type)

        self.__ccore = ccore
        if self.__ccore:
            # CCORE was requested: use it only if the shared library reports itself as workable.
            self.__ccore = ccore_library.workable()


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of DBSCAN algorithm.
        @details Results are stored in the object and can be obtained by get_clusters() and get_noise().
                 The pure-Python implementation resets its internal state on every call, so repeated calls
                 produce the same result instead of accumulating stale data.

        @see get_clusters()
        @see get_noise()

        """

        if self.__ccore is True:
            (self.__clusters, self.__noise) = wrapper.dbscan(self.__pointer_data, self.__eps, self.__neighbors, self.__data_type)
            return

        amount_points = len(self.__pointer_data)

        # Start from a clean state so that the method may be safely called more than once.
        self.__visited = [False] * amount_points
        self.__belong = [False] * amount_points
        self.__clusters = []
        self.__noise = []

        if self.__data_type == 'points':
            self.__kdtree = kdtree(self.__pointer_data, range(amount_points))

        for index_point in range(amount_points):
            if not self.__visited[index_point]:
                cluster = self.__expand_cluster(index_point)
                if cluster is not None:
                    self.__clusters.append(cluster)

        # Noise is decided only after every cluster has been expanded: a point that was visited early
        # with too few neighbors may still be a border point of a core point that is processed later.
        self.__noise = [index_point for index_point in range(amount_points) if not self.__belong[index_point]]


    def get_clusters(self):
        """!
        @brief Returns allocated clusters.

        @remark Allocated clusters can be returned only after data processing (use method process()). Otherwise empty list is returned.

        @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.

        @see process()
        @see get_noise()

        """

        return self.__clusters


    def get_noise(self):
        """!
        @brief Returns allocated noise.

        @remark Allocated noise can be returned only after data processing (use method process() before). Otherwise empty list is returned.

        @return (list) List of indexes that are marked as a noise.

        @see process()
        @see get_clusters()

        """

        return self.__noise


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicates how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __create_neighbor_searcher(self, data_type):
        """!
        @brief Returns neighbor searcher in line with data type.

        @param[in] data_type (string): Data type ('points' or 'distance_matrix').

        @return (callable) Bound method that takes index of a point and returns list of indexes of its neighbors.

        @exception TypeError is raised when data type is unknown.

        """
        if data_type == 'points':
            return self.__neighbor_indexes_points

        if data_type == 'distance_matrix':
            return self.__neighbor_indexes_distance_matrix

        raise TypeError("Unknown type of data is specified '%s'" % data_type)


    def __expand_cluster(self, index_point):
        """!
        @brief Expands cluster from specified point in the input data space.

        @param[in] index_point (uint): Index of a point from the data.

        @return (list) Indexes of points that belong to the cluster that is grown from the specified point,
                 or None if the point does not have enough neighbors to start a cluster.

        """

        self.__visited[index_point] = True
        seeds = self.__neighbor_searcher(index_point)

        if len(seeds) < self.__neighbors:
            return None

        cluster = [index_point]
        self.__belong[index_point] = True

        # 'seeds' is a work queue that grows while it is traversed; 'queued' mirrors its content
        # (plus the start point) to make the "already scheduled" check constant time.
        queued = set(seeds)
        queued.add(index_point)

        position = 0
        while position < len(seeds):
            candidate = seeds[position]
            position += 1

            if not self.__visited[candidate]:
                self.__visited[candidate] = True
                reachable = self.__neighbor_searcher(candidate)

                # Only core points propagate the cluster further; a point with fewer neighbors
                # is attached to the cluster as a border point but its neighbors are not followed.
                if len(reachable) >= self.__neighbors:
                    for index_neighbor in reachable:
                        if index_neighbor not in queued:
                            queued.add(index_neighbor)
                            seeds.append(index_neighbor)

            if not self.__belong[candidate]:
                cluster.append(candidate)
                self.__belong[candidate] = True

        return cluster


    def __neighbor_indexes_points(self, index_point):
        """!
        @brief Return neighbors of the specified object in case of sequence of points.
        @details The KD-tree that is built by process() is queried with the connectivity radius. The element [1] of
                 each returned entry is used as a node whose payload is the index of a point.

        @param[in] index_point (uint): Index of the point whose neighbors should be found.

        @return (list) List of indexes of neighbors in line with the connectivity radius (the point itself is excluded).

        """
        kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__pointer_data[index_point], self.__eps)
        return [node_tuple[1].payload for node_tuple in kdnodes if node_tuple[1].payload != index_point]


    def __neighbor_indexes_distance_matrix(self, index_point):
        """!
        @brief Return neighbors of the specified object in case of distance matrix.

        @param[in] index_point (uint): Index of the point whose neighbors should be found.

        @return (list) List of indexes of neighbors in line with the connectivity radius (the point itself is excluded).

        """
        distances = self.__pointer_data[index_point]
        return [index_neighbor for index_neighbor, distance in enumerate(distances)
                if (distance <= self.__eps) and (index_neighbor != index_point)]

1			"""!
2
3			@brief Cluster analysis algorithm: DBSCAN.
4			@details Implementation based on article:
5			- M.Ester, H.Kriegel, J.Sander, X.Xiaowei. A density-based algorithm for discovering clusters in large spatial databases with noise. 1996.
6
7			@authors Andrei Novikov ([email protected])
8			@date 2014-2018
9			@copyright GNU Public License
10
11			@cond GNU_PUBLIC_LICENSE
12			PyClustering is free software: you can redistribute it and/or modify
13			it under the terms of the GNU General Public License as published by
14			the Free Software Foundation, either version 3 of the License, or
15			(at your option) any later version.
16
17			PyClustering is distributed in the hope that it will be useful,
18			but WITHOUT ANY WARRANTY; without even the implied warranty of
19			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20			GNU General Public License for more details.
21
22			You should have received a copy of the GNU General Public License
23			along with this program. If not, see <http://www.gnu.org/licenses/>.
24			@endcond
25
26			"""
27
28
29			from enum import IntEnum;
			0 ignored issues – show Unused Code introduced 2018-04-25 09:53 UTC by Report Bug Copy Issue Report Unused IntEnum imported from enum Loading history...
30
31			from pyclustering.container.kdtree import kdtree;
32
33			from pyclustering.cluster.encoder import type_encoding;
34
35			from pyclustering.core.wrapper import ccore_library;
36
37			from pyclustering.utils import get_argument;
			0 ignored issues – show Unused Code introduced 2018-04-25 09:53 UTC by Report Bug Copy Issue Report Unused get_argument imported from pyclustering.utils Loading history...
38
39			import pyclustering.core.dbscan_wrapper as wrapper;
40
41
42			class dbscan:
43			"""!
44			@brief Class represents clustering algorithm DBSCAN.
45			@details This DBSCAN algorithm is KD-tree optimized.
46
47			CCORE option can be used to use the pyclustering core - C/C++ shared library for processing that significantly increases performance.
48
49			Example:
50			@code
51			# sample for cluster analysis (represented by list)
52			sample = read_sample(path_to_sample);
53
54			# create object that uses CCORE for processing
55			dbscan_instance = dbscan(sample, 0.5, 3, True);
56
57			# cluster analysis
58			dbscan_instance.process();
59
60			# obtain results of clustering
61			clusters = dbscan_instance.get_clusters();
62			noise = dbscan_instance.get_noise();
63			@endcode
64
65			"""
66
67			def __init__(self, data, eps, neighbors, ccore = True, **kwargs):
68			"""!
69			@brief Constructor of clustering algorithm DBSCAN.
70
71			@param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
72			@param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less then the radius.
73			@param[in] neighbors (uint): minimum number of shared neighbors that is required for establish links between points.
74			@param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
75			@param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').
76
77			<b>Keyword Args:</b><br>
78			- data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').
79
80			"""
81
82			self.__pointer_data = data;
83			self.__kdtree = None;
84			self.__eps = eps;
85			self.__sqrt_eps = eps * eps;
86			self.__neighbors = neighbors;
87
88			self.__visited = [False] * len(self.__pointer_data);
89			self.__belong = [False] * len(self.__pointer_data);
90
91			self.__data_type = kwargs.get('data_type', 'points');
92
93			self.__clusters = [];
94			self.__noise = [];
95
96			self.__neighbor_searcher = self.__create_neighbor_searcher(self.__data_type);
97
98			self.__ccore = ccore;
99			if self.__ccore:
100			self.__ccore = ccore_library.workable();
101
102
103			def process(self):
104			"""!
105			@brief Performs cluster analysis in line with rules of DBSCAN algorithm.
106
107			@see get_clusters()
108			@see get_noise()
109
110			"""
111
112			if self.__ccore is True:
113			(self.__clusters, self.__noise) = wrapper.dbscan(self.__pointer_data, self.__eps, self.__neighbors, self.__data_type);
114
115			else:
116			if self.__data_type == 'points':
117			self.__kdtree = kdtree(self.__pointer_data, range(len(self.__pointer_data)));
118
119			for i in range(0, len(self.__pointer_data)):
120			if self.__visited[i] is False:
121
122			cluster = self.__expand_cluster(i);
123			if cluster is not None:
124			self.__clusters.append(cluster);
125			else:
126			self.__noise.append(i);
127			self.__belong[i] = True;
128
129
130			def get_clusters(self):
131			"""!
132			@brief Returns allocated clusters.
133
134			@remark Allocated clusters can be returned only after data processing (use method process()). Otherwise empty list is returned.
135
136			@return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
137
138			@see process()
139			@see get_noise()
140
141			"""
142
143			return self.__clusters;
144
145
146			def get_noise(self):
147			"""!
148			@brief Returns allocated noise.
149
150			@remark Allocated noise can be returned only after data processing (use method process() before). Otherwise empty list is returned.
151
152			@return (list) List of indexes that are marked as a noise.
153
154			@see process()
155			@see get_clusters()
156
157			"""
158
159			return self.__noise;
160
161
162			def get_cluster_encoding(self):
163			"""!
164			@brief Returns clustering result representation type that indicate how clusters are encoded.
165
166			@return (type_encoding) Clustering result representation.
167
168			@see get_clusters()
169
170			"""
171
172			return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
173
174
175			def __create_neighbor_searcher(self, data_type):
176			"""!
177			@brief Returns neighbor searcher in line with data type.
178
179			@param[in] data_type (string): Data type (points or distance matrix).
180
181			"""
182			if data_type == 'points':
183			return self.__neighbor_indexes_points;
184			elif data_type == 'distance_matrix':
185			return self.__neighbor_indexes_distance_matrix;
186			else:
187			raise TypeError("Unknown type of data is specified '%s'" % data_type);
188
189
190			def __expand_cluster(self, index_point):
191			"""!
192			@brief Expands cluster from specified point in the input data space.
193
194			@param[in] index_point (list): Index of a point from the data.
195
196			@return (list) Return tuple of list of indexes that belong to the same cluster and list of points that are marked as noise: (cluster, noise), or None if nothing has been expanded.
197
198			"""
199
200			cluster = None;
201			self.__visited[index_point] = True;
202			neighbors = self.__neighbor_searcher(index_point);
203
204			if len(neighbors) >= self.__neighbors:
205			cluster = [ index_point ];
206
207			self.__belong[index_point] = True;
208
209			for i in neighbors:
210			if self.__visited[i] is False:
211			self.__visited[i] = True;
212			next_neighbors = self.__neighbor_searcher(i);
213
214			if len(next_neighbors) >= self.__neighbors:
215			# if some node has less then minimal number of neighbors than we shouldn't look at them
216			# because maybe it's a noise.
217			neighbors += [k for k in next_neighbors if ( (k in neighbors) == False)];
218
219			if self.__belong[i] is False:
220			cluster.append(i);
221			self.__belong[i] = True;
222
223			return cluster;
224
225
226			def __neighbor_indexes_points(self, index_point):
227			"""!
228			@brief Return neighbors of the specified object in case of sequence of points.
229
230			@param[in] index_point (uint): Index point whose neighbors are should be found.
231
232			@return (list) List of indexes of neighbors in line the connectivity radius.
233
234			"""
235			kdnodes = self.__kdtree.find_nearest_dist_nodes(self.__pointer_data[index_point], self.__eps);
236			return [node_tuple[1].payload for node_tuple in kdnodes if node_tuple[1].payload != index_point];
237
238
239			def __neighbor_indexes_distance_matrix(self, index_point):
240			"""!
241			@brief Return neighbors of the specified object in case of distance matrix.
242
243			@param[in] index_point (uint): Index point whose neighbors are should be found.
244
245			@return (list) List of indexes of neighbors in line the connectivity radius.
246
247			"""
248			distances = self.__pointer_data[index_point];
249			return [index_neighbor for index_neighbor in range(len(distances))
250			if ((distances[index_neighbor] <= self.__eps) and (index_neighbor != index_point))];

annoviko / pyclustering

Push — master ( af3192...a7eabf )

dbscan.__neighbor_indexes_points() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like