1
|
|
|
"""!
|
2
|
|
|
|
3
|
|
|
@brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
|
4
|
|
|
@details Based on article description:
|
5
|
|
|
- M.Ankerst, M.Breunig, H.Kriegel, J.Sander. OPTICS: Ordering Points To Identify the Clustering Structure. 1999.
|
6
|
|
|
|
7
|
|
|
@authors Andrei Novikov ([email protected])
|
8
|
|
|
@date 2014-2018
|
9
|
|
|
@copyright GNU Public License
|
10
|
|
|
|
11
|
|
|
@cond GNU_PUBLIC_LICENSE
|
12
|
|
|
PyClustering is free software: you can redistribute it and/or modify
|
13
|
|
|
it under the terms of the GNU General Public License as published by
|
14
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
15
|
|
|
(at your option) any later version.
|
16
|
|
|
|
17
|
|
|
PyClustering is distributed in the hope that it will be useful,
|
18
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
19
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
20
|
|
|
GNU General Public License for more details.
|
21
|
|
|
|
22
|
|
|
You should have received a copy of the GNU General Public License
|
23
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
24
|
|
|
@endcond
|
25
|
|
|
|
26
|
|
|
"""
|
27
|
|
|
|
28
|
|
|
|
29
|
|
|
import math;
|
30
|
|
|
|
31
|
|
|
import matplotlib.pyplot as plt;
|
|
|
|
|
32
|
|
|
|
33
|
|
|
from enum import IntEnum;
|
|
|
|
|
34
|
|
|
|
35
|
|
|
from pyclustering.container.kdtree import kdtree;
|
36
|
|
|
|
37
|
|
|
from pyclustering.cluster.encoder import type_encoding;
|
38
|
|
|
|
39
|
|
|
from pyclustering.utils import get_argument;
|
|
|
|
|
40
|
|
|
from pyclustering.utils.color import color as color_list;
|
41
|
|
|
|
42
|
|
|
from pyclustering.core.wrapper import ccore_library;
|
43
|
|
|
|
44
|
|
|
import pyclustering.core.optics_wrapper as wrapper;
|
45
|
|
|
|
46
|
|
|
|
47
|
|
|
class ordering_visualizer:
    """!
    @brief Cluster ordering diagram visualizer that represents dataset graphically as density-based clustering structure.
    @details The cluster-ordering (sequence of reachability-distances) is drawn as a bar chart. When an amount of
              clusters is requested, the bars are colored per cluster and the connectivity radius that allocates
              that amount of clusters is drawn as a horizontal line.

    @see ordering_analyser

    """

    @staticmethod
    def show_ordering_diagram(analyser, amount_clusters = None):
        """!
        @brief Display cluster-ordering (reachability-plot) diagram.

        @param[in] analyser (ordering_analyser): cluster-ordering analyser whose ordering diagram should be displayed.
        @param[in] amount_clusters (uint): if it is not 'None' then connectivity radius line that can be used for
                    allocation of the specified amount of clusters is displayed and the diagram is colored by
                    corresponding cluster colors. If such radius cannot be found, the plain (black) diagram
                    is displayed instead.

        Example demonstrates general abilities of 'ordering_visualizer' class:
        @code
            # Display cluster-ordering diagram with connectivity radius that is used for allocation of three clusters.
            ordering_visualizer.show_ordering_diagram(analyser, 3)

            # Display cluster-ordering diagram without radius.
            ordering_visualizer.show_ordering_diagram(analyser)
        @endcode

        """
        ordering = analyser.cluster_ordering
        length = len(ordering)
        axis = plt.subplot(111)

        radius, borders = None, []
        if amount_clusters is not None:
            radius, borders = analyser.calculate_connvectivity_radius(amount_clusters)

        if radius is None:
            # Either no coloring was requested or the analyser could not find a radius
            # (it reports 'None' in that case) - there is nothing to draw except bars.
            axis.bar(range(0, length), ordering[0:length], width = 1.0, color = 'black')
            plt.xlim([0, length])

        else:
            palette = color_list.TITLES

            # Divide into cluster groups to visualize by colors; indexes wrap around the
            # palette so that a large amount of clusters cannot run out of colors.
            left_index_border = 0
            for index_border, right_index_border in enumerate(borders):
                axis.bar(range(left_index_border, right_index_border),
                         ordering[left_index_border:right_index_border],
                         width = 1.0, color = palette[index_border % len(palette)])
                left_index_border = right_index_border

            # The tail segment historically uses color index 1 when there are no borders
            # at all, otherwise the color that follows the last border.
            tail_color_index = len(borders) if len(borders) > 0 else 1
            axis.bar(range(left_index_border, length), ordering[left_index_border:length],
                     width = 1.0, color = palette[tail_color_index % len(palette)])

            plt.xlim([0, length])

            plt.axhline(y = radius, linewidth = 2, color = 'black')
            plt.text(0, radius + radius * 0.03, " Radius: " + str(round(radius, 4)) + ";\n Clusters: " + str(amount_clusters), color = 'b', fontsize = 10)

        plt.show()
|
102
|
|
|
|
103
|
|
|
|
104
|
|
|
class ordering_analyser:
    """!
    @brief Analyser of cluster ordering diagram.
    @details Using cluster-ordering it is able to calculate connectivity radius for allocation of specified amount
              of clusters and to calculate amount of clusters using specified connectivity radius. Cluster-ordering
              is formed by OPTICS algorithm during cluster analysis.

    @see optics

    """

    @property
    def cluster_ordering(self):
        """!
        @brief (list) Returns values of dataset cluster ordering.

        """
        return self.__ordering


    def __init__(self, ordering_diagram):
        """!
        @brief Analyser of ordering diagram that is based on reachability-distances.

        @param[in] ordering_diagram (list): Reachability-distances in cluster-ordering; the object is stored as is (not copied).

        @see calculate_connvectivity_radius

        """
        self.__ordering = ordering_diagram


    def __len__(self):
        """!
        @brief Returns length of clustering-ordering diagram.

        """
        return len(self.__ordering)


    def calculate_connvectivity_radius(self, amount_clusters, maximum_iterations = 100):
        """!
        @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram and marks borders of clusters using indexes of values of ordering diagram.
        @details The radius is searched by bisection between 0.0 and the maximum reachability-distance of the diagram.
                  Parameter 'maximum_iterations' is used to protect from hanging when it is impossible to allocate specified number of clusters.

        @param[in] amount_clusters (uint): amount of clusters that should be allocated by calculated connectivity radius.
        @param[in] maximum_iterations (uint): maximum number of iteration for searching connectivity radius to allocated specified amount of clusters (by default it is restricted by 100 iterations).

        @return (double, list) Value of connectivity radius and borders of clusters like (radius, borders). Radius is 'None'
                 if connectivity radius hasn't been found; in that case borders are the ones obtained for the last examined
                 radius. For an empty ordering diagram (None, []) is returned.

        """
        if len(self.__ordering) == 0:
            # Nothing to analyse: max() of an empty sequence would raise.
            return None, []

        maximum_distance = max(self.__ordering)

        upper_distance = maximum_distance
        lower_distance = 0.0

        result = None

        amount, borders = self.extract_cluster_amount(maximum_distance)
        if amount <= amount_clusters:
            for _ in range(maximum_iterations):
                radius = (lower_distance + upper_distance) / 2.0

                amount, borders = self.extract_cluster_amount(radius)
                if amount == amount_clusters:
                    result = radius
                    break

                elif amount == 0:
                    # All distances are equal and above the radius - lowering it further cannot help.
                    break

                elif amount > amount_clusters:
                    lower_distance = radius

                else:
                    upper_distance = radius

        return result, borders


    def extract_cluster_amount(self, radius):
        """!
        @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram and borders between them.
        @details When growth of reachability-distances is detected then it is considered as a start point of cluster,
                  then peak is detected and after that recession is observed until new growth (that means end of the
                  current cluster and start of a new one) or end of diagram.

        @param[in] radius (double): connectivity radius that is used for cluster allocation.

        @return (uint, list) Amount of clusters that can be allocated by the connectivity radius on ordering diagram and borders between them using indexes
                 from ordering diagram (amount_clusters, border_clusters). Amount is 0 when all distances are equal and greater than
                 the radius, and also when the ordering diagram is empty.

        """
        if len(self.__ordering) == 0:
            # No points - no clusters; also avoids comparing 'None' with the radius below.
            return 0, []

        amount_clusters = 1

        cluster_start = False
        cluster_pick = False
        total_similarity = True
        previous_cluster_distance = None    # previous distance that was not below the radius
        previous_distance = None            # previous distance regardless of the radius

        cluster_borders = []

        for index_ordering, distance in enumerate(self.__ordering):
            if distance >= radius:
                if not cluster_start:
                    cluster_start = True
                    amount_clusters += 1

                    if index_ordering != 0:
                        cluster_borders.append(index_ordering)

                elif (distance < previous_cluster_distance) and not cluster_pick:
                    # Recession after growth: the peak has been passed.
                    cluster_pick = True

                elif (distance > previous_cluster_distance) and cluster_pick:
                    # New growth after the peak: the next cluster starts here.
                    cluster_pick = False
                    amount_clusters += 1

                    if index_ordering != 0:
                        cluster_borders.append(index_ordering)

                previous_cluster_distance = distance

            else:
                cluster_start = False
                cluster_pick = False

            if (previous_distance is not None) and (distance != previous_distance):
                total_similarity = False

            previous_distance = distance

        if total_similarity and (previous_distance > radius):
            amount_clusters = 0

        return amount_clusters, cluster_borders
|
245
|
|
|
|
246
|
|
|
|
247
|
|
|
class optics_descriptor:
    """!
    @brief Object description that used by OPTICS algorithm for cluster analysis.
    @details Keeps index of the described object together with its core distance, reachability distance and
              traverse flag.

    """

    def __init__(self, index, core_distance = None, reachability_distance = None):
        """!
        @brief Constructor of object description in optics terms.

        @param[in] index (uint): Index of the object in the data set.
        @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
        @param[in] reachability_distance (double): Reachability distance to this object.

        """

        ## True is object has been already traversed.
        self.processed = False

        ## Index of object from the input data.
        self.index_object = index

        ## Reachability distance - the smallest distance to be reachable by core object.
        self.reachability_distance = reachability_distance

        ## Core distance - the smallest distance to reach specified number of neighbors that is not greater then connectivity radius.
        self.core_distance = core_distance

    def __repr__(self):
        """!
        @brief Returns string representation of the optics descriptor.
        @details Format is '(index, [c: core distance, r: reachability distance])'.

        """

        description = (self.index_object, self.core_distance, self.reachability_distance)
        return '(%s, [c: %s, r: %s])' % description
|
282
|
|
|
|
283
|
|
|
|
284
|
|
|
class optics:
    """!
    @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure) with KD-tree optimization (ccore options is supported).
    @details OPTICS is a density-based algorithm. Besides clusters and noise the algorithm creates clustering-ordering representation of the input data.
              Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
              for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
              bigger than real - because it will be calculated by the algorithms if requested amount of clusters is not allocated.

    CCORE option can be used to use the pyclustering core - C/C++ shared library for processing that significantly increases performance.

    @image html optics_example_clustering.png "Scheme how does OPTICS works. At the beginning only one cluster is allocated, but two is requested. At the second step OPTICS calculates connectivity radius using cluster-ordering and performs final cluster allocation."

    Example:
    @code
        # Read sample for clustering from some file
        sample = read_sample(path_sample)

        # Create OPTICS algorithm for cluster analysis
        optics_instance = optics(sample, 0.5, 6)

        # Run cluster analysis
        optics_instance.process()

        # Obtain results of clustering
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()

        # Obtain reachability-distances
        ordering = ordering_analyser(optics_instance.get_ordering())

        # Visualization of cluster ordering in line with reachability distance.
        ordering_visualizer.show_ordering_diagram(ordering)
    @endcode

    Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
    @code
        # Import required packages
        from pyclustering.cluster.optics import optics
        from pyclustering.samples.definitions import FCPS_SAMPLES
        from pyclustering.utils import read_sample

        # Read sample for clustering from some file
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)

        # Run cluster analysis where connectivity radius is bigger than real
        radius = 2.0
        neighbors = 3
        amount_of_clusters = 3

        optics_instance = optics(sample, radius, neighbors, amount_of_clusters)
        optics_instance.process()

        # Obtain results of clustering
        clusters = optics_instance.get_clusters()
        noise = optics_instance.get_noise()
    @endcode

    """

    def __init__(self, sample, eps, minpts, amount_clusters = None, ccore = True, **kwargs):
        """!
        @brief Constructor of clustering algorithm OPTICS.

        @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
        @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
        @param[in] minpts (uint): Minimum number of neighbors (within the connectivity radius) that is required for establishing links between points.
        @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
                    In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
                    in connectivity radius usage.
        @param[in] ccore (bool): if True than DLL CCORE (C++ solution) will be used for solving the problem.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'data_type').

        <b>Keyword Args:</b><br>
            - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').

        @exception TypeError is raised when 'data_type' is neither 'points' nor 'distance_matrix'.

        """

        self.__sample_pointer = sample      # Algorithm parameter - pointer to sample for processing.
        self.__eps = eps                    # Algorithm parameter - connectivity radius between objects.
        self.__minpts = minpts              # Algorithm parameter - minimum number of neighbors required for a core object.
        self.__amount_clusters = amount_clusters

        self.__ordering = None
        self.__clusters = None
        self.__noise = None

        self.__data_type = kwargs.get('data_type', 'points')

        self.__kdtree = None
        self.__ccore = ccore

        self.__neighbor_searcher = self.__create_neighbor_searcher(self.__data_type)

        # CCORE is used only when it is requested and the library reports itself as workable.
        if self.__ccore:
            self.__ccore = ccore_library.workable()


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of OPTICS algorithm.
        @details When 'amount_clusters' is specified and the first pass allocates another amount of clusters, connectivity
                  radius is recalculated using the cluster-ordering and clusters are allocated once again.

        @remark Results of clustering can be obtained using corresponding gets methods.

        @see get_clusters()
        @see get_noise()
        @see get_ordering()

        """

        if self.__ccore is True:
            (self.__clusters, self.__noise, self.__ordering, self.__eps) = wrapper.optics(
                self.__sample_pointer, self.__eps, self.__minpts, self.__amount_clusters, self.__data_type)
            return

        if self.__data_type == 'points':
            self.__kdtree = kdtree(self.__sample_pointer, range(len(self.__sample_pointer)))

        self.__allocate_clusters()

        if (self.__amount_clusters is None) or (self.__amount_clusters == len(self.get_clusters())):
            return

        analyser = ordering_analyser(self.get_ordering())
        radius, _ = analyser.calculate_connvectivity_radius(self.__amount_clusters)
        if radius is not None:
            self.__eps = radius
            self.__allocate_clusters()


    def __initialize(self, sample):
        """!
        @brief Initializes internal states and resets clustering results in line with input sample.

        @param[in] sample (list): Input data whose length defines amount of OPTICS objects.

        """

        amount_objects = len(sample)

        self.__processed = [False] * amount_objects
        self.__optics_objects = [optics_descriptor(index) for index in range(amount_objects)]    # List of OPTICS objects that corresponds to objects from input sample.
        self.__ordered_database = []        # List of OPTICS objects in traverse order.

        self.__clusters = None              # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
        self.__noise = None                 # Result of clustering (noise).


    def __allocate_clusters(self):
        """!
        @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.

        """

        self.__initialize(self.__sample_pointer)

        for optic_object in self.__optics_objects:
            if optic_object.processed is False:
                self.__expand_cluster_order(optic_object)

        self.__extract_clusters()


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.

        @return (list) List of allocated clusters ('None' until processing is performed).

        @see process()
        @see get_noise()
        @see get_ordering()
        @see get_radius()

        """

        return self.__clusters


    def get_noise(self):
        """!
        @brief Returns list of noise that contains indexes of objects that corresponds to input data.

        @return (list) List of allocated noise objects ('None' until processing is performed).

        @see process()
        @see get_clusters()
        @see get_ordering()
        @see get_radius()

        """

        return self.__noise


    def get_ordering(self):
        """!
        @brief Returns clustering ordering information about the input data set.
        @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
                  The ordering is calculated once and cached: reachability-distances of clustered objects are collected cluster by cluster.

        @return (list) Reachability-distances that form the clustering ordering.

        @exception TypeError is raised when the ordering is requested before clusters are allocated by process().

        @see process()
        @see get_clusters()
        @see get_noise()
        @see get_radius()

        """

        if self.__ordering is None:
            # Build into a local list and cache it only on success, otherwise a call made before
            # process() would leave an empty ordering cached forever.
            ordering = []

            for cluster in self.__clusters:
                for index_object in cluster:
                    distance = self.__optics_objects[index_object].reachability_distance
                    if distance is not None:
                        ordering.append(distance)

            self.__ordering = ordering

        return self.__ordering


    def get_radius(self):
        """!
        @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
        @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.

        @return (double) Connectivity radius.

        @see get_ordering()
        @see get_clusters()
        @see get_noise()

        """

        return self.__eps


    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicate how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __create_neighbor_searcher(self, data_type):
        """!
        @brief Returns neighbor searcher in line with data type.

        @param[in] data_type (string): Data type (points or distance matrix).

        @return (callable) Method that returns neighbors of an OPTICS object.

        @exception TypeError is raised when data type is unknown.

        """
        if data_type == 'points':
            return self.__neighbor_indexes_points

        if data_type == 'distance_matrix':
            return self.__neighbor_indexes_distance_matrix

        raise TypeError("Unknown type of data is specified '%s'" % data_type)


    def __expand_cluster_order(self, optics_object):
        """!
        @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
               Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
               Order database is updated during expanding.

        @param[in] optics_object (optics_descriptor): Object that hasn't been processed.

        """

        optics_object.processed = True

        neighbors_descriptor = self.__neighbor_searcher(optics_object)
        optics_object.reachability_distance = None

        self.__ordered_database.append(optics_object)

        # Object without enough neighbors is not a core object - nothing to expand from it.
        if len(neighbors_descriptor) < self.__minpts:
            optics_object.core_distance = None
            return

        # Core distance is the distance to the 'minpts'-th nearest neighbor.
        neighbors_descriptor.sort(key = lambda obj: obj[1])
        optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1]

        # Continue processing
        order_seed = []
        self.__update_order_seed(optics_object, neighbors_descriptor, order_seed)

        while len(order_seed) > 0:
            # Seeds are kept sorted by reachability distance, the closest one is handled first.
            optic_descriptor = order_seed.pop(0)

            neighbors_descriptor = self.__neighbor_searcher(optic_descriptor)
            optic_descriptor.processed = True

            self.__ordered_database.append(optic_descriptor)

            if len(neighbors_descriptor) >= self.__minpts:
                neighbors_descriptor.sort(key = lambda obj: obj[1])
                optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1]

                self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed)
            else:
                optic_descriptor.core_distance = None


    def __extract_clusters(self):
        """!
        @brief Extract clusters and noise from order database.
        @details Object that is not reachable within the connectivity radius starts a new cluster if it is a core-object
                  for the radius, otherwise it is noise. Reachable object joins the container that is currently filled.

        """

        self.__clusters = []
        self.__noise = []

        current_cluster = self.__noise
        for optics_object in self.__ordered_database:
            reachability = optics_object.reachability_distance
            if (reachability is not None) and (reachability <= self.__eps):
                current_cluster.append(optics_object.index_object)
                continue

            core = optics_object.core_distance
            if (core is not None) and (core <= self.__eps):
                self.__clusters.append([ optics_object.index_object ])
                current_cluster = self.__clusters[-1]
            else:
                self.__noise.append(optics_object.index_object)


    def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
        """!
        @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.

        @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
        @param[in] neighbors_descriptors (list): List of neighbors of core-object, each neighbor is a pair [index, distance].
        @param[in|out] order_seed (list): List of sorted object in line with reachable distance.

        """

        for index_neighbor, current_reachable_distance in neighbors_descriptors:
            neighbor = self.__optics_objects[index_neighbor]
            if neighbor.processed is True:
                continue

            reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance)

            if neighbor.reachability_distance is None:
                neighbor.reachability_distance = reachable_distance

                # insert element in queue O(n) - worst case.
                index_insertion = len(order_seed)
                for index_seed, seed in enumerate(order_seed):
                    if reachable_distance < seed.reachability_distance:
                        index_insertion = index_seed
                        break

                order_seed.insert(index_insertion, neighbor)

            elif reachable_distance < neighbor.reachability_distance:
                # Shorter way to already queued object has been found - restore ordering of the queue.
                neighbor.reachability_distance = reachable_distance
                order_seed.sort(key = lambda obj: obj.reachability_distance)


    def __neighbor_indexes_points(self, optic_object):
        """!
        @brief Return neighbors of the specified object in case of sequence of points.

        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.

        @return (list) List of neighbors in line the connectivity radius, each neighbor is a pair [index, distance].

        """
        point = self.__sample_pointer[optic_object.index_object]
        kdnodes = self.__kdtree.find_nearest_dist_nodes(point, self.__eps)

        # NOTE(review): the first item of a returned tuple is presumably squared distance (square root is
        # taken from it), the second one is KD-tree node whose payload is index of a point - confirm against kdtree.
        return [[node.payload, math.sqrt(square_distance)]
                for square_distance, node in ((node_tuple[0], node_tuple[1]) for node_tuple in kdnodes)
                if node.payload != optic_object.index_object]


    def __neighbor_indexes_distance_matrix(self, optic_object):
        """!
        @brief Return neighbors of the specified object in case of distance matrix.

        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.

        @return (list) List of neighbors in line the connectivity radius, each neighbor is a pair [index, distance].

        """
        index_object = optic_object.index_object
        distances = self.__sample_pointer[index_object]

        return [[index_neighbor, distances[index_neighbor]]
                for index_neighbor in range(len(distances))
                if (index_neighbor != index_object) and (distances[index_neighbor] <= self.__eps)]
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one such file in each sub-folder.