|
1
|
|
|
"""!
|
|
2
|
|
|
|
|
3
|
|
|
@brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
|
|
4
|
|
|
|
|
5
|
|
|
@authors Andrei Novikov ([email protected])
|
|
6
|
|
|
@date 2014-2018
|
|
7
|
|
|
@copyright GNU Public License
|
|
8
|
|
|
|
|
9
|
|
|
@cond GNU_PUBLIC_LICENSE
|
|
10
|
|
|
PyClustering is free software: you can redistribute it and/or modify
|
|
11
|
|
|
it under the terms of the GNU General Public License as published by
|
|
12
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
13
|
|
|
(at your option) any later version.
|
|
14
|
|
|
|
|
15
|
|
|
PyClustering is distributed in the hope that it will be useful,
|
|
16
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18
|
|
|
GNU General Public License for more details.
|
|
19
|
|
|
|
|
20
|
|
|
You should have received a copy of the GNU General Public License
|
|
21
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
22
|
|
|
@endcond
|
|
23
|
|
|
|
|
24
|
|
|
"""
|
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
from enum import IntEnum;
|
|
28
|
|
|
|
|
29
|
|
|
|
|
30
|
|
|
class type_metric(IntEnum):
|
|
31
|
|
|
"""!
|
|
32
|
|
|
@brief Enumeration of supported metrics in the module for distance calculation between two points.
|
|
33
|
|
|
|
|
34
|
|
|
"""
|
|
35
|
|
|
|
|
36
|
|
|
## Euclidean distance, for more information see function 'euclidean_distance'.
|
|
37
|
|
|
EUCLIDEAN = 0;
|
|
38
|
|
|
|
|
39
|
|
|
## Square Euclidean distance, for more information see function 'euclidean_distance_square'.
|
|
40
|
|
|
EUCLIDEAN_SQUARE = 1;
|
|
41
|
|
|
|
|
42
|
|
|
## Manhattan distance, for more information see function 'manhattan_distance'.
|
|
43
|
|
|
MANHATTAN = 2;
|
|
44
|
|
|
|
|
45
|
|
|
## Chebyshev distance, for more information see function 'chebyshev_distance'.
|
|
46
|
|
|
CHEBYSHEV = 3;
|
|
47
|
|
|
|
|
48
|
|
|
## Minkowski distance, for more information see function 'minkowski_distance'.
|
|
49
|
|
|
MINKOWSKI = 4;
|
|
50
|
|
|
|
|
51
|
|
|
## User defined function for distance calculation between two points.
|
|
52
|
|
|
USER_DEFINED = 1000;
|
|
53
|
|
|
|
|
54
|
|
|
|
|
55
|
|
|
|
|
56
|
|
|
class distance_metric:
|
|
57
|
|
|
"""!
|
|
58
|
|
|
@brief Distance metric performs distance calculation between two points in line with encapsulated function, for
|
|
59
|
|
|
example, euclidean distance or chebyshev distance, or even user-defined.
|
|
60
|
|
|
|
|
61
|
|
|
@details
|
|
62
|
|
|
|
|
63
|
|
|
Example of Euclidean distance metric:
|
|
64
|
|
|
@code
|
|
65
|
|
|
metric = distance_metric(type_metric.EUCLIDEAN);
|
|
66
|
|
|
distance = metric([1.0, 2.5], [-1.2, 3.4]);
|
|
67
|
|
|
@endcode
|
|
68
|
|
|
|
|
69
|
|
|
Example of Chebyshev distance metric:
|
|
70
|
|
|
@code
|
|
71
|
|
|
metric = distance_metric(type_metric.CHEBYSHEV);
|
|
72
|
|
|
distance = metric([0.0, 0.0], [2.5, 6.0]);
|
|
73
|
|
|
@endcode
|
|
74
|
|
|
|
|
75
|
|
|
In following example additional argument should be specified (generally, 'degree' is a optional argument that is
|
|
76
|
|
|
equal to '2' by default) that is specific for Minkowski distance:
|
|
77
|
|
|
@code
|
|
78
|
|
|
metric = distance_metric(type_metric.MINKOWSKI, degree=4);
|
|
79
|
|
|
distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2]);
|
|
80
|
|
|
@endcode
|
|
81
|
|
|
|
|
82
|
|
|
User may define its own function for distance calculation:
|
|
83
|
|
|
@code
|
|
84
|
|
|
user_function = lambda point1, point2: point1[0] + point2[0] + 2;
|
|
85
|
|
|
metric = distance_metric(type_metric.USER_DEFINED, func=user_function);
|
|
86
|
|
|
distance = metric([2.0, 3.0], [1.0, 3.0]);
|
|
87
|
|
|
@endcode
|
|
88
|
|
|
|
|
89
|
|
|
"""
|
|
90
|
|
|
def __init__(self, type, **kwargs):
|
|
91
|
|
|
"""!
|
|
92
|
|
|
@brief Creates distance metric instance for calculation distance between two points.
|
|
93
|
|
|
|
|
94
|
|
|
@param[in] type (type_metric):
|
|
95
|
|
|
@param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'func' and corresponding additional argument for
|
|
96
|
|
|
for specific metric types).
|
|
97
|
|
|
|
|
98
|
|
|
Keyword Args:
|
|
99
|
|
|
func (callable): Callable object with two arguments (point #1 and point #2) that is used only if metric is 'type_metric.USER_DEFINED'.
|
|
100
|
|
|
degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
|
|
101
|
|
|
|
|
102
|
|
|
"""
|
|
103
|
|
|
self.__type = type;
|
|
104
|
|
|
self.__args = kwargs;
|
|
105
|
|
|
self.__func = self.__args.get('func', None);
|
|
106
|
|
|
|
|
107
|
|
|
|
|
108
|
|
|
def __call__(self, point1, point2):
|
|
109
|
|
|
"""!
|
|
110
|
|
|
@brief Calculates distance between two points.
|
|
111
|
|
|
|
|
112
|
|
|
@param[in] point1 (list): The first point.
|
|
113
|
|
|
@param[in] point2 (list): The second point.
|
|
114
|
|
|
|
|
115
|
|
|
@return (double) Distance between two points.
|
|
116
|
|
|
|
|
117
|
|
|
"""
|
|
118
|
|
|
if self.__type == type_metric.EUCLIDEAN:
|
|
119
|
|
|
return euclidean_distance(point1, point2);
|
|
120
|
|
|
|
|
121
|
|
|
elif self.__type == type_metric.EUCLIDEAN_SQUARE:
|
|
122
|
|
|
return euclidean_distance_square(point1, point2);
|
|
123
|
|
|
|
|
124
|
|
|
elif self.__type == type_metric.MANHATTAN:
|
|
125
|
|
|
return manhattan_distance(point1, point2);
|
|
126
|
|
|
|
|
127
|
|
|
elif self.__type == type_metric.CHEBYSHEV:
|
|
128
|
|
|
return chebyshev_distance(point1, point2);
|
|
129
|
|
|
|
|
130
|
|
|
elif self.__type == type_metric.MINKOWSKI:
|
|
131
|
|
|
return minkowski_distance(point1, point2, self.__args.get('degree', 2));
|
|
132
|
|
|
|
|
133
|
|
|
elif self.__type == type_metric.USER_DEFINED:
|
|
134
|
|
|
return self.__func(point1, point2);
|
|
135
|
|
|
|
|
136
|
|
|
else:
|
|
137
|
|
|
raise ValueError("Unknown type of metric: '%d'", self.__type);
|
|
138
|
|
|
|
|
139
|
|
|
|
|
140
|
|
|
def get_type(self):
|
|
141
|
|
|
"""!
|
|
142
|
|
|
@brief Return type of distance metric that is used.
|
|
143
|
|
|
|
|
144
|
|
|
@return (type_metric) Type of distance metric.
|
|
145
|
|
|
|
|
146
|
|
|
"""
|
|
147
|
|
|
return self.__type;
|
|
148
|
|
|
|
|
149
|
|
|
|
|
150
|
|
|
def get_arguments(self):
|
|
151
|
|
|
"""!
|
|
152
|
|
|
@brief Return additional arguments that are used by distance metric.
|
|
153
|
|
|
|
|
154
|
|
|
@return (dict) Additional arguments.
|
|
155
|
|
|
|
|
156
|
|
|
"""
|
|
157
|
|
|
return self.__args;
|
|
158
|
|
|
|
|
159
|
|
|
|
|
160
|
|
|
def get_function(self):
|
|
161
|
|
|
"""!
|
|
162
|
|
|
@brief Return user-defined function for calculation distance metric.
|
|
163
|
|
|
|
|
164
|
|
|
@return (callable): User-defined distance metric function.
|
|
165
|
|
|
|
|
166
|
|
|
"""
|
|
167
|
|
|
return self.__func;
|
|
168
|
|
|
|
|
169
|
|
|
|
|
170
|
|
|
|
|
171
|
|
|
def euclidean_distance(point1, point2):
|
|
172
|
|
|
"""!
|
|
|
|
|
|
|
173
|
|
|
@brief Calculate Euclidean distance between two vectors.
|
|
174
|
|
|
@details The Euclidean between vectors (points) a and b is calculated by following formula:
|
|
175
|
|
|
|
|
176
|
|
|
\f[
|
|
177
|
|
|
dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
|
|
178
|
|
|
\f]
|
|
179
|
|
|
|
|
180
|
|
|
Where N is a length of each vector.
|
|
181
|
|
|
|
|
182
|
|
|
@param[in] point1 (list): The first vector.
|
|
183
|
|
|
@param[in] point2 (list): The second vector.
|
|
184
|
|
|
|
|
185
|
|
|
@return (double) Euclidean distance between two vectors.
|
|
186
|
|
|
|
|
187
|
|
|
@see euclidean_distance_square, manhattan_distance, chebyshev_distance
|
|
188
|
|
|
|
|
189
|
|
|
"""
|
|
190
|
|
|
distance = euclidean_distance_square(point1, point2);
|
|
191
|
|
|
return distance ** 0.5;
|
|
192
|
|
|
|
|
193
|
|
|
|
|
194
|
|
|
def euclidean_distance_square(point1, point2):
|
|
195
|
|
|
"""!
|
|
|
|
|
|
|
196
|
|
|
@brief Calculate square Euclidean distance between two vectors.
|
|
197
|
|
|
|
|
198
|
|
|
\f[
|
|
199
|
|
|
dist(a, b) = \sum_{i=0}^{N}(a_{i} - b_{i})^{2};
|
|
200
|
|
|
\f]
|
|
201
|
|
|
|
|
202
|
|
|
@param[in] point1 (list): The first vector.
|
|
203
|
|
|
@param[in] point2 (list): The second vector.
|
|
204
|
|
|
|
|
205
|
|
|
@return (double) Square Euclidean distance between two vectors.
|
|
206
|
|
|
|
|
207
|
|
|
@see euclidean_distance, manhattan_distance, chebyshev_distance
|
|
208
|
|
|
|
|
209
|
|
|
"""
|
|
210
|
|
|
distance = 0.0;
|
|
211
|
|
|
for i in range(len(point1)):
|
|
212
|
|
|
distance += (point1[i] - point2[i]) ** 2.0;
|
|
213
|
|
|
|
|
214
|
|
|
return distance;
|
|
215
|
|
|
|
|
216
|
|
|
|
|
217
|
|
|
def manhattan_distance(point1, point2):
|
|
218
|
|
|
"""!
|
|
|
|
|
|
|
219
|
|
|
@brief Calculate Manhattan distance between between two vectors.
|
|
220
|
|
|
|
|
221
|
|
|
\f[
|
|
222
|
|
|
dist(a, b) = \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |;
|
|
223
|
|
|
\f]
|
|
224
|
|
|
|
|
225
|
|
|
@param[in] point1 (list): The first vector.
|
|
226
|
|
|
@param[in] point2 (list): The second vector.
|
|
227
|
|
|
|
|
228
|
|
|
@return (double) Manhattan distance between two vectors.
|
|
229
|
|
|
|
|
230
|
|
|
@see euclidean_distance_square, euclidean_distance, chebyshev_distance
|
|
231
|
|
|
|
|
232
|
|
|
"""
|
|
233
|
|
|
distance = 0.0;
|
|
234
|
|
|
dimension = len(point1);
|
|
235
|
|
|
|
|
236
|
|
|
for i in range(dimension):
|
|
237
|
|
|
distance += abs(point1[i] - point2[i]);
|
|
238
|
|
|
|
|
239
|
|
|
return distance;
|
|
240
|
|
|
|
|
241
|
|
|
|
|
242
|
|
|
def chebyshev_distance(point1, point2):
|
|
243
|
|
|
"""!
|
|
|
|
|
|
|
244
|
|
|
@brief Calculate Chebyshev distance between between two vectors.
|
|
245
|
|
|
|
|
246
|
|
|
\f[
|
|
247
|
|
|
dist(a, b) = \max_{}i\left (\left | a_{i} - b_{i} \right |\right );
|
|
248
|
|
|
\f]
|
|
249
|
|
|
|
|
250
|
|
|
@param[in] point1 (list): The first vector.
|
|
251
|
|
|
@param[in] point2 (list): The second vector.
|
|
252
|
|
|
|
|
253
|
|
|
@return (double) Chebyshev distance between two vectors.
|
|
254
|
|
|
|
|
255
|
|
|
@see euclidean_distance_square, euclidean_distance, minkowski_distance
|
|
256
|
|
|
|
|
257
|
|
|
"""
|
|
258
|
|
|
distance = 0.0;
|
|
259
|
|
|
dimension = len(point1);
|
|
260
|
|
|
|
|
261
|
|
|
for i in range(dimension):
|
|
262
|
|
|
distance = max(distance, abs(point1[i] - point2[i]));
|
|
263
|
|
|
|
|
264
|
|
|
return distance;
|
|
265
|
|
|
|
|
266
|
|
|
|
|
267
|
|
|
def minkowski_distance(point1, point2, degree=2):
|
|
268
|
|
|
"""!
|
|
|
|
|
|
|
269
|
|
|
@brief Calculate Minkowski distance between two vectors.
|
|
270
|
|
|
|
|
271
|
|
|
\f[
|
|
272
|
|
|
dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left(a_{i} - b_{i}\right)^{p} };
|
|
273
|
|
|
\f]
|
|
274
|
|
|
|
|
275
|
|
|
@param[in] point1 (list): The first vector.
|
|
276
|
|
|
@param[in] point2 (list): The second vector.
|
|
277
|
|
|
@param[in] degree (numeric): Degree of that is used for Minkowski distance.
|
|
278
|
|
|
|
|
279
|
|
|
@return (double) Minkowski distance between two vectors.
|
|
280
|
|
|
|
|
281
|
|
|
@see euclidean_distance
|
|
282
|
|
|
|
|
283
|
|
|
"""
|
|
284
|
|
|
distance = 0.0;
|
|
285
|
|
|
for i in range(len(point1)):
|
|
286
|
|
|
distance += (point1[i] - point2[i]) ** degree;
|
|
287
|
|
|
|
|
288
|
|
|
return distance ** (1.0 / degree); |
Escape sequences in Python are generally interpreted according to rules similar to standard C. Only if strings are prefixed with
rorRare they interpreted as regular expressions.The escape sequence that was used indicates that you might have intended to write a regular expression.
Learn more about the available escape sequences. in the Python documentation.