 metric.py
1 """!
2
3 @brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
4
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2019
8
@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23
24 """
25
26
27 import numpy
28
29 from enum import IntEnum
30
31
class type_metric(IntEnum):
    """!
    @brief Enumeration of supported metrics in the module for distance calculation between two points.
    @details Every value selects the function that a distance metric object dispatches to; the integer values are
              part of the public interface because the enumeration is an IntEnum.

    """

    # Euclidean distance: dispatched to euclidean_distance / euclidean_distance_numpy.
    EUCLIDEAN = 0

    # Square Euclidean distance: dispatched to euclidean_distance_square / euclidean_distance_square_numpy.
    EUCLIDEAN_SQUARE = 1

    # Manhattan distance: dispatched to manhattan_distance / manhattan_distance_numpy.
    MANHATTAN = 2

    # Chebyshev distance: dispatched to chebyshev_distance / chebyshev_distance_numpy.
    CHEBYSHEV = 3

    # Minkowski distance: dispatched to minkowski_distance / minkowski_distance_numpy with the 'degree' argument.
    MINKOWSKI = 4

    # Canberra distance: dispatched to canberra_distance / canberra_distance_numpy.
    CANBERRA = 5

    # Chi square distance: dispatched to chi_square_distance / chi_square_distance_numpy.
    CHI_SQUARE = 6

    # User-defined distance: the callable passed as the 'func' argument is used as the calculator.
    USER_DEFINED = 1000
61
62
63
65  """!
66  @brief Distance metric performs distance calculation between two points in line with encapsulated function, for
67  example, euclidean distance or chebyshev distance, or even user-defined.
68
69  @details
70
71  Example of Euclidean distance metric:
72  @code
73  metric = distance_metric(type_metric.EUCLIDEAN)
74  distance = metric([1.0, 2.5], [-1.2, 3.4])
75  @endcode
76
77  Example of Chebyshev distance metric:
78  @code
79  metric = distance_metric(type_metric.CHEBYSHEV)
80  distance = metric([0.0, 0.0], [2.5, 6.0])
81  @endcode
82
    In the following example an additional argument should be specified (generally, 'degree' is an optional argument
    that is equal to '2' by default) that is specific to the Minkowski distance:
85  @code
86  metric = distance_metric(type_metric.MINKOWSKI, degree=4)
87  distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2])
88  @endcode
89
    A user may define their own function for distance calculation. In this case the input is two points, for example,
    you want to implement your own version of Manhattan distance:
92  @code
93  from pyclustering.utils.metric import distance_metric, type_metric
94
95  def my_manhattan(point1, point2):
96  dimension = len(point1)
97  result = 0.0
98  for i in range(dimension):
99  result += abs(point1[i] - point2[i]) * 0.1
100  return result
101
102  metric = distance_metric(type_metric.USER_DEFINED, func=my_manhattan)
103  distance = metric([2.0, 3.0], [1.0, 3.0])
104  @endcode
105
106  """
107  def __init__(self, metric_type, **kwargs):
108  """!
109  @brief Creates distance metric instance for calculation distance between two points.
110
111  @param[in] metric_type (type_metric):
112  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'numpy_usage' 'func' and corresponding additional argument for
113  for specific metric types).
114
115  <b>Keyword Args:</b><br>
116  - func (callable): Callable object with two arguments (point #1 and point #2) or (object #1 and object #2) in case of numpy usage.
117  This argument is used only if metric is 'type_metric.USER_DEFINED'.
118  - degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
119  - numpy_usage (bool): If True then numpy is used for calculation (by default is False).
120
121  """
122  self.__type = metric_type
123  self.__args = kwargs
124  self.__func = self.__args.get('func', None)
125  self.__numpy = self.__args.get('numpy_usage', False)
126
128
129
    def __call__(self, point1, point2):
        """!
        @brief Calculates distance between two points.
        @details The arguments are forwarded unchanged to the calculator that is stored in the instance.

        @param[in] point1 (list): The first point.
        @param[in] point2 (list): The second point.

        @return (double) Distance between two points.

        """
        return self.__calculator(point1, point2)
141
142
    def get_type(self):
        """!
        @brief Return type of distance metric that is used.
        @details The value is the 'metric_type' argument that was passed to the constructor.

        @return (type_metric) Type of distance metric.

        """
        return self.__type
151
152
    def get_arguments(self):
        """!
        @brief Return additional arguments that are used by distance metric.
        @details The stored dictionary itself is returned, not a copy.

        @return (dict) Keyword arguments that were passed to the constructor.

        """
        return self.__args
161
162
    def get_function(self):
        """!
        @brief Return user-defined function for calculation distance metric.
        @details The value is taken from the 'func' keyword argument of the constructor and is None when that
                  argument was not specified.

        @return (callable): User-defined distance metric function.

        """
        return self.__func
171
172
174  """!
175  @brief Start numpy for distance calculation.
176  @details Useful in case matrices to increase performance. No effect in case of type_metric.USER_DEFINED type.
177
178  """
179  self.__numpy = True
180  if self.__type != type_metric.USER_DEFINED:
182
183
185  """!
186  @brief Stop using numpy for distance calculation.
187  @details Useful in case of big amount of small data portion when numpy call is longer than calculation itself.
188  No effect in case of type_metric.USER_DEFINED type.
189
190  """
191  self.__numpy = False
193
194
195  def __create_distance_calculator(self):
196  """!
197  @brief Creates distance metric calculator.
198
199  @return (callable) Callable object of distance metric calculator.
200
201  """
202  if self.__numpy is True:
204
206
207
208  def __create_distance_calculator_basic(self):
209  """!
210  @brief Creates distance metric calculator that does not use numpy.
211
212  @return (callable) Callable object of distance metric calculator.
213
214  """
215  if self.__type == type_metric.EUCLIDEAN:
216  return euclidean_distance
217
218  elif self.__type == type_metric.EUCLIDEAN_SQUARE:
219  return euclidean_distance_square
220
221  elif self.__type == type_metric.MANHATTAN:
222  return manhattan_distance
223
224  elif self.__type == type_metric.CHEBYSHEV:
225  return chebyshev_distance
226
227  elif self.__type == type_metric.MINKOWSKI:
228  return lambda point1, point2: minkowski_distance(point1, point2, self.__args.get('degree', 2))
229
230  elif self.__type == type_metric.CANBERRA:
231  return canberra_distance
232
233  elif self.__type == type_metric.CHI_SQUARE:
234  return chi_square_distance
235
236  elif self.__type == type_metric.USER_DEFINED:
237  return self.__func
238
239  else:
240  raise ValueError("Unknown type of metric: '%d'", self.__type)
241
242
243  def __create_distance_calculator_numpy(self):
244  """!
245  @brief Creates distance metric calculator that uses numpy.
246
247  @return (callable) Callable object of distance metric calculator.
248
249  """
250  if self.__type == type_metric.EUCLIDEAN:
251  return euclidean_distance_numpy
252
253  elif self.__type == type_metric.EUCLIDEAN_SQUARE:
254  return euclidean_distance_square_numpy
255
256  elif self.__type == type_metric.MANHATTAN:
257  return manhattan_distance_numpy
258
259  elif self.__type == type_metric.CHEBYSHEV:
260  return chebyshev_distance_numpy
261
262  elif self.__type == type_metric.MINKOWSKI:
263  return lambda object1, object2: minkowski_distance_numpy(object1, object2, self.__args.get('degree', 2))
264
265  elif self.__type == type_metric.CANBERRA:
266  return canberra_distance_numpy
267
268  elif self.__type == type_metric.CHI_SQUARE:
269  return chi_square_distance_numpy
270
271  elif self.__type == type_metric.USER_DEFINED:
272  return self.__func
273
274  else:
275  raise ValueError("Unknown type of metric: '%d'", self.__type)
276
277
278
def euclidean_distance(point1, point2):
    """!
    @brief Calculate Euclidean distance between two vectors.
    @details The distance is the square root of the square Euclidean distance between vectors (points) a and b:

    \f[
    dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
    \f]

    Where N is a length of each vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Euclidean distance between two vectors.

    @see euclidean_distance_square, manhattan_distance, chebyshev_distance

    """
    return euclidean_distance_square(point1, point2) ** 0.5
300
301
def euclidean_distance_numpy(object1, object2):
    """!
    @brief Calculate Euclidean distance between two objects using numpy.
    @details In case of input with more than one dimension every row is considered as a point and one distance per
              row is returned. Otherwise the input is considered as a single point and one value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Euclidean distance between two objects.

    """
    squared_difference = numpy.square(object1 - object2)

    # The square root is taken from the sum: taking it from every term would produce Manhattan distance.
    if squared_difference.ndim > 1:
        return numpy.sqrt(numpy.sum(squared_difference, axis=1)).T

    return numpy.sqrt(numpy.sum(squared_difference))
313
314
def euclidean_distance_square(point1, point2):
    """!
    @brief Calculate square Euclidean distance between two vectors.
    @details Squares of coordinate differences are accumulated over the length of the first vector:

    \f[
    dist(a, b) = \sum_{i=0}^{N}(a_{i} - b_{i})^{2};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Square Euclidean distance between two vectors.

    @see euclidean_distance, manhattan_distance, chebyshev_distance

    """
    total = 0.0
    for index in range(len(point1)):
        delta = point1[index] - point2[index]
        total += delta ** 2.0

    return total
336
337
def euclidean_distance_square_numpy(object1, object2):
    """!
    @brief Calculate square Euclidean distance between two objects using numpy.
    @details In case of input with more than one dimension every row is considered as a point and one distance per
              row is returned. Otherwise the input is considered as a single point and one value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Square Euclidean distance between two objects.

    """
    squared_difference = numpy.square(object1 - object2)

    # Summation along 'axis=1' is possible only when rows exist, a single vector is summed as a whole.
    if squared_difference.ndim > 1:
        return numpy.sum(squared_difference, axis=1).T

    return numpy.sum(squared_difference)
349
350
def manhattan_distance(point1, point2):
    """!
    @brief Calculate Manhattan distance between two vectors.
    @details Absolute coordinate differences are accumulated over the length of the first vector:

    \f[
    dist(a, b) = \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |;
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Manhattan distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, chebyshev_distance

    """
    total = 0.0
    for index in range(len(point1)):
        total += abs(point1[index] - point2[index])

    return total
374
375
def manhattan_distance_numpy(object1, object2):
    """!
    @brief Calculate Manhattan distance between two objects using numpy.
    @details In case of input with more than one dimension every row is considered as a point and one distance per
              row is returned. Otherwise the input is considered as a single point and one value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Manhattan distance between two objects.

    """
    absolute_difference = numpy.absolute(object1 - object2)

    # Summation along 'axis=1' is possible only when rows exist, a single vector is summed as a whole.
    if absolute_difference.ndim > 1:
        return numpy.sum(absolute_difference, axis=1).T

    return numpy.sum(absolute_difference)
387
388
def chebyshev_distance(point1, point2):
    """!
    @brief Calculate Chebyshev distance between two vectors.
    @details The largest absolute coordinate difference over the length of the first vector is returned:

    \f[
    dist(a, b) = \max_{}i\left (\left | a_{i} - b_{i} \right |\right );
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Chebyshev distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, minkowski_distance

    """
    gaps = [abs(point1[index] - point2[index]) for index in range(len(point1))]

    # The leading 0.0 is the initial value, so it is the result for empty vectors and for zero differences.
    return max([0.0] + gaps)
412
413
def chebyshev_distance_numpy(object1, object2):
    """!
    @brief Calculate Chebyshev distance between two objects using numpy.
    @details In case of input with more than one dimension every row is considered as a point and one distance per
              row is returned. Otherwise the input is considered as a single point and one value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Chebyshev distance between two objects.

    """
    absolute_difference = numpy.absolute(object1 - object2)

    # Maximum along 'axis=1' is possible only when rows exist, a single vector is reduced as a whole.
    if absolute_difference.ndim > 1:
        return numpy.max(absolute_difference, axis=1).T

    return numpy.max(absolute_difference)
425
426
def minkowski_distance(point1, point2, degree=2):
    """!
    @brief Calculate Minkowski distance between two vectors.
    @details Absolute coordinate differences raised to the degree are accumulated over the length of the first
              vector and the root of the same degree is taken from the sum:

    \f[
    dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |^{p} };
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double) Minkowski distance between two vectors.

    @see euclidean_distance

    """
    distance = 0.0
    for index in range(len(point1)):
        # The absolute value is required: a negative difference raised to an odd degree stays negative and
        # would reduce the sum or even make the root undefined.
        distance += abs(point1[index] - point2[index]) ** degree

    return distance ** (1.0 / degree)
449
450
def minkowski_distance_numpy(object1, object2, degree=2):
    """!
    @brief Calculate Minkowski distance between objects using numpy.
    @details In case of input with more than one dimension every row is considered as a point and one distance per
              row is returned. Otherwise the input is considered as a single point and one value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double|array_like) Minkowski distance between two objects.

    """
    # The absolute value keeps terms non-negative for odd degrees.
    powered_difference = numpy.power(numpy.absolute(object1 - object2), degree)

    # The root is taken from the sum: taking it from every term would produce Manhattan distance.
    if powered_difference.ndim > 1:
        return numpy.power(numpy.sum(powered_difference, axis=1), 1.0 / degree).T

    return numpy.power(numpy.sum(powered_difference), 1.0 / degree)
463
464
def canberra_distance(point1, point2):
    """!
    @brief Calculate Canberra distance between two vectors.
    @details Coordinates whose absolute values sum to zero are skipped, the other coordinates contribute:

    \f[
    dist(a, b) = \sum_{i=0}^{N}\frac{\left | a_{i} - b_{i} \right |}{\left | a_{i} \right | + \left | b_{i} \right |};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (float) Canberra distance between two objects.

    """
    total = 0.0
    for index in range(len(point1)):
        first, second = point1[index], point2[index]
        denominator = abs(first) + abs(second)

        # A zero denominator means that both coordinates are zero, such a term is left out.
        if denominator != 0.0:
            total += abs(first - second) / denominator

    return total
488
489
def canberra_distance_numpy(object1, object2):
    """!
    @brief Calculate Canberra distance between two objects using numpy.
    @details Division warnings are suppressed and not-a-number terms (zero divided by zero) are replaced by zero
              before summation. In case of input with more than one dimension one distance per row is returned,
              otherwise one value is returned.

    @param[in] object1 (array_like): The first vector.
    @param[in] object2 (array_like): The second vector.

    @return (float) Canberra distance between two objects.

    """
    with numpy.errstate(divide='ignore', invalid='ignore'):
        numerator = numpy.abs(object1 - object2)
        denominator = numpy.abs(object1) + numpy.abs(object2)
        ratios = numpy.divide(numerator, denominator)

    cleaned = numpy.nan_to_num(ratios)
    if cleaned.ndim > 1:
        return numpy.sum(cleaned, axis=1).T

    return numpy.sum(cleaned)
507
508
def chi_square_distance(point1, point2):
    """!
    @brief Calculate Chi square distance between two vectors.
    @details Coordinates whose absolute values sum to zero are skipped, the other coordinates contribute:

    \f[
    dist(a, b) = \sum_{i=0}^{N}\frac{\left ( a_{i} - b_{i} \right )^{2}}{\left | a_{i} \right | + \left | b_{i} \right |};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (float) Chi square distance between two objects.

    """
    total = 0.0
    for index in range(len(point1)):
        first, second = point1[index], point2[index]
        denominator = abs(first) + abs(second)

        # A zero denominator means that both coordinates are zero, such a term is left out.
        if denominator != 0.0:
            total += ((first - second) ** 2.0) / denominator

    return total
532
533
def chi_square_distance_numpy(object1, object2):
    """!
    @brief Calculate Chi square distance between two vectors using numpy.
    @details Division warnings are suppressed and not-a-number terms (zero divided by zero) are replaced by zero
              before summation. In case of input with more than one dimension one distance per row is returned,
              otherwise one value is returned.

    @param[in] object1 (array_like): The first vector.
    @param[in] object2 (array_like): The second vector.

    @return (float) Chi square distance between two objects.

    """
    with numpy.errstate(divide='ignore', invalid='ignore'):
        numerator = numpy.power(object1 - object2, 2)
        denominator = numpy.abs(object1) + numpy.abs(object2)
        ratios = numpy.divide(numerator, denominator)

    cleaned = numpy.nan_to_num(ratios)
    if cleaned.ndim > 1:
        return numpy.sum(cleaned, axis=1).T

    return numpy.sum(cleaned)
