metric.py
1 """!
2
3 @brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
4
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2019
8
10  PyClustering is free software: you can redistribute it and/or modify
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23
24 """
25
26
27 import numpy
28
29 from enum import IntEnum
30
31
32 class type_metric(IntEnum):
33  """!
34  @brief Enumeration of supported metrics in the module for distance calculation between two points.
35
36  """
37
38
39  EUCLIDEAN = 0
40
41
42  EUCLIDEAN_SQUARE = 1
43
44
45  MANHATTAN = 2
46
47
48  CHEBYSHEV = 3
49
50
51  MINKOWSKI = 4
52
53
54  CANBERRA = 5
55
56
57  CHI_SQUARE = 6
58
59
60  GOWER = 7
61
62
63  USER_DEFINED = 1000
64
65
66
68  """!
69  @brief Distance metric performs distance calculation between two points in line with encapsulated function, for
70  example, euclidean distance or chebyshev distance, or even user-defined.
71
72  @details
73
74  Example of Euclidean distance metric:
75  @code
76  metric = distance_metric(type_metric.EUCLIDEAN)
77  distance = metric([1.0, 2.5], [-1.2, 3.4])
78  @endcode
79
80  Example of Chebyshev distance metric:
81  @code
82  metric = distance_metric(type_metric.CHEBYSHEV)
83  distance = metric([0.0, 0.0], [2.5, 6.0])
84  @endcode
85
86  In following example additional argument should be specified (generally, 'degree' is a optional argument that is
87  equal to '2' by default) that is specific for Minkowski distance:
88  @code
89  metric = distance_metric(type_metric.MINKOWSKI, degree=4)
90  distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2])
91  @endcode
92
93  User may define its own function for distance calculation. In this case input is two points, for example, you
94  want to implement your own version of Manhattan distance:
95  @code
96  from pyclustering.utils.metric import distance_metric, type_metric
97
98  def my_manhattan(point1, point2):
99  dimension = len(point1)
100  result = 0.0
101  for i in range(dimension):
102  result += abs(point1[i] - point2[i]) * 0.1
103  return result
104
105  metric = distance_metric(type_metric.USER_DEFINED, func=my_manhattan)
106  distance = metric([2.0, 3.0], [1.0, 3.0])
107  @endcode
108
109  """
110  def __init__(self, metric_type, **kwargs):
111  """!
112  @brief Creates distance metric instance for calculation distance between two points.
113
114  @param[in] metric_type (type_metric):
115  @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'numpy_usage' 'func' and corresponding additional argument for
116  for specific metric types).
117
118  <b>Keyword Args:</b><br>
119  - func (callable): Callable object with two arguments (point #1 and point #2) or (object #1 and object #2) in case of numpy usage.
120  This argument is used only if metric is 'type_metric.USER_DEFINED'.
121  - degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
122  - max_range (array_like): Only for 'type_metric.GOWER' - max range in each dimension. 'data' can be used
124  - data (array_like): Only for 'type_metric.GOWER' - input data that used for 'max_range' calculation.
125  'max_range' can be used instead of this parameter.
126  - numpy_usage (bool): If True then numpy is used for calculation (by default is False).
127
128  """
129  self.__type = metric_type
130  self.__args = kwargs
131  self.__func = self.__args.get('func', None)
132  self.__numpy = self.__args.get('numpy_usage', False)
133
135
136
137  def __call__(self, point1, point2):
138  """!
139  @brief Calculates distance between two points.
140
141  @param[in] point1 (list): The first point.
142  @param[in] point2 (list): The second point.
143
144  @return (double) Distance between two points.
145
146  """
147  return self.__calculator(point1, point2)
148
149
150  def get_type(self):
151  """!
152  @brief Return type of distance metric that is used.
153
154  @return (type_metric) Type of distance metric.
155
156  """
157  return self.__type
158
159
160  def get_arguments(self):
161  """!
162  @brief Return additional arguments that are used by distance metric.
163
165
166  """
167  return self.__args
168
169
170  def get_function(self):
171  """!
172  @brief Return user-defined function for calculation distance metric.
173
174  @return (callable): User-defined distance metric function.
175
176  """
177  return self.__func
178
179
181  """!
182  @brief Start numpy for distance calculation.
183  @details Useful in case matrices to increase performance. No effect in case of type_metric.USER_DEFINED type.
184
185  """
186  self.__numpy = True
187  if self.__type != type_metric.USER_DEFINED:
189
190
192  """!
193  @brief Stop using numpy for distance calculation.
194  @details Useful in case of big amount of small data portion when numpy call is longer than calculation itself.
195  No effect in case of type_metric.USER_DEFINED type.
196
197  """
198  self.__numpy = False
200
201
202  def __create_distance_calculator(self):
203  """!
204  @brief Creates distance metric calculator.
205
206  @return (callable) Callable object of distance metric calculator.
207
208  """
209  if self.__numpy is True:
211
213
214
215  def __create_distance_calculator_basic(self):
216  """!
217  @brief Creates distance metric calculator that does not use numpy.
218
219  @return (callable) Callable object of distance metric calculator.
220
221  """
222  if self.__type == type_metric.EUCLIDEAN:
223  return euclidean_distance
224
225  elif self.__type == type_metric.EUCLIDEAN_SQUARE:
226  return euclidean_distance_square
227
228  elif self.__type == type_metric.MANHATTAN:
229  return manhattan_distance
230
231  elif self.__type == type_metric.CHEBYSHEV:
232  return chebyshev_distance
233
234  elif self.__type == type_metric.MINKOWSKI:
235  return lambda point1, point2: minkowski_distance(point1, point2, self.__args.get('degree', 2))
236
237  elif self.__type == type_metric.CANBERRA:
238  return canberra_distance
239
240  elif self.__type == type_metric.CHI_SQUARE:
241  return chi_square_distance
242
243  elif self.__type == type_metric.GOWER:
244  max_range = self.__get_gower_max_range()
245  return lambda point1, point2: gower_distance(point1, point2, max_range)
246
247  elif self.__type == type_metric.USER_DEFINED:
248  return self.__func
249
250  else:
251  raise ValueError("Unknown type of metric: '%d'", self.__type)
252
253
254  def __get_gower_max_range(self):
255  """!
256  @brief Returns max range for Gower distance using input parameters ('max_range' or 'data').
257
258  @return (numpy.array) Max range for Gower distance.
259
260  """
261  max_range = self.__args.get('max_range', None)
262  if max_range is None:
263  data = self.__args.get('data', None)
264  if data is None:
265  raise ValueError("Gower distance requires 'data' or 'max_range' argument to construct metric.")
266
267  max_range = numpy.max(data, axis=0) - numpy.min(data, axis=0)
268  self.__args['max_range'] = max_range
269
270  return max_range
271
272
273  def __create_distance_calculator_numpy(self):
274  """!
275  @brief Creates distance metric calculator that uses numpy.
276
277  @return (callable) Callable object of distance metric calculator.
278
279  """
280  if self.__type == type_metric.EUCLIDEAN:
281  return euclidean_distance_numpy
282
283  elif self.__type == type_metric.EUCLIDEAN_SQUARE:
284  return euclidean_distance_square_numpy
285
286  elif self.__type == type_metric.MANHATTAN:
287  return manhattan_distance_numpy
288
289  elif self.__type == type_metric.CHEBYSHEV:
290  return chebyshev_distance_numpy
291
292  elif self.__type == type_metric.MINKOWSKI:
293  return lambda object1, object2: minkowski_distance_numpy(object1, object2, self.__args.get('degree', 2))
294
295  elif self.__type == type_metric.CANBERRA:
296  return canberra_distance_numpy
297
298  elif self.__type == type_metric.CHI_SQUARE:
299  return chi_square_distance_numpy
300
301  elif self.__type == type_metric.GOWER:
302  max_range = self.__get_gower_max_range()
303  return lambda object1, object2: gower_distance_numpy(object1, object2, max_range)
304
305  elif self.__type == type_metric.USER_DEFINED:
306  return self.__func
307
308  else:
309  raise ValueError("Unknown type of metric: '%d'", self.__type)
310
311
312
313 def euclidean_distance(point1, point2):
314  """!
315  @brief Calculate Euclidean distance between two vectors.
316  @details The Euclidean between vectors (points) a and b is calculated by following formula:
317
318  \f[
319  dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
320  \f]
321
322  Where N is a length of each vector.
323
324  @param[in] point1 (array_like): The first vector.
325  @param[in] point2 (array_like): The second vector.
326
327  @return (double) Euclidean distance between two vectors.
328
329  @see euclidean_distance_square, manhattan_distance, chebyshev_distance
330
331  """
332  distance = euclidean_distance_square(point1, point2)
333  return distance ** 0.5
334
335
336 def euclidean_distance_numpy(object1, object2):
337  """!
338  @brief Calculate Euclidean distance between two objects using numpy.
339
340  @param[in] object1 (array_like): The first array_like object.
341  @param[in] object2 (array_like): The second array_like object.
342
343  @return (double) Euclidean distance between two objects.
344
345  """
346  return numpy.sum(numpy.sqrt(numpy.square(object1 - object2)), axis=1).T
347
348
349 def euclidean_distance_square(point1, point2):
350  """!
351  @brief Calculate square Euclidean distance between two vectors.
352
353  \f[
354  dist(a, b) = \sum_{i=0}^{N}(a_{i} - b_{i})^{2};
355  \f]
356
357  @param[in] point1 (array_like): The first vector.
358  @param[in] point2 (array_like): The second vector.
359
360  @return (double) Square Euclidean distance between two vectors.
361
362  @see euclidean_distance, manhattan_distance, chebyshev_distance
363
364  """
365  distance = 0.0
366  for i in range(len(point1)):
367  distance += (point1[i] - point2[i]) ** 2.0
368
369  return distance
370
371
372 def euclidean_distance_square_numpy(object1, object2):
373  """!
374  @brief Calculate square Euclidean distance between two objects using numpy.
375
376  @param[in] object1 (array_like): The first array_like object.
377  @param[in] object2 (array_like): The second array_like object.
378
379  @return (double) Square Euclidean distance between two objects.
380
381  """
382  return numpy.sum(numpy.square(object1 - object2), axis=1).T
383
384
385 def manhattan_distance(point1, point2):
386  """!
387  @brief Calculate Manhattan distance between between two vectors.
388
389  \f[
390  dist(a, b) = \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |;
391  \f]
392
393  @param[in] point1 (array_like): The first vector.
394  @param[in] point2 (array_like): The second vector.
395
396  @return (double) Manhattan distance between two vectors.
397
398  @see euclidean_distance_square, euclidean_distance, chebyshev_distance
399
400  """
401  distance = 0.0
402  dimension = len(point1)
403
404  for i in range(dimension):
405  distance += abs(point1[i] - point2[i])
406
407  return distance
408
409
410 def manhattan_distance_numpy(object1, object2):
411  """!
412  @brief Calculate Manhattan distance between two objects using numpy.
413
414  @param[in] object1 (array_like): The first array_like object.
415  @param[in] object2 (array_like): The second array_like object.
416
417  @return (double) Manhattan distance between two objects.
418
419  """
420  return numpy.sum(numpy.absolute(object1 - object2), axis=1).T
421
422
423 def chebyshev_distance(point1, point2):
424  """!
425  @brief Calculate Chebyshev distance between between two vectors.
426
427  \f[
428  dist(a, b) = \max_{}i\left (\left | a_{i} - b_{i} \right |\right );
429  \f]
430
431  @param[in] point1 (array_like): The first vector.
432  @param[in] point2 (array_like): The second vector.
433
434  @return (double) Chebyshev distance between two vectors.
435
436  @see euclidean_distance_square, euclidean_distance, minkowski_distance
437
438  """
439  distance = 0.0
440  dimension = len(point1)
441
442  for i in range(dimension):
443  distance = max(distance, abs(point1[i] - point2[i]))
444
445  return distance
446
447
448 def chebyshev_distance_numpy(object1, object2):
449  """!
450  @brief Calculate Chebyshev distance between two objects using numpy.
451
452  @param[in] object1 (array_like): The first array_like object.
453  @param[in] object2 (array_like): The second array_like object.
454
455  @return (double) Chebyshev distance between two objects.
456
457  """
458  return numpy.max(numpy.absolute(object1 - object2), axis=1).T
459
460
461 def minkowski_distance(point1, point2, degree=2):
462  """!
463  @brief Calculate Minkowski distance between two vectors.
464
465  \f[
466  dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left(a_{i} - b_{i}\right)^{p} };
467  \f]
468
469  @param[in] point1 (array_like): The first vector.
470  @param[in] point2 (array_like): The second vector.
471  @param[in] degree (numeric): Degree of that is used for Minkowski distance.
472
473  @return (double) Minkowski distance between two vectors.
474
475  @see euclidean_distance
476
477  """
478  distance = 0.0
479  for i in range(len(point1)):
480  distance += (point1[i] - point2[i]) ** degree
481
482  return distance ** (1.0 / degree)
483
484
485 def minkowski_distance_numpy(object1, object2, degree=2):
486  """!
487  @brief Calculate Minkowski distance between objects using numpy.
488
489  @param[in] object1 (array_like): The first array_like object.
490  @param[in] object2 (array_like): The second array_like object.
491  @param[in] degree (numeric): Degree of that is used for Minkowski distance.
492
493  @return (double) Minkowski distance between two object.
494
495  """
496  return numpy.sum(numpy.power(numpy.power(object1 - object2, degree), 1/degree), axis=1).T
497
498
499 def canberra_distance(point1, point2):
500  """!
501  @brief Calculate Canberra distance between two vectors.
502
503  \f[
504  dist(a, b) = \sum_{i=0}^{N}\frac{\left | a_{i} - b_{i} \right |}{\left | a_{i} \right | + \left | b_{i} \right |};
505  \f]
506
507  @param[in] point1 (array_like): The first vector.
508  @param[in] point2 (array_like): The second vector.
509
510  @return (float) Canberra distance between two objects.
511
512  """
513  distance = 0.0
514  for i in range(len(point1)):
515  divider = abs(point1[i]) + abs(point2[i])
516  if divider == 0.0:
517  continue
518
519  distance += abs(point1[i] - point2[i]) / divider
520
521  return distance
522
523
524 def canberra_distance_numpy(object1, object2):
525  """!
526  @brief Calculate Canberra distance between two objects using numpy.
527
528  @param[in] object1 (array_like): The first vector.
529  @param[in] object2 (array_like): The second vector.
530
531  @return (float) Canberra distance between two objects.
532
533  """
534  with numpy.errstate(divide='ignore', invalid='ignore'):
535  result = numpy.divide(numpy.abs(object1 - object2), numpy.abs(object1) + numpy.abs(object2))
536
537  if len(result.shape) > 1:
538  return numpy.sum(numpy.nan_to_num(result), axis=1).T
539  else:
540  return numpy.sum(numpy.nan_to_num(result))
541
542
543 def chi_square_distance(point1, point2):
544  """!
545  @brief Calculate Chi square distance between two vectors.
546
547  \f[
548  dist(a, b) = \sum_{i=0}^{N}\frac{\left ( a_{i} - b_{i} \right )^{2}}{\left | a_{i} \right | + \left | b_{i} \right |};
549  \f]
550
551  @param[in] point1 (array_like): The first vector.
552  @param[in] point2 (array_like): The second vector.
553
554  @return (float) Chi square distance between two objects.
555
556  """
557  distance = 0.0
558  for i in range(len(point1)):
559  divider = abs(point1[i]) + abs(point2[i])
560  if divider != 0.0:
561  distance += ((point1[i] - point2[i]) ** 2.0) / divider
562
563  return distance
564
565
566 def chi_square_distance_numpy(object1, object2):
567  """!
568  @brief Calculate Chi square distance between two vectors using numpy.
569
570  @param[in] object1 (array_like): The first vector.
571  @param[in] object2 (array_like): The second vector.
572
573  @return (float) Chi square distance between two objects.
574
575  """
576  with numpy.errstate(divide='ignore', invalid='ignore'):
577  result = numpy.divide(numpy.power(object1 - object2, 2), numpy.abs(object1) + numpy.abs(object2))
578
579  if len(result.shape) > 1:
580  return numpy.sum(numpy.nan_to_num(result), axis=1).T
581  else:
582  return numpy.sum(numpy.nan_to_num(result))
583
584
585 def gower_distance(point1, point2, max_range):
586  """!
587  @brief Calculate Gower distance between two vectors.
588  @details Implementation is based on the paper @cite article::utils::metric::gower. Gower distance is calculate
589  using following formula:
590  \f[
591  dist\left ( a, b \right )=\frac{1}{p}\sum_{i=0}^{p}\frac{\left | a_{i} - b_{i} \right |}{R_{i}},
592  \f]
593
594  where \f$R_{i}\f$ is a max range for ith dimension. \f$R\f$ is defined in line following formula:
595
596  \f[
597  R=max\left ( X \right )-min\left ( X \right )
598  \f]
599
600  @param[in] point1 (array_like): The first vector.
601  @param[in] point2 (array_like): The second vector.
602  @param[in] max_range (array_like): Max range in each data dimension.
603
604  @return (float) Gower distance between two objects.
605
606  """
607  distance = 0.0
608  dimensions = len(point1)
609  for i in range(dimensions):
610  if max_range[i] != 0.0:
611  distance += abs(point1[i] - point2[i]) / max_range[i]
612
613  return distance / dimensions
614
615
616 def gower_distance_numpy(point1, point2, max_range):
617  """!
618  @brief Calculate Gower distance between two vectors using numpy.
619
620  @param[in] point1 (array_like): The first vector.
621  @param[in] point2 (array_like): The second vector.
622  @param[in] max_range (array_like): Max range in each data dimension.
623
624  @return (float) Gower distance between two objects.
625
626  """
627  with numpy.errstate(divide='ignore', invalid='ignore'):
628  result = numpy.divide(numpy.abs(point1 - point2), max_range)
629
630  if len(result.shape) > 1:
631  return numpy.sum(numpy.nan_to_num(result), axis=1).T / len(point1)
632  else:
633  return numpy.sum(numpy.nan_to_num(result)) / len(point1)
def __create_distance_calculator_basic(self)
Creates distance metric calculator that does not use numpy.
Definition: metric.py:215
def chi_square_distance(point1, point2)
Calculate Chi square distance between two vectors.
Definition: metric.py:543
def get_arguments(self)
Return additional arguments that are used by distance metric.
Definition: metric.py:160
def euclidean_distance_square(point1, point2)
Calculate square Euclidean distance between two vectors.
Definition: metric.py:349
def minkowski_distance_numpy(object1, object2, degree=2)
Calculate Minkowski distance between objects using numpy.
Definition: metric.py:485
def chi_square_distance_numpy(object1, object2)
Calculate Chi square distance between two vectors using numpy.
Definition: metric.py:566
def __create_distance_calculator(self)
Creates distance metric calculator.
Definition: metric.py:202
def get_type(self)
Return type of distance metric that is used.
Definition: metric.py:150
def chebyshev_distance_numpy(object1, object2)
Calculate Chebyshev distance between two objects using numpy.
Definition: metric.py:448
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:67
def manhattan_distance_numpy(object1, object2)
Calculate Manhattan distance between two objects using numpy.
Definition: metric.py:410
def __init__(self, metric_type, kwargs)
Creates distance metric instance for calculation distance between two points.
Definition: metric.py:110
def gower_distance(point1, point2, max_range)
Calculate Gower distance between two vectors.
Definition: metric.py:585
def get_function(self)
Return user-defined function for calculation distance metric.
Definition: metric.py:170
def gower_distance_numpy(point1, point2, max_range)
Calculate Gower distance between two vectors using numpy.
Definition: metric.py:616
def disable_numpy_usage(self)
Stop using numpy for distance calculation.
Definition: metric.py:191
def canberra_distance(point1, point2)
Calculate Canberra distance between two vectors.
Definition: metric.py:499
def canberra_distance_numpy(object1, object2)
Calculate Canberra distance between two objects using numpy.
Definition: metric.py:524
def euclidean_distance_square_numpy(object1, object2)
Calculate square Euclidean distance between two objects using numpy.
Definition: metric.py:372
def __call__(self, point1, point2)
Calculates distance between two points.
Definition: metric.py:137
def __create_distance_calculator_numpy(self)
Creates distance metric calculator that uses numpy.
Definition: metric.py:273
def euclidean_distance(point1, point2)
Calculate Euclidean distance between two vectors.
Definition: metric.py:313
def manhattan_distance(point1, point2)
Calculate Manhattan distance between between two vectors.
Definition: metric.py:385
def minkowski_distance(point1, point2, degree=2)
Calculate Minkowski distance between two vectors.
Definition: metric.py:461
def euclidean_distance_numpy(object1, object2)
Calculate Euclidean distance between two objects using numpy.
Definition: metric.py:336
def enable_numpy_usage(self)
Start numpy for distance calculation.
Definition: metric.py:180
def __get_gower_max_range(self)
Returns max range for Gower distance using input parameters (&#39;max_range&#39; or &#39;data&#39;).
Definition: metric.py:254
Enumeration of supported metrics in the module for distance calculation between two points...
Definition: metric.py:32
def chebyshev_distance(point1, point2)
Calculate Chebyshev distance between between two vectors.
Definition: metric.py:423