metric.py
1 """!
2 
3 @brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2019
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 
27 import numpy
28 
29 from enum import IntEnum
30 
31 
class type_metric(IntEnum):
    """!
    @brief Identifiers of the metrics that this module can use to calculate distance between two points.
    @details Each member selects the function that 'distance_metric' dispatches to.

    """

    # Euclidean distance ('euclidean_distance').
    EUCLIDEAN = 0

    # Square Euclidean distance ('euclidean_distance_square').
    EUCLIDEAN_SQUARE = 1

    # Manhattan distance ('manhattan_distance').
    MANHATTAN = 2

    # Chebyshev distance ('chebyshev_distance').
    CHEBYSHEV = 3

    # Minkowski distance ('minkowski_distance'), uses optional 'degree' argument.
    MINKOWSKI = 4

    # Canberra distance ('canberra_distance').
    CANBERRA = 5

    # Chi square distance ('chi_square_distance').
    CHI_SQUARE = 6

    # Gower distance ('gower_distance'), requires 'max_range' or 'data' argument.
    GOWER = 7

    # Distance is calculated by a callable supplied by the user via 'func' argument.
    USER_DEFINED = 1000
64 
65 
66 
class distance_metric:
    """!
    @brief Distance metric performs distance calculation between two points in line with encapsulated function, for
            example, euclidean distance or chebyshev distance, or even user-defined.

    @details

    Example of Euclidean distance metric:
    @code
        metric = distance_metric(type_metric.EUCLIDEAN)
        distance = metric([1.0, 2.5], [-1.2, 3.4])
    @endcode

    Example of Chebyshev distance metric:
    @code
        metric = distance_metric(type_metric.CHEBYSHEV)
        distance = metric([0.0, 0.0], [2.5, 6.0])
    @endcode

    In following example additional argument should be specified (generally, 'degree' is an optional argument that is
     equal to '2' by default) that is specific for Minkowski distance:
    @code
        metric = distance_metric(type_metric.MINKOWSKI, degree=4)
        distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2])
    @endcode

    User may define its own function for distance calculation. In this case input is two points, for example, you
    want to implement your own version of Manhattan distance:
    @code
        from pyclustering.utils.metric import distance_metric, type_metric

        def my_manhattan(point1, point2):
            dimension = len(point1)
            result = 0.0
            for i in range(dimension):
                result += abs(point1[i] - point2[i]) * 0.1
            return result

        metric = distance_metric(type_metric.USER_DEFINED, func=my_manhattan)
        distance = metric([2.0, 3.0], [1.0, 3.0])
    @endcode

    """

    def __init__(self, metric_type, **kwargs):
        """!
        @brief Creates distance metric instance for calculation distance between two points.

        @param[in] metric_type (type_metric): Type of the metric that selects the distance function.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'numpy_usage', 'func' and
                    corresponding additional arguments for specific metric types).

        <b>Keyword Args:</b><br>
            - func (callable): Callable object with two arguments (point #1 and point #2) or (object #1 and object #2)
               in case of numpy usage. This argument is used only if metric is 'type_metric.USER_DEFINED'.
            - degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
            - max_range (array_like): Only for 'type_metric.GOWER' - max range in each dimension. 'data' can be used
               instead of this parameter.
            - data (array_like): Only for 'type_metric.GOWER' - input data that used for 'max_range' calculation.
               'max_range' can be used instead of this parameter.
            - numpy_usage (bool): If True then numpy is used for calculation (by default is False).

        """
        self.__type = metric_type
        self.__args = kwargs
        self.__func = self.__args.get('func', None)
        self.__numpy = self.__args.get('numpy_usage', False)

        # The calculator is resolved once here so that '__call__' is a plain function call.
        self.__calculator = self.__create_distance_calculator()


    def __call__(self, point1, point2):
        """!
        @brief Calculates distance between two points.

        @param[in] point1 (list): The first point.
        @param[in] point2 (list): The second point.

        @return (double) Distance between two points.

        """
        return self.__calculator(point1, point2)


    def get_type(self):
        """!
        @brief Return type of distance metric that is used.

        @return (type_metric) Type of distance metric.

        """
        return self.__type


    def get_arguments(self):
        """!
        @brief Return additional arguments that are used by distance metric.

        @return (dict) Additional arguments (the dictionary itself, not a copy).

        """
        return self.__args


    def get_function(self):
        """!
        @brief Return user-defined function for calculation distance metric.

        @return (callable): User-defined distance metric function, or None if 'func' was not provided.

        """
        return self.__func


    def enable_numpy_usage(self):
        """!
        @brief Start numpy for distance calculation.
        @details Useful in case matrices to increase performance. No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = True
        if self.__type != type_metric.USER_DEFINED:
            self.__calculator = self.__create_distance_calculator()


    def disable_numpy_usage(self):
        """!
        @brief Stop using numpy for distance calculation.
        @details Useful in case of big amount of small data portion when numpy call is longer than calculation itself.
                  No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = False
        self.__calculator = self.__create_distance_calculator()


    def __create_distance_calculator(self):
        """!
        @brief Creates distance metric calculator.

        @return (callable) Callable object of distance metric calculator.

        """
        if self.__numpy is True:
            return self.__create_distance_calculator_numpy()

        return self.__create_distance_calculator_basic()


    def __create_distance_calculator_basic(self):
        """!
        @brief Creates distance metric calculator that does not use numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError if the metric type is not one of 'type_metric' values.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is read on every call, so it follows changes of the arguments dictionary.
            return lambda point1, point2: minkowski_distance(point1, point2, self.__args.get('degree', 2))

        elif self.__type == type_metric.CANBERRA:
            return canberra_distance

        elif self.__type == type_metric.CHI_SQUARE:
            return chi_square_distance

        elif self.__type == type_metric.GOWER:
            max_range = self.__get_gower_max_range()
            return lambda point1, point2: gower_distance(point1, point2, max_range)

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            # '%s' is used so that a non-integer type still produces ValueError instead of a formatting TypeError.
            raise ValueError("Unknown type of metric: '%s'" % str(self.__type))


    def __get_gower_max_range(self):
        """!
        @brief Returns max range for Gower distance using input parameters ('max_range' or 'data').
        @details When only 'data' is provided, the calculated range is stored in the arguments as 'max_range'.

        @return (numpy.array) Max range for Gower distance.

        @exception ValueError if neither 'max_range' nor 'data' is provided.

        """
        max_range = self.__args.get('max_range', None)
        if max_range is None:
            data = self.__args.get('data', None)
            if data is None:
                raise ValueError("Gower distance requires 'data' or 'max_range' argument to construct metric.")

            max_range = numpy.max(data, axis=0) - numpy.min(data, axis=0)
            self.__args['max_range'] = max_range

        return max_range


    def __create_distance_calculator_numpy(self):
        """!
        @brief Creates distance metric calculator that uses numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError if the metric type is not one of 'type_metric' values.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance_numpy

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square_numpy

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance_numpy

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance_numpy

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is read on every call, so it follows changes of the arguments dictionary.
            return lambda object1, object2: minkowski_distance_numpy(object1, object2, self.__args.get('degree', 2))

        elif self.__type == type_metric.CANBERRA:
            return canberra_distance_numpy

        elif self.__type == type_metric.CHI_SQUARE:
            return chi_square_distance_numpy

        elif self.__type == type_metric.GOWER:
            max_range = self.__get_gower_max_range()
            return lambda object1, object2: gower_distance_numpy(object1, object2, max_range)

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            # '%s' is used so that a non-integer type still produces ValueError instead of a formatting TypeError.
            raise ValueError("Unknown type of metric: '%s'" % str(self.__type))
310 
311 
312 
def euclidean_distance(point1, point2):
    """!
    @brief Calculate Euclidean distance between two vectors.
    @details The Euclidean distance between vectors (points) a and b is calculated by following formula:

    \f[
    dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
    \f]

    Where N is a length of each vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Euclidean distance between two vectors.

    @see euclidean_distance_square, manhattan_distance, chebyshev_distance

    """
    # Square root of the square Euclidean distance.
    return euclidean_distance_square(point1, point2) ** 0.5
334 
335 
def euclidean_distance_numpy(object1, object2):
    """!
    @brief Calculate Euclidean distance between two objects using numpy.
    @details For two-dimensional input each row is considered as a point and one distance per row is returned.
              For one-dimensional input a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Euclidean distance between two objects.

    """
    squared_difference = numpy.square(object1 - object2)

    # The square root has to be applied to the sum of squares, not to each square separately.
    if numpy.ndim(squared_difference) > 1:
        return numpy.sqrt(numpy.sum(squared_difference, axis=1)).T

    return numpy.sqrt(numpy.sum(squared_difference))
347 
348 
def euclidean_distance_square(point1, point2):
    """!
    @brief Calculate square Euclidean distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N}(a_{i} - b_{i})^{2};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Square Euclidean distance between two vectors.

    @see euclidean_distance, manhattan_distance, chebyshev_distance

    """
    total = 0.0
    for index, coordinate in enumerate(point1):
        total += (coordinate - point2[index]) ** 2.0

    return total
370 
371 
def euclidean_distance_square_numpy(object1, object2):
    """!
    @brief Calculate square Euclidean distance between two objects using numpy.
    @details For two-dimensional input each row is considered as a point and one distance per row is returned.
              For one-dimensional input a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Square Euclidean distance between two objects.

    """
    squared_difference = numpy.square(object1 - object2)

    # 'axis=1' is valid only when there is a row dimension to reduce over.
    if numpy.ndim(squared_difference) > 1:
        return numpy.sum(squared_difference, axis=1).T

    return numpy.sum(squared_difference)
383 
384 
def manhattan_distance(point1, point2):
    """!
    @brief Calculate Manhattan distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |;
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Manhattan distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, chebyshev_distance

    """
    total = 0.0
    for index, coordinate in enumerate(point1):
        total += abs(coordinate - point2[index])

    return total
408 
409 
def manhattan_distance_numpy(object1, object2):
    """!
    @brief Calculate Manhattan distance between two objects using numpy.
    @details For two-dimensional input each row is considered as a point and one distance per row is returned.
              For one-dimensional input a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Manhattan distance between two objects.

    """
    absolute_difference = numpy.absolute(object1 - object2)

    # 'axis=1' is valid only when there is a row dimension to reduce over.
    if numpy.ndim(absolute_difference) > 1:
        return numpy.sum(absolute_difference, axis=1).T

    return numpy.sum(absolute_difference)
421 
422 
def chebyshev_distance(point1, point2):
    """!
    @brief Calculate Chebyshev distance between two vectors.

    \f[
    dist(a, b) = \max_{}i\left (\left | a_{i} - b_{i} \right |\right );
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Chebyshev distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, minkowski_distance

    """
    largest = 0.0
    for index, coordinate in enumerate(point1):
        largest = max(largest, abs(coordinate - point2[index]))

    return largest
446 
447 
def chebyshev_distance_numpy(object1, object2):
    """!
    @brief Calculate Chebyshev distance between two objects using numpy.
    @details For two-dimensional input each row is considered as a point and one distance per row is returned.
              For one-dimensional input a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Chebyshev distance between two objects.

    """
    absolute_difference = numpy.absolute(object1 - object2)

    # 'axis=1' is valid only when there is a row dimension to reduce over.
    if numpy.ndim(absolute_difference) > 1:
        return numpy.max(absolute_difference, axis=1).T

    return numpy.max(absolute_difference)
459 
460 
def minkowski_distance(point1, point2, degree=2):
    """!
    @brief Calculate Minkowski distance between two vectors.

    \f[
    dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |^{p} };
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] degree (numeric): Degree (p) that is used for Minkowski distance.

    @return (double) Minkowski distance between two vectors.

    @see euclidean_distance

    """
    distance = 0.0
    for index, coordinate in enumerate(point1):
        # Absolute difference is required: without it terms of odd degree cancel each other or make the sum negative.
        distance += abs(coordinate - point2[index]) ** degree

    return distance ** (1.0 / degree)
483 
484 
def minkowski_distance_numpy(object1, object2, degree=2):
    """!
    @brief Calculate Minkowski distance between objects using numpy.
    @details For two-dimensional input each row is considered as a point and one distance per row is returned.
              For one-dimensional input a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.
    @param[in] degree (numeric): Degree (p) that is used for Minkowski distance.

    @return (double|array_like) Minkowski distance between two objects.

    """
    powered_difference = numpy.power(numpy.absolute(object1 - object2), degree)

    # The root of degree 'p' has to be applied to the sum, not to each term separately.
    if numpy.ndim(powered_difference) > 1:
        return numpy.power(numpy.sum(powered_difference, axis=1), 1.0 / degree).T

    return numpy.power(numpy.sum(powered_difference), 1.0 / degree)
497 
498 
def canberra_distance(point1, point2):
    """!
    @brief Calculate Canberra distance between two vectors.
    @details Dimensions where both coordinates are zero are skipped (their divider is zero).

    \f[
    dist(a, b) = \sum_{i=0}^{N}\frac{\left | a_{i} - b_{i} \right |}{\left | a_{i} \right | + \left | b_{i} \right |};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (float) Canberra distance between two objects.

    """
    total = 0.0
    for index, coordinate in enumerate(point1):
        denominator = abs(coordinate) + abs(point2[index])
        if denominator != 0.0:
            total += abs(coordinate - point2[index]) / denominator

    return total
522 
523 
def canberra_distance_numpy(object1, object2):
    """!
    @brief Calculate Canberra distance between two objects using numpy.
    @details Division by zero is not reported; not-a-number terms (zero divided by zero) are replaced by zero
              before summation.

    @param[in] object1 (array_like): The first vector.
    @param[in] object2 (array_like): The second vector.

    @return (float) Canberra distance between two objects.

    """
    numerator = numpy.abs(object1 - object2)
    denominator = numpy.abs(object1) + numpy.abs(object2)

    with numpy.errstate(divide='ignore', invalid='ignore'):
        ratio = numpy.divide(numerator, denominator)

    # One-dimensional (or scalar) result is reduced to a single value, otherwise one value per row.
    if len(ratio.shape) <= 1:
        return numpy.sum(numpy.nan_to_num(ratio))

    return numpy.sum(numpy.nan_to_num(ratio), axis=1).T
541 
542 
def chi_square_distance(point1, point2):
    """!
    @brief Calculate Chi square distance between two vectors.
    @details Dimensions where both coordinates are zero are skipped (their divider is zero).

    \f[
    dist(a, b) = \sum_{i=0}^{N}\frac{\left ( a_{i} - b_{i} \right )^{2}}{\left | a_{i} \right | + \left | b_{i} \right |};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (float) Chi square distance between two objects.

    """
    total = 0.0
    for index, coordinate in enumerate(point1):
        denominator = abs(coordinate) + abs(point2[index])
        if denominator == 0.0:
            continue

        total += ((coordinate - point2[index]) ** 2.0) / denominator

    return total
564 
565 
def chi_square_distance_numpy(object1, object2):
    """!
    @brief Calculate Chi square distance between two vectors using numpy.
    @details Division by zero is not reported; not-a-number terms (zero divided by zero) are replaced by zero
              before summation.

    @param[in] object1 (array_like): The first vector.
    @param[in] object2 (array_like): The second vector.

    @return (float) Chi square distance between two objects.

    """
    numerator = numpy.power(object1 - object2, 2)
    denominator = numpy.abs(object1) + numpy.abs(object2)

    with numpy.errstate(divide='ignore', invalid='ignore'):
        ratio = numpy.divide(numerator, denominator)

    # One-dimensional (or scalar) result is reduced to a single value, otherwise one value per row.
    if len(ratio.shape) <= 1:
        return numpy.sum(numpy.nan_to_num(ratio))

    return numpy.sum(numpy.nan_to_num(ratio), axis=1).T
583 
584 
def gower_distance(point1, point2, max_range):
    """!
    @brief Calculate Gower distance between two vectors.
    @details Implementation is based on the paper @cite article::utils::metric::gower. Gower distance is calculated
              using following formula:
    \f[
    dist\left ( a, b \right )=\frac{1}{p}\sum_{i=0}^{p}\frac{\left | a_{i} - b_{i} \right |}{R_{i}},
    \f]

    where \f$R_{i}\f$ is a max range for ith dimension. \f$R\f$ is defined in line with following formula:

    \f[
    R=max\left ( X \right )-min\left ( X \right )
    \f]

    Dimensions whose max range is zero do not contribute to the sum, but are still counted in \f$p\f$.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] max_range (array_like): Max range in each data dimension.

    @return (float) Gower distance between two objects.

    """
    dimensions = len(point1)

    total = 0.0
    for index, coordinate in enumerate(point1):
        if max_range[index] == 0.0:
            continue

        total += abs(coordinate - point2[index]) / max_range[index]

    return total / dimensions
614 
615 
def gower_distance_numpy(point1, point2, max_range):
    """!
    @brief Calculate Gower distance between two vectors using numpy.
    @details For two-dimensional input each row is considered as a point and one distance per row is returned.
              For one-dimensional input a single distance is returned. Not-a-number terms (zero divided by zero)
              are replaced by zero before summation.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] max_range (array_like): Max range in each data dimension.

    @return (float|array_like) Gower distance between two objects.

    """
    with numpy.errstate(divide='ignore', invalid='ignore'):
        result = numpy.divide(numpy.abs(point1 - point2), max_range)

    if len(result.shape) > 1:
        # Average over dimensions (columns): 'len(point1)' would be the number of points for 2-D input.
        return numpy.sum(numpy.nan_to_num(result), axis=1).T / result.shape[-1]

    return numpy.sum(numpy.nan_to_num(result)) / len(result)
def __create_distance_calculator_basic(self)
Creates distance metric calculator that does not use numpy.
Definition: metric.py:215
def chi_square_distance(point1, point2)
Calculate Chi square distance between two vectors.
Definition: metric.py:543
def get_arguments(self)
Return additional arguments that are used by distance metric.
Definition: metric.py:160
def euclidean_distance_square(point1, point2)
Calculate square Euclidean distance between two vectors.
Definition: metric.py:349
def minkowski_distance_numpy(object1, object2, degree=2)
Calculate Minkowski distance between objects using numpy.
Definition: metric.py:485
def chi_square_distance_numpy(object1, object2)
Calculate Chi square distance between two vectors using numpy.
Definition: metric.py:566
def __create_distance_calculator(self)
Creates distance metric calculator.
Definition: metric.py:202
def get_type(self)
Return type of distance metric that is used.
Definition: metric.py:150
def chebyshev_distance_numpy(object1, object2)
Calculate Chebyshev distance between two objects using numpy.
Definition: metric.py:448
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:67
def manhattan_distance_numpy(object1, object2)
Calculate Manhattan distance between two objects using numpy.
Definition: metric.py:410
def __init__(self, metric_type, kwargs)
Creates distance metric instance for calculation distance between two points.
Definition: metric.py:110
def gower_distance(point1, point2, max_range)
Calculate Gower distance between two vectors.
Definition: metric.py:585
def get_function(self)
Return user-defined function for calculation distance metric.
Definition: metric.py:170
def gower_distance_numpy(point1, point2, max_range)
Calculate Gower distance between two vectors using numpy.
Definition: metric.py:616
def disable_numpy_usage(self)
Stop using numpy for distance calculation.
Definition: metric.py:191
def canberra_distance(point1, point2)
Calculate Canberra distance between two vectors.
Definition: metric.py:499
def canberra_distance_numpy(object1, object2)
Calculate Canberra distance between two objects using numpy.
Definition: metric.py:524
def euclidean_distance_square_numpy(object1, object2)
Calculate square Euclidean distance between two objects using numpy.
Definition: metric.py:372
def __call__(self, point1, point2)
Calculates distance between two points.
Definition: metric.py:137
def __create_distance_calculator_numpy(self)
Creates distance metric calculator that uses numpy.
Definition: metric.py:273
def euclidean_distance(point1, point2)
Calculate Euclidean distance between two vectors.
Definition: metric.py:313
def manhattan_distance(point1, point2)
Calculate Manhattan distance between between two vectors.
Definition: metric.py:385
def minkowski_distance(point1, point2, degree=2)
Calculate Minkowski distance between two vectors.
Definition: metric.py:461
def euclidean_distance_numpy(object1, object2)
Calculate Euclidean distance between two objects using numpy.
Definition: metric.py:336
def enable_numpy_usage(self)
Start numpy for distance calculation.
Definition: metric.py:180
def __get_gower_max_range(self)
Returns max range for Gower distance using input parameters ('max_range' or 'data').
Definition: metric.py:254
Enumeration of supported metrics in the module for distance calculation between two points...
Definition: metric.py:32
def chebyshev_distance(point1, point2)
Calculate Chebyshev distance between between two vectors.
Definition: metric.py:423