metric.py
1 """!
2 
3 @brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2019
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 
27 import numpy
28 
29 from enum import IntEnum
30 
31 
class type_metric(IntEnum):
    """!
    @brief Enumeration of supported metrics in the module for distance calculation between two points.
    @details Members are integer-valued (IntEnum). Every identifier except USER_DEFINED is mapped by
              'distance_metric' to one of the distance functions defined in this module; USER_DEFINED
              selects the callable supplied by the caller.

    """

    # Euclidean distance, see 'euclidean_distance'.
    EUCLIDEAN = 0

    # Square Euclidean distance, see 'euclidean_distance_square'.
    EUCLIDEAN_SQUARE = 1

    # Manhattan distance, see 'manhattan_distance'.
    MANHATTAN = 2

    # Chebyshev distance, see 'chebyshev_distance'.
    CHEBYSHEV = 3

    # Minkowski distance, see 'minkowski_distance' (uses the additional 'degree' argument).
    MINKOWSKI = 4

    # Canberra distance, see 'canberra_distance'.
    CANBERRA = 5

    # Chi square distance, see 'chi_square_distance'.
    CHI_SQUARE = 6

    # User-defined function for distance calculation (passed to 'distance_metric' as 'func').
    USER_DEFINED = 1000
61 
62 
63 
class distance_metric:
    """!
    @brief Distance metric performs distance calculation between two points in line with encapsulated function, for
            example, euclidean distance or chebyshev distance, or even user-defined.

    @details

    Example of Euclidean distance metric:
    @code
        metric = distance_metric(type_metric.EUCLIDEAN)
        distance = metric([1.0, 2.5], [-1.2, 3.4])
    @endcode

    Example of Chebyshev distance metric:
    @code
        metric = distance_metric(type_metric.CHEBYSHEV)
        distance = metric([0.0, 0.0], [2.5, 6.0])
    @endcode

    In following example additional argument should be specified (generally, 'degree' is an optional argument that is
     equal to '2' by default) that is specific for Minkowski distance:
    @code
        metric = distance_metric(type_metric.MINKOWSKI, degree=4)
        distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2])
    @endcode

    User may define its own function for distance calculation. In this case input is two points, for example, you
     want to implement your own version of Manhattan distance:
    @code
        from pyclustering.utils.metric import distance_metric, type_metric

        def my_manhattan(point1, point2):
            dimension = len(point1)
            result = 0.0
            for i in range(dimension):
                result += abs(point1[i] - point2[i]) * 0.1
            return result

        metric = distance_metric(type_metric.USER_DEFINED, func=my_manhattan)
        distance = metric([2.0, 3.0], [1.0, 3.0])
    @endcode

    """

    def __init__(self, metric_type, **kwargs):
        """!
        @brief Creates distance metric instance for calculation distance between two points.

        @param[in] metric_type (type_metric): Type of metric that is used for distance calculation.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'numpy_usage', 'func' and corresponding
                    additional argument for specific metric types).

        <b>Keyword Args:</b><br>
            - func (callable): Callable object with two arguments (point #1 and point #2) or (object #1 and object #2)
               in case of numpy usage. This argument is used only if metric is 'type_metric.USER_DEFINED'.
            - degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
            - numpy_usage (bool): If True then numpy is used for calculation (by default is False).

        @exception ValueError: If 'metric_type' is not one of the supported metric types.

        """
        self.__type = metric_type
        self.__args = kwargs
        self.__func = self.__args.get('func', None)
        self.__numpy = self.__args.get('numpy_usage', False)

        # Resolve the calculator once, so that '__call__' is a plain delegation; without this assignment
        # every call of the metric fails with AttributeError.
        self.__calculator = self.__create_distance_calculator()

    def __call__(self, point1, point2):
        """!
        @brief Calculates distance between two points.

        @param[in] point1 (list): The first point.
        @param[in] point2 (list): The second point.

        @return (double) Distance between two points. The value produced by the selected calculator is returned
                 unchanged, therefore numpy calculators may return an array of distances.

        """
        return self.__calculator(point1, point2)

    def get_type(self):
        """!
        @brief Return type of distance metric that is used.

        @return (type_metric) Type of distance metric.

        """
        return self.__type

    def get_arguments(self):
        """!
        @brief Return additional arguments that are used by distance metric.

        @return (dict) Additional arguments (the same dictionary object that is stored by the metric).

        """
        return self.__args

    def get_function(self):
        """!
        @brief Return user-defined function for calculation distance metric.

        @return (callable): User-defined distance metric function (None if 'func' was not specified).

        """
        return self.__func

    def enable_numpy_usage(self):
        """!
        @brief Start numpy for distance calculation.
        @details Useful in case matrices to increase performance. No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = True
        if self.__type != type_metric.USER_DEFINED:
            # User-defined function is the same for both modes, so only built-in metrics are re-resolved.
            self.__calculator = self.__create_distance_calculator()

    def disable_numpy_usage(self):
        """!
        @brief Stop using numpy for distance calculation.
        @details Useful in case of big amount of small data portion when numpy call is longer than calculation itself.
                  No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = False
        # For USER_DEFINED the basic factory returns the very same user function, so this is a no-op for it.
        self.__calculator = self.__create_distance_calculator()

    def __create_distance_calculator(self):
        """!
        @brief Creates distance metric calculator.

        @return (callable) Callable object of distance metric calculator.

        """
        if self.__numpy is True:
            return self.__create_distance_calculator_numpy()

        return self.__create_distance_calculator_basic()

    def __create_distance_calculator_basic(self):
        """!
        @brief Creates distance metric calculator that does not use numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError: If type of the metric is unknown.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is read on every call, not captured at creation time.
            return lambda point1, point2: minkowski_distance(point1, point2, self.__args.get('degree', 2))

        elif self.__type == type_metric.CANBERRA:
            return canberra_distance

        elif self.__type == type_metric.CHI_SQUARE:
            return chi_square_distance

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            # '%s' is used because an unknown type is not guaranteed to be an integer.
            raise ValueError("Unknown type of metric: '%s'" % str(self.__type))

    def __create_distance_calculator_numpy(self):
        """!
        @brief Creates distance metric calculator that uses numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError: If type of the metric is unknown.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance_numpy

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square_numpy

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance_numpy

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance_numpy

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is read on every call, not captured at creation time.
            return lambda object1, object2: minkowski_distance_numpy(object1, object2, self.__args.get('degree', 2))

        elif self.__type == type_metric.CANBERRA:
            return canberra_distance_numpy

        elif self.__type == type_metric.CHI_SQUARE:
            return chi_square_distance_numpy

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            # '%s' is used because an unknown type is not guaranteed to be an integer.
            raise ValueError("Unknown type of metric: '%s'" % str(self.__type))
277 
278 
def euclidean_distance(point1, point2):
    r"""!
    @brief Calculate Euclidean distance between two vectors.
    @details The Euclidean distance between vectors (points) a and b is calculated by following formula:

    \f[
    dist(a, b) = \sqrt{ \sum_{i=0}^{N-1}(a_{i} - b_{i})^{2} };
    \f]

    Where N is a length of each vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Euclidean distance between two vectors.

    @see euclidean_distance_square, manhattan_distance, chebyshev_distance

    """
    # The distance is the square root of the squared distance computed by the sibling function.
    return euclidean_distance_square(point1, point2) ** 0.5
300 
301 
def euclidean_distance_numpy(object1, object2):
    """!
    @brief Calculate Euclidean distance between two objects using numpy.
    @details The square root is applied to the sum of squared differences. For inputs with more than one
              dimension the sum is taken over axis 1, so one distance per row is returned; for
              one-dimensional inputs a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Euclidean distance between two objects.

    """
    # numpy.subtract accepts any array_like input (plain lists as well as numpy arrays).
    squared = numpy.square(numpy.subtract(object1, object2))

    if len(squared.shape) > 1:
        return numpy.sqrt(numpy.sum(squared, axis=1)).T

    # One-dimensional input: there is no axis 1 to reduce over.
    return numpy.sqrt(numpy.sum(squared))
313 
314 
def euclidean_distance_square(point1, point2):
    r"""!
    @brief Calculate square Euclidean distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N-1}(a_{i} - b_{i})^{2};
    \f]

    Where N is a length of the first vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Square Euclidean distance between two vectors.

    @see euclidean_distance, manhattan_distance, chebyshev_distance

    """
    total = 0.0
    # The first vector drives the iteration; the second one is addressed by the same index.
    for index, coordinate in enumerate(point1):
        delta = coordinate - point2[index]
        total += delta ** 2.0

    return total
336 
337 
def euclidean_distance_square_numpy(object1, object2):
    """!
    @brief Calculate square Euclidean distance between two objects using numpy.
    @details For inputs with more than one dimension the squared differences are summed over axis 1,
              so one value per row is returned; for one-dimensional inputs a single value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Square Euclidean distance between two objects.

    """
    # numpy.subtract accepts any array_like input (plain lists as well as numpy arrays).
    squared = numpy.square(numpy.subtract(object1, object2))

    if len(squared.shape) > 1:
        return numpy.sum(squared, axis=1).T

    # One-dimensional input: there is no axis 1 to reduce over.
    return numpy.sum(squared)
349 
350 
def manhattan_distance(point1, point2):
    r"""!
    @brief Calculate Manhattan distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N-1}\left | a_{i} - b_{i} \right |;
    \f]

    Where N is a length of the first vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Manhattan distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, chebyshev_distance

    """
    total = 0.0

    # Accumulate absolute coordinate differences one dimension at a time.
    for index, coordinate in enumerate(point1):
        total += abs(coordinate - point2[index])

    return total
374 
375 
def manhattan_distance_numpy(object1, object2):
    """!
    @brief Calculate Manhattan distance between two objects using numpy.
    @details For inputs with more than one dimension the absolute differences are summed over axis 1,
              so one value per row is returned; for one-dimensional inputs a single value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Manhattan distance between two objects.

    """
    # numpy.subtract accepts any array_like input (plain lists as well as numpy arrays).
    differences = numpy.absolute(numpy.subtract(object1, object2))

    if len(differences.shape) > 1:
        return numpy.sum(differences, axis=1).T

    # One-dimensional input: there is no axis 1 to reduce over.
    return numpy.sum(differences)
387 
388 
def chebyshev_distance(point1, point2):
    r"""!
    @brief Calculate Chebyshev distance between two vectors.

    \f[
    dist(a, b) = \max_{i}\left (\left | a_{i} - b_{i} \right |\right );
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Chebyshev distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, minkowski_distance

    """
    largest = 0.0

    for index, coordinate in enumerate(point1):
        gap = abs(coordinate - point2[index])
        # Keep the running maximum; it is replaced only by a strictly greater difference.
        if gap > largest:
            largest = gap

    return largest
412 
413 
def chebyshev_distance_numpy(object1, object2):
    """!
    @brief Calculate Chebyshev distance between two objects using numpy.
    @details For inputs with more than one dimension the maximum absolute difference is taken over axis 1,
              so one value per row is returned; for one-dimensional inputs a single value is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Chebyshev distance between two objects.

    """
    # numpy.subtract accepts any array_like input (plain lists as well as numpy arrays).
    differences = numpy.absolute(numpy.subtract(object1, object2))

    if len(differences.shape) > 1:
        return numpy.max(differences, axis=1).T

    # One-dimensional input: there is no axis 1 to reduce over.
    return numpy.max(differences)
425 
426 
def minkowski_distance(point1, point2, degree=2):
    r"""!
    @brief Calculate Minkowski distance between two vectors.

    \f[
    dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N-1}\left | a_{i} - b_{i} \right |^{p} };
    \f]

    Where N is a length of the first vector and p is the degree.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double) Minkowski distance between two vectors.

    @see euclidean_distance

    """
    distance = 0.0
    for index, coordinate in enumerate(point1):
        # The absolute value is essential: without it an odd degree lets negative differences
        # cancel positive ones and may produce a negative sum whose root is not a real number.
        distance += abs(coordinate - point2[index]) ** degree

    return distance ** (1.0 / degree)
449 
450 
def minkowski_distance_numpy(object1, object2, degree=2):
    """!
    @brief Calculate Minkowski distance between objects using numpy.
    @details Absolute differences are raised to 'degree' and summed, then the root of order 'degree' is
              taken from the sum. For inputs with more than one dimension the sum is taken over axis 1,
              so one distance per row is returned; for one-dimensional inputs a single distance is returned.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double|array_like) Minkowski distance between two objects.

    """
    # numpy.subtract accepts any array_like input; the absolute value keeps odd degrees well defined.
    powered = numpy.power(numpy.absolute(numpy.subtract(object1, object2)), degree)

    if len(powered.shape) > 1:
        total = numpy.sum(powered, axis=1).T
    else:
        # One-dimensional input: there is no axis 1 to reduce over.
        total = numpy.sum(powered)

    # The root is applied to the sum (not to every term); '1.0' forces floating-point division.
    return numpy.power(total, 1.0 / degree)
463 
464 
def canberra_distance(point1, point2):
    r"""!
    @brief Calculate Canberra distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N-1}\frac{\left | a_{i} - b_{i} \right |}{\left | a_{i} \right | + \left | b_{i} \right |};
    \f]

    Terms whose denominator is equal to zero are skipped.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (float) Canberra distance between two objects.

    """
    total = 0.0
    for index, first in enumerate(point1):
        second = point2[index]
        denominator = abs(first) + abs(second)

        # Both coordinates are zero: the term is undefined and contributes nothing.
        if denominator != 0.0:
            total += abs(first - second) / denominator

    return total
488 
489 
def canberra_distance_numpy(object1, object2):
    """!
    @brief Calculate Canberra distance between two objects using numpy.
    @details Division warnings are suppressed and not-a-number terms (zero divided by zero) are replaced
              by numpy.nan_to_num before summation. For results with more than one dimension the sum is
              taken over axis 1, otherwise over all elements.

    @param[in] object1 (array_like): The first vector.
    @param[in] object2 (array_like): The second vector.

    @return (float) Canberra distance between two objects.

    """
    with numpy.errstate(divide='ignore', invalid='ignore'):
        numerator = numpy.abs(object1 - object2)
        denominator = numpy.abs(object1) + numpy.abs(object2)
        ratio = numpy.divide(numerator, denominator)

    if len(ratio.shape) <= 1:
        return numpy.sum(numpy.nan_to_num(ratio))

    return numpy.sum(numpy.nan_to_num(ratio), axis=1).T
507 
508 
def chi_square_distance(point1, point2):
    r"""!
    @brief Calculate Chi square distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N-1}\frac{\left ( a_{i} - b_{i} \right )^{2}}{\left | a_{i} \right | + \left | b_{i} \right |};
    \f]

    Terms whose denominator is equal to zero are skipped.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (float) Chi square distance between two objects.

    """
    total = 0.0
    for index, first in enumerate(point1):
        second = point2[index]
        denominator = abs(first) + abs(second)

        # Both coordinates are zero: the term is undefined and contributes nothing.
        if denominator != 0.0:
            total += ((first - second) ** 2.0) / denominator

    return total
532 
533 
def chi_square_distance_numpy(object1, object2):
    """!
    @brief Calculate Chi square distance between two vectors using numpy.
    @details Division warnings are suppressed and not-a-number terms (zero divided by zero) are replaced
              by numpy.nan_to_num before summation. For results with more than one dimension the sum is
              taken over axis 1, otherwise over all elements.

    @param[in] object1 (array_like): The first vector.
    @param[in] object2 (array_like): The second vector.

    @return (float) Chi square distance between two objects.

    """
    with numpy.errstate(divide='ignore', invalid='ignore'):
        numerator = numpy.power(object1 - object2, 2)
        denominator = numpy.abs(object1) + numpy.abs(object2)
        ratio = numpy.divide(numerator, denominator)

    if len(ratio.shape) <= 1:
        return numpy.sum(numpy.nan_to_num(ratio))

    return numpy.sum(numpy.nan_to_num(ratio), axis=1).T
def __create_distance_calculator_basic(self)
Creates distance metric calculator that does not use numpy.
Definition: metric.py:208
def chi_square_distance(point1, point2)
Calculate Chi square distance between two vectors.
Definition: metric.py:509
def get_arguments(self)
Return additional arguments that are used by distance metric.
Definition: metric.py:153
def euclidean_distance_square(point1, point2)
Calculate square Euclidean distance between two vectors.
Definition: metric.py:315
def minkowski_distance_numpy(object1, object2, degree=2)
Calculate Minkowski distance between objects using numpy.
Definition: metric.py:451
def chi_square_distance_numpy(object1, object2)
Calculate Chi square distance between two vectors using numpy.
Definition: metric.py:534
def __create_distance_calculator(self)
Creates distance metric calculator.
Definition: metric.py:195
def get_type(self)
Return type of distance metric that is used.
Definition: metric.py:143
def chebyshev_distance_numpy(object1, object2)
Calculate Chebyshev distance between two objects using numpy.
Definition: metric.py:414
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:64
def manhattan_distance_numpy(object1, object2)
Calculate Manhattan distance between two objects using numpy.
Definition: metric.py:376
def __init__(self, metric_type, **kwargs)
Creates distance metric instance for calculation distance between two points.
Definition: metric.py:107
def get_function(self)
Return user-defined function for calculation distance metric.
Definition: metric.py:163
def disable_numpy_usage(self)
Stop using numpy for distance calculation.
Definition: metric.py:184
def canberra_distance(point1, point2)
Calculate Canberra distance between two vectors.
Definition: metric.py:465
def canberra_distance_numpy(object1, object2)
Calculate Canberra distance between two objects using numpy.
Definition: metric.py:490
def euclidean_distance_square_numpy(object1, object2)
Calculate square Euclidean distance between two objects using numpy.
Definition: metric.py:338
def __call__(self, point1, point2)
Calculates distance between two points.
Definition: metric.py:130
def __create_distance_calculator_numpy(self)
Creates distance metric calculator that uses numpy.
Definition: metric.py:243
def euclidean_distance(point1, point2)
Calculate Euclidean distance between two vectors.
Definition: metric.py:279
def manhattan_distance(point1, point2)
Calculate Manhattan distance between two vectors.
Definition: metric.py:351
def minkowski_distance(point1, point2, degree=2)
Calculate Minkowski distance between two vectors.
Definition: metric.py:427
def euclidean_distance_numpy(object1, object2)
Calculate Euclidean distance between two objects using numpy.
Definition: metric.py:302
def enable_numpy_usage(self)
Start numpy for distance calculation.
Definition: metric.py:173
Enumeration of supported metrics in the module for distance calculation between two points...
Definition: metric.py:32
def chebyshev_distance(point1, point2)
Calculate Chebyshev distance between two vectors.
Definition: metric.py:389