metric.py
1 """!
2
3 @brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
4
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2018
8
@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
14
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23
24 """
25
26
27 import numpy
28
29 from enum import IntEnum
30
31
class type_metric(IntEnum):
    """!
    @brief Identifiers of the distance metrics that can be selected for calculating distance between two points.

    """

    # Euclidean distance.
    EUCLIDEAN = 0

    # Squared Euclidean distance.
    EUCLIDEAN_SQUARE = 1

    # Manhattan distance.
    MANHATTAN = 2

    # Chebyshev distance.
    CHEBYSHEV = 3

    # Minkowski distance.
    MINKOWSKI = 4

    # Distance that is calculated by a callable supplied by the user.
    USER_DEFINED = 1000
55
56
57
class distance_metric:
    """!
    @brief Distance metric performs distance calculation between two points in line with encapsulated function, for
            example, euclidean distance or chebyshev distance, or even user-defined.

    @details

    Example of Euclidean distance metric:
    @code
        metric = distance_metric(type_metric.EUCLIDEAN)
        distance = metric([1.0, 2.5], [-1.2, 3.4])
    @endcode

    Example of Chebyshev distance metric:
    @code
        metric = distance_metric(type_metric.CHEBYSHEV)
        distance = metric([0.0, 0.0], [2.5, 6.0])
    @endcode

    In the following example an additional argument is specified (generally, 'degree' is an optional argument that is
    equal to '2' by default) that is specific for Minkowski distance:
    @code
        metric = distance_metric(type_metric.MINKOWSKI, degree=4)
        distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2])
    @endcode

    User may define their own function for distance calculation:
    @code
        user_function = lambda point1, point2: point1[0] + point2[0] + 2
        metric = distance_metric(type_metric.USER_DEFINED, func=user_function)
        distance = metric([2.0, 3.0], [1.0, 3.0])
    @endcode

    """
    def __init__(self, type, **kwargs):
        """!
        @brief Creates distance metric instance for calculation distance between two points.

        @param[in] type (type_metric): Type of the metric that is used for distance calculation.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'numpy_usage', 'func' and corresponding
                    additional argument for specific metric types).

        <b>Keyword Args:</b><br>
            - func (callable): Callable object with two arguments (point #1 and point #2) or (object #1 and object #2)
               in case of numpy usage. This argument is used only if metric is 'type_metric.USER_DEFINED'.
            - degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
            - numpy_usage (bool): If True then numpy is used for calculation (by default is False).

        """
        self.__type = type
        self.__args = kwargs
        self.__func = self.__args.get('func', None)
        self.__numpy = self.__args.get('numpy_usage', False)

        # The calculator is resolved once here so that '__call__' is a plain delegation.
        self.__calculator = self.__create_distance_calculator()


    def __call__(self, point1, point2):
        """!
        @brief Calculates distance between two points.

        @param[in] point1 (list): The first point.
        @param[in] point2 (list): The second point.

        @return (double) Distance between two points.

        """
        return self.__calculator(point1, point2)


    def get_type(self):
        """!
        @brief Return type of distance metric that is used.

        @return (type_metric) Type of distance metric.

        """
        return self.__type


    def get_arguments(self):
        """!
        @brief Return additional arguments that are used by distance metric.

        @return (dict) Keyword arguments that were passed to the constructor.

        """
        return self.__args


    def get_function(self):
        """!
        @brief Return user-defined function for calculation distance metric.

        @return (callable): User-defined distance metric function (None if 'func' was not specified).

        """
        return self.__func


    def enable_numpy_usage(self):
        """!
        @brief Start numpy for distance calculation.
        @details Useful in case matrices to increase performance. No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = True
        if self.__type != type_metric.USER_DEFINED:
            self.__calculator = self.__create_distance_calculator()


    def disable_numpy_usage(self):
        """!
        @brief Stop using numpy for distance calculation.
        @details Useful in case of big amount of small data portion when numpy call is longer than calculation itself.
                  No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = False
        self.__calculator = self.__create_distance_calculator()


    def __create_distance_calculator(self):
        """!
        @brief Creates distance metric calculator.

        @return (callable) Callable object of distance metric calculator.

        """
        if self.__numpy is True:
            return self.__create_distance_calculator_numpy()

        return self.__create_distance_calculator_basic()


    def __create_distance_calculator_basic(self):
        """!
        @brief Creates distance metric calculator that does not use numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError if the metric type is not one of 'type_metric' values.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is looked up on every call, default value is 2.
            return lambda point1, point2: minkowski_distance(point1, point2, self.__args.get('degree', 2))

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            # The message is formatted explicitly: passing the value as a second exception argument
            # would leave the '%' placeholder unfilled.
            raise ValueError("Unknown type of metric: '%s'" % str(self.__type))


    def __create_distance_calculator_numpy(self):
        """!
        @brief Creates distance metric calculator that uses numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError if the metric type is not one of 'type_metric' values.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance_numpy

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square_numpy

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance_numpy

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance_numpy

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is looked up on every call, default value is 2.
            return lambda object1, object2: minkowski_distance_numpy(object1, object2, self.__args.get('degree', 2))

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            # The message is formatted explicitly: passing the value as a second exception argument
            # would leave the '%' placeholder unfilled.
            raise ValueError("Unknown type of metric: '%s'" % str(self.__type))
249
250
251
def euclidean_distance(point1, point2):
    r"""!
    @brief Compute the Euclidean distance between two vectors.
    @details For vectors (points) a and b the distance is defined as:

    \f[
    dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
    \f]

    Where N is a length of each vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Euclidean distance between two vectors.

    @see euclidean_distance_square, manhattan_distance, chebyshev_distance

    """
    # Square root of the squared Euclidean distance.
    return euclidean_distance_square(point1, point2) ** 0.5
273
274
def euclidean_distance_numpy(object1, object2):
    r"""!
    @brief Calculate Euclidean distance between two objects using numpy.
    @details The squared differences are summed along axis 1 and the square root is taken from the sum:

    \f[
    dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
    \f]

    Because the reduction is performed along axis 1, the difference of the objects must have at least two dimensions.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double) Euclidean distance between two objects.

    """
    # The square root must be applied to the sum; applying it per element before the sum
    # would produce the Manhattan distance instead.
    squared_sum = numpy.sum(numpy.square(object1 - object2), axis=1)
    return numpy.sqrt(squared_sum).T
286
287
def euclidean_distance_square(point1, point2):
    r"""!
    @brief Compute the squared Euclidean distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N}(a_{i} - b_{i})^{2};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Square Euclidean distance between two vectors.

    @see euclidean_distance, manhattan_distance, chebyshev_distance

    """
    total = 0.0
    dimension = len(point1)

    # The amount of coordinates is defined by the first vector.
    for index in range(dimension):
        difference = point1[index] - point2[index]
        total += difference ** 2.0

    return total
309
310
def euclidean_distance_square_numpy(object1, object2):
    """!
    @brief Compute the squared Euclidean distance between two objects by means of numpy.
    @details Squared differences of the objects are summed along axis 1.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double) Square Euclidean distance between two objects.

    """
    difference = object1 - object2
    squared = numpy.square(difference)
    return numpy.sum(squared, axis=1).T
322
323
def manhattan_distance(point1, point2):
    r"""!
    @brief Compute the Manhattan distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |;
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Manhattan distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, chebyshev_distance

    """
    total = 0.0

    # The amount of coordinates is defined by the first vector.
    for index in range(len(point1)):
        difference = point1[index] - point2[index]
        total += abs(difference)

    return total
347
348
def manhattan_distance_numpy(object1, object2):
    """!
    @brief Compute the Manhattan distance between two objects by means of numpy.
    @details Absolute differences of the objects are summed along axis 1.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double) Manhattan distance between two objects.

    """
    difference = object1 - object2
    magnitudes = numpy.absolute(difference)
    return numpy.sum(magnitudes, axis=1).T
360
361
def chebyshev_distance(point1, point2):
    r"""!
    @brief Compute the Chebyshev distance between two vectors.

    \f[
    dist(a, b) = \max_{}i\left (\left | a_{i} - b_{i} \right |\right );
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Chebyshev distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, minkowski_distance

    """
    largest = 0.0

    # The amount of coordinates is defined by the first vector.
    for index in range(len(point1)):
        gap = abs(point1[index] - point2[index])
        # Only a strictly greater gap replaces the current maximum.
        if gap > largest:
            largest = gap

    return largest
385
386
def chebyshev_distance_numpy(object1, object2):
    """!
    @brief Compute the Chebyshev distance between two objects by means of numpy.
    @details The maximum of absolute differences of the objects is taken along axis 1.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double) Chebyshev distance between two objects.

    """
    difference = object1 - object2
    magnitudes = numpy.absolute(difference)
    return numpy.max(magnitudes, axis=1).T
398
399
def minkowski_distance(point1, point2, degree=2):
    r"""!
    @brief Calculate Minkowski distance between two vectors.

    \f[
    dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |^{p} };
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double) Minkowski distance between two vectors.

    @see euclidean_distance

    """
    distance = 0.0

    # The amount of coordinates is defined by the first vector.
    for i in range(len(point1)):
        # Absolute value is required: with an odd degree negative differences would otherwise
        # cancel positive ones or make the sum negative (complex root).
        distance += abs(point1[i] - point2[i]) ** degree

    return distance ** (1.0 / degree)
422
423
def minkowski_distance_numpy(object1, object2, degree=2):
    r"""!
    @brief Calculate Minkowski distance between objects using numpy.
    @details Absolute differences raised to the power 'degree' are summed along axis 1 and the root of the
              degree is taken from the sum:

    \f[
    dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |^{p} };
    \f]

    Because the reduction is performed along axis 1, the difference of the objects must have at least two dimensions.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double) Minkowski distance between two objects.

    """
    # Absolute value keeps odd degrees from producing negative terms (and NaN roots).
    powered = numpy.power(numpy.absolute(object1 - object2), degree)

    # The root must be applied to the sum; applying it per element before the sum
    # would produce the Manhattan distance instead.
    return numpy.power(numpy.sum(powered, axis=1), 1.0 / degree).T
def __create_distance_calculator_basic(self)
Creates distance metric calculator that does not use numpy.
Definition: metric.py:193
def get_arguments(self)
Return additional arguments that are used by distance metric.
Definition: metric.py:138
def euclidean_distance_square(point1, point2)
Calculate square Euclidean distance between two vectors.
Definition: metric.py:288
def minkowski_distance_numpy(object1, object2, degree=2)
Calculate Minkowski distance between objects using numpy.
Definition: metric.py:424
def __init__(self, type, kwargs)
Creates distance metric instance for calculation distance between two points.
Definition: metric.py:92
def __create_distance_calculator(self)
Creates distance metric calculator.
Definition: metric.py:180
def get_type(self)
Return type of distance metric that is used.
Definition: metric.py:128
def chebyshev_distance_numpy(object1, object2)
Calculate Chebyshev distance between two objects using numpy.
Definition: metric.py:387
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:58
def manhattan_distance_numpy(object1, object2)
Calculate Manhattan distance between two objects using numpy.
Definition: metric.py:349
def get_function(self)
Return user-defined function for calculation distance metric.
Definition: metric.py:148
def disable_numpy_usage(self)
Stop using numpy for distance calculation.
Definition: metric.py:169
def euclidean_distance_square_numpy(object1, object2)
Calculate square Euclidean distance between two objects using numpy.
Definition: metric.py:311
def __call__(self, point1, point2)
Calculates distance between two points.
Definition: metric.py:115
def __create_distance_calculator_numpy(self)
Creates distance metric calculator that uses numpy.
Definition: metric.py:222
def euclidean_distance(point1, point2)
Calculate Euclidean distance between two vectors.
Definition: metric.py:252
def manhattan_distance(point1, point2)
Calculate Manhattan distance between between two vectors.
Definition: metric.py:324
def minkowski_distance(point1, point2, degree=2)
Calculate Minkowski distance between two vectors.
Definition: metric.py:400
def euclidean_distance_numpy(object1, object2)
Calculate Euclidean distance between two objects using numpy.
Definition: metric.py:275
def enable_numpy_usage(self)
Start numpy for distance calculation.
Definition: metric.py:158
Enumeration of supported metrics in the module for distance calculation between two points...
Definition: metric.py:32
def chebyshev_distance(point1, point2)
Calculate Chebyshev distance between between two vectors.
Definition: metric.py:362