metric.py
1 """!
2 
3 @brief Module provides various distance metrics - abstraction of the notion of distance in a metric space.
4 
5 @authors Andrei Novikov (pyclustering@yandex.ru)
6 @date 2014-2018
7 @copyright GNU Public License
8 
9 @cond GNU_PUBLIC_LICENSE
10  PyClustering is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  PyClustering is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 @endcond
23 
24 """
25 
26 
27 import numpy
28 
29 from enum import IntEnum
30 
31 
class type_metric(IntEnum):
    """!
    @brief Enumeration of supported metrics in the module for distance calculation between two points.

    """

    ## Euclidean distance; the distance metric dispatches it to 'euclidean_distance' (or its numpy variant).
    EUCLIDEAN = 0

    ## Square Euclidean distance; dispatched to 'euclidean_distance_square' (or its numpy variant).
    EUCLIDEAN_SQUARE = 1

    ## Manhattan distance; dispatched to 'manhattan_distance' (or its numpy variant).
    MANHATTAN = 2

    ## Chebyshev distance; dispatched to 'chebyshev_distance' (or its numpy variant).
    CHEBYSHEV = 3

    ## Minkowski distance; dispatched to 'minkowski_distance' (or its numpy variant) with the 'degree' argument.
    MINKOWSKI = 4

    ## User-defined distance; the callable passed as the 'func' argument is used as is.
    USER_DEFINED = 1000
55 
56 
57 
class distance_metric:
    """!
    @brief Distance metric performs distance calculation between two points in line with encapsulated function, for
            example, euclidean distance or chebyshev distance, or even user-defined.

    @details

    Example of Euclidean distance metric:
    @code
        metric = distance_metric(type_metric.EUCLIDEAN)
        distance = metric([1.0, 2.5], [-1.2, 3.4])
    @endcode

    Example of Chebyshev distance metric:
    @code
        metric = distance_metric(type_metric.CHEBYSHEV)
        distance = metric([0.0, 0.0], [2.5, 6.0])
    @endcode

    In following example additional argument should be specified (generally, 'degree' is an optional argument that is
     equal to '2' by default) that is specific for Minkowski distance:
    @code
        metric = distance_metric(type_metric.MINKOWSKI, degree=4)
        distance = metric([4.0, 9.2, 1.0], [3.4, 2.5, 6.2])
    @endcode

    User may define its own function for distance calculation:
    @code
        user_function = lambda point1, point2: point1[0] + point2[0] + 2
        metric = distance_metric(type_metric.USER_DEFINED, func=user_function)
        distance = metric([2.0, 3.0], [1.0, 3.0])
    @endcode

    """

    def __init__(self, type, **kwargs):
        """!
        @brief Creates distance metric instance for calculation distance between two points.

        @param[in] type (type_metric): Type of the metric that should be used for distance calculation.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'numpy_usage', 'func' and corresponding
                    additional argument for specific metric types).

        <b>Keyword Args:</b><br>
            - func (callable): Callable object with two arguments (point #1 and point #2) or (object #1 and object #2)
               in case of numpy usage. This argument is used only if metric is 'type_metric.USER_DEFINED'.
            - degree (numeric): Only for 'type_metric.MINKOWSKI' - degree of Minkowski equation.
            - numpy_usage (bool): If True then numpy is used for calculation (by default is False).

        @exception ValueError if 'type' is not one of the supported metric types.

        """
        self.__type = type
        self.__args = kwargs
        self.__func = self.__args.get('func', None)
        self.__numpy = self.__args.get('numpy_usage', False)

        # The calculator is resolved once here so that '__call__' is a plain delegation.
        self.__calculator = self.__create_distance_calculator()


    def __call__(self, point1, point2):
        """!
        @brief Calculates distance between two points.

        @param[in] point1 (list): The first point.
        @param[in] point2 (list): The second point.

        @return (double) Distance between two points.

        """
        return self.__calculator(point1, point2)


    def get_type(self):
        """!
        @brief Return type of distance metric that is used.

        @return (type_metric) Type of distance metric.

        """
        return self.__type


    def get_arguments(self):
        """!
        @brief Return additional arguments that are used by distance metric.
        @details The stored dictionary itself is returned, not a copy.

        @return (dict) Additional arguments.

        """
        return self.__args


    def get_function(self):
        """!
        @brief Return user-defined function for calculation distance metric.

        @return (callable): User-defined distance metric function (None if it was not specified).

        """
        return self.__func


    def enable_numpy_usage(self):
        """!
        @brief Start numpy for distance calculation.
        @details Useful in case matrices to increase performance. No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = True
        if self.__type != type_metric.USER_DEFINED:
            self.__calculator = self.__create_distance_calculator()


    def disable_numpy_usage(self):
        """!
        @brief Stop using numpy for distance calculation.
        @details Useful in case of big amount of small data portion when numpy call is longer than calculation itself.
                  No effect in case of type_metric.USER_DEFINED type.

        """
        self.__numpy = False
        self.__calculator = self.__create_distance_calculator()


    def __create_distance_calculator(self):
        """!
        @brief Creates distance metric calculator.

        @return (callable) Callable object of distance metric calculator.

        """
        # Identity comparison is intentional: only the boolean True switches to numpy calculators.
        if self.__numpy is True:
            return self.__create_distance_calculator_numpy()

        return self.__create_distance_calculator_basic()


    def __create_distance_calculator_basic(self):
        """!
        @brief Creates distance metric calculator that does not use numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError if the metric type is not supported.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is looked up on every call, so it follows the stored arguments.
            return lambda point1, point2: minkowski_distance(point1, point2, self.__args.get('degree', 2))

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            raise ValueError("Unknown type of metric: '%s'" % (self.__type,))


    def __create_distance_calculator_numpy(self):
        """!
        @brief Creates distance metric calculator that uses numpy.

        @return (callable) Callable object of distance metric calculator.

        @exception ValueError if the metric type is not supported.

        """
        if self.__type == type_metric.EUCLIDEAN:
            return euclidean_distance_numpy

        elif self.__type == type_metric.EUCLIDEAN_SQUARE:
            return euclidean_distance_square_numpy

        elif self.__type == type_metric.MANHATTAN:
            return manhattan_distance_numpy

        elif self.__type == type_metric.CHEBYSHEV:
            return chebyshev_distance_numpy

        elif self.__type == type_metric.MINKOWSKI:
            # 'degree' is looked up on every call, so it follows the stored arguments.
            return lambda object1, object2: minkowski_distance_numpy(object1, object2, self.__args.get('degree', 2))

        elif self.__type == type_metric.USER_DEFINED:
            return self.__func

        else:
            raise ValueError("Unknown type of metric: '%s'" % (self.__type,))
249 
250 
251 
def euclidean_distance(point1, point2):
    r"""!
    @brief Calculate Euclidean distance between two vectors.
    @details The Euclidean distance between vectors (points) a and b is calculated by following formula:

    \f[
    dist(a, b) = \sqrt{ \sum_{i=0}^{N}(a_{i} - b_{i})^{2} };
    \f]

    Where N is a length of each vector.

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Euclidean distance between two vectors.

    @see euclidean_distance_square, manhattan_distance, chebyshev_distance

    """
    # Square root of the accumulated squared distance.
    return euclidean_distance_square(point1, point2) ** 0.5
273 
274 
def euclidean_distance_numpy(object1, object2):
    """!
    @brief Calculate Euclidean distance between two objects using numpy.
    @details For objects with more than one dimension the distance is calculated for each row (along axis 1);
              for one-dimensional objects a single distance is calculated.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Euclidean distance between two objects.

    """
    difference = numpy.subtract(object1, object2)

    # The square root must be taken after the summation of squares; taking it per element
    # would turn the expression into a sum of absolute values (Manhattan distance).
    if numpy.ndim(difference) > 1:
        # '.T' does nothing for a one-dimensional result; kept for other result shapes.
        return numpy.sqrt(numpy.sum(numpy.square(difference), axis=1)).T

    return numpy.sqrt(numpy.sum(numpy.square(difference)))
286 
287 
def euclidean_distance_square(point1, point2):
    r"""!
    @brief Calculate square Euclidean distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N}(a_{i} - b_{i})^{2};
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Square Euclidean distance between two vectors.

    @see euclidean_distance, manhattan_distance, chebyshev_distance

    """
    dimension = len(point1)
    total = 0.0

    # Coordinates are accumulated one by one in index order of the first vector.
    for index in range(dimension):
        deviation = point1[index] - point2[index]
        total += deviation ** 2.0

    return total
309 
310 
def euclidean_distance_square_numpy(object1, object2):
    """!
    @brief Calculate square Euclidean distance between two objects using numpy.
    @details For objects with more than one dimension the distance is calculated for each row (along axis 1);
              for one-dimensional objects a single distance is calculated.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Square Euclidean distance between two objects.

    """
    squares = numpy.square(numpy.subtract(object1, object2))

    if numpy.ndim(squares) > 1:
        # '.T' does nothing for a one-dimensional result; kept for other result shapes.
        return numpy.sum(squares, axis=1).T

    # A single point has no axis 1, so all coordinates are summed.
    return numpy.sum(squares)
322 
323 
def manhattan_distance(point1, point2):
    r"""!
    @brief Calculate Manhattan distance between two vectors.

    \f[
    dist(a, b) = \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |;
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Manhattan distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, chebyshev_distance

    """
    total = 0.0

    # Absolute coordinate differences, produced in index order of the first vector.
    deviations = (abs(point1[index] - point2[index]) for index in range(len(point1)))
    for deviation in deviations:
        total += deviation

    return total
347 
348 
def manhattan_distance_numpy(object1, object2):
    """!
    @brief Calculate Manhattan distance between two objects using numpy.
    @details For objects with more than one dimension the distance is calculated for each row (along axis 1);
              for one-dimensional objects a single distance is calculated.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Manhattan distance between two objects.

    """
    deviations = numpy.absolute(numpy.subtract(object1, object2))

    if numpy.ndim(deviations) > 1:
        # '.T' does nothing for a one-dimensional result; kept for other result shapes.
        return numpy.sum(deviations, axis=1).T

    # A single point has no axis 1, so all coordinates are summed.
    return numpy.sum(deviations)
360 
361 
def chebyshev_distance(point1, point2):
    r"""!
    @brief Calculate Chebyshev distance between two vectors.

    \f[
    dist(a, b) = \max_{i}\left (\left | a_{i} - b_{i} \right |\right );
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.

    @return (double) Chebyshev distance between two vectors.

    @see euclidean_distance_square, euclidean_distance, minkowski_distance

    """
    deviations = [abs(point1[index] - point2[index]) for index in range(len(point1))]

    # The leading 0.0 is the initial candidate, so empty vectors produce 0.0.
    return max([0.0] + deviations)
385 
386 
def chebyshev_distance_numpy(object1, object2):
    """!
    @brief Calculate Chebyshev distance between two objects using numpy.
    @details For objects with more than one dimension the distance is calculated for each row (along axis 1);
              for one-dimensional objects a single distance is calculated.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.

    @return (double|array_like) Chebyshev distance between two objects.

    """
    deviations = numpy.absolute(numpy.subtract(object1, object2))

    if numpy.ndim(deviations) > 1:
        # '.T' does nothing for a one-dimensional result; kept for other result shapes.
        return numpy.max(deviations, axis=1).T

    # A single point has no axis 1, so the maximum is taken over all coordinates.
    return numpy.max(deviations)
398 
399 
def minkowski_distance(point1, point2, degree=2):
    r"""!
    @brief Calculate Minkowski distance between two vectors.

    \f[
    dist(a, b) = \sqrt[p]{ \sum_{i=0}^{N}\left | a_{i} - b_{i} \right |^{p} };
    \f]

    @param[in] point1 (array_like): The first vector.
    @param[in] point2 (array_like): The second vector.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double) Minkowski distance between two vectors.

    @see euclidean_distance

    """
    distance = 0.0
    for i in range(len(point1)):
        # The absolute value keeps every term non-negative; without it an odd degree lets
        # negative differences cancel positive ones and can make the final root complex.
        distance += abs(point1[i] - point2[i]) ** degree

    return distance ** (1.0 / degree)
422 
423 
def minkowski_distance_numpy(object1, object2, degree=2):
    """!
    @brief Calculate Minkowski distance between objects using numpy.
    @details For objects with more than one dimension the distance is calculated for each row (along axis 1);
              for one-dimensional objects a single distance is calculated.

    @param[in] object1 (array_like): The first array_like object.
    @param[in] object2 (array_like): The second array_like object.
    @param[in] degree (numeric): Degree that is used for Minkowski distance.

    @return (double|array_like) Minkowski distance between two objects.

    """
    # Absolute differences keep every term non-negative for any degree.
    powered = numpy.power(numpy.absolute(numpy.subtract(object1, object2)), degree)

    # The root is applied to the sum of powers, not to each element before summation.
    # '1.0 / degree' avoids integer division.
    if numpy.ndim(powered) > 1:
        # '.T' does nothing for a one-dimensional result; kept for other result shapes.
        return numpy.power(numpy.sum(powered, axis=1), 1.0 / degree).T

    return numpy.power(numpy.sum(powered), 1.0 / degree)
def __create_distance_calculator_basic(self)
Creates distance metric calculator that does not use numpy.
Definition: metric.py:193
def get_arguments(self)
Return additional arguments that are used by distance metric.
Definition: metric.py:138
def euclidean_distance_square(point1, point2)
Calculate square Euclidean distance between two vectors.
Definition: metric.py:288
def minkowski_distance_numpy(object1, object2, degree=2)
Calculate Minkowski distance between objects using numpy.
Definition: metric.py:424
def __init__(self, type, **kwargs)
Creates distance metric instance for calculation distance between two points.
Definition: metric.py:92
def __create_distance_calculator(self)
Creates distance metric calculator.
Definition: metric.py:180
def get_type(self)
Return type of distance metric that is used.
Definition: metric.py:128
def chebyshev_distance_numpy(object1, object2)
Calculate Chebyshev distance between two objects using numpy.
Definition: metric.py:387
Distance metric performs distance calculation between two points in line with encapsulated function...
Definition: metric.py:58
def manhattan_distance_numpy(object1, object2)
Calculate Manhattan distance between two objects using numpy.
Definition: metric.py:349
def get_function(self)
Return user-defined function for calculation distance metric.
Definition: metric.py:148
def disable_numpy_usage(self)
Stop using numpy for distance calculation.
Definition: metric.py:169
def euclidean_distance_square_numpy(object1, object2)
Calculate square Euclidean distance between two objects using numpy.
Definition: metric.py:311
def __call__(self, point1, point2)
Calculates distance between two points.
Definition: metric.py:115
def __create_distance_calculator_numpy(self)
Creates distance metric calculator that uses numpy.
Definition: metric.py:222
def euclidean_distance(point1, point2)
Calculate Euclidean distance between two vectors.
Definition: metric.py:252
def manhattan_distance(point1, point2)
Calculate Manhattan distance between two vectors.
Definition: metric.py:324
def minkowski_distance(point1, point2, degree=2)
Calculate Minkowski distance between two vectors.
Definition: metric.py:400
def euclidean_distance_numpy(object1, object2)
Calculate Euclidean distance between two objects using numpy.
Definition: metric.py:275
def enable_numpy_usage(self)
Start numpy for distance calculation.
Definition: metric.py:158
Enumeration of supported metrics in the module for distance calculation between two points...
Definition: metric.py:32
def chebyshev_distance(point1, point2)
Calculate Chebyshev distance between two vectors.
Definition: metric.py:362