3 @brief Neural Network: Self-Organized Feature Map
4 @details Implementation based on paper @cite article::nnet::som::1, @cite article::nnet::som::2.
6 @authors Andrei Novikov (pyclustering@yandex.ru)
8 @copyright BSD-3-Clause
15 import matplotlib.pyplot
as plt
17 import pyclustering.core.som_wrapper
as wrapper
19 from pyclustering.core.wrapper
import ccore_library
22 from pyclustering.utils.dimension
import dimension_info
24 from enum
import IntEnum
29 @brief Enumeration of connection types for SOM.
50 @brief Enumeration of initialization types for SOM.
71 @brief Represents SOM parameters.
77 @brief Creates SOM parameters.
99 @brief Represents self-organized feature map (SOM).
100 @details The self-organizing feature map (SOM) method is a powerful tool for the visualization of
101 of high-dimensional data. It converts complex, nonlinear statistical relationships between
102 high-dimensional data into simple geometric relationships on a low-dimensional display.
104 @details `ccore` option can be specified in order to control using C++ implementation of pyclustering library. By
105 default C++ implementation is on. C++ implementation improves performance of the self-organized feature
112 from pyclustering.utils import read_sample
113 from pyclustering.nnet.som import som, type_conn, type_init, som_parameters
114 from pyclustering.samples.definitions import FCPS_SAMPLES
116 # read sample 'Lsun' from file
117 sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
119 # create SOM parameters
120 parameters = som_parameters()
122 # create self-organized feature map with size 7x7
123 rows = 10 # five rows
124 cols = 10 # five columns
125 structure = type_conn.grid_four; # each neuron has max. four neighbors.
126 network = som(rows, cols, structure, parameters)
128 # train network on 'Lsun' sample during 100 epouchs.
129 network.train(sample, 100)
131 # simulate trained network using randomly modified point from input dataset.
132 index_point = random.randint(0, len(sample) - 1)
133 point = sample[index_point] # obtain randomly point from data
134 point[0] += random.random() * 0.2 # change randomly X-coordinate
135 point[1] += random.random() * 0.2 # change randomly Y-coordinate
136 index_winner = network.simulate(point)
138 # check what are objects from input data are much close to randomly modified.
139 index_similar_objects = network.capture_objects[index_winner]
141 # neuron contains information of encoded objects
142 print("Point '%s' is similar to objects with indexes '%s'." % (str(point), str(index_similar_objects)))
143 print("Coordinates of similar objects:")
144 for index in index_similar_objects: print("\tPoint:", sample[index])
146 # result visualization:
147 # show distance matrix (U-matrix).
148 network.show_distance_matrix()
150 # show density matrix (P-matrix).
151 network.show_density_matrix()
153 # show winner matrix.
154 network.show_winner_matrix()
156 # show self-organized map.
157 network.show_network()
160 There is a visualization of 'Target' sample that was done by the self-organized feature map:
161 @image html target_som_processing.png
168 @brief Return size of self-organized map that is defined by total number of neurons.
170 @return (uint) Size of self-organized map (number of neurons).
182 @brief Return weight of each neuron.
184 @return (list) Weights of each neuron.
196 @brief Return amount of captured objects by each neuron after training.
198 @return (list) Amount of captured objects by each neuron.
212 @brief Returns indexes of captured objects by each neuron.
213 @details For example, a network with size 2x2 has been trained on a sample with five objects. Suppose neuron #1
214 won an object with index `1`, neuron #2 won objects `0`, `3`, `4`, neuron #3 did not won anything and
215 finally neuron #4 won an object with index `2`. Thus, for this example we will have the following
216 output `[[1], [0, 3, 4], [], [2]]`.
218 @return (list) Indexes of captured objects by each neuron.
227 def __init__(self, rows, cols, conn_type=type_conn.grid_eight, parameters=None, ccore=True):
229 @brief Constructor of self-organized map.
231 @param[in] rows (uint): Number of neurons in the column (number of rows).
232 @param[in] cols (uint): Number of neurons in the row (number of columns).
233 @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
234 @param[in] parameters (som_parameters): Other specific parameters.
235 @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).
244 self.
_size = cols * rows
260 if self.
_params.init_radius
is None:
263 if (ccore
is True)
and ccore_library.workable():
283 if conn_type != type_conn.func_neighbor:
288 @brief Destructor of the self-organized feature map.
297 @brief Returns size of the network that defines by amount of neuron in it.
299 @return (uint) Size of self-organized map (amount of neurons).
307 @brief Returns state of SOM network that can be used to store network.
318 @brief Set state of SOM network that can be used to load network.
321 if som_state[
'ccore']
is True and ccore_library.workable():
326 def __initialize_initial_radius(self, rows, cols):
328 @brief Initialize initial radius using map sizes.
330 @param[in] rows (uint): Number of neurons in the column (number of rows).
331 @param[in] cols (uint): Number of neurons in the row (number of columns).
333 @return (list) Value of initial radius.
337 if (cols + rows) / 4.0 > 1.0:
340 elif (cols > 1)
and (rows > 1):
346 def __initialize_locations(self, rows, cols):
348 @brief Initialize locations (coordinates in SOM grid) of each neurons in the map.
350 @param[in] rows (uint): Number of neurons in the column (number of rows).
351 @param[in] cols (uint): Number of neurons in the row (number of columns).
353 @return (list) List of coordinates of each neuron in map.
358 for i
in range(rows):
359 for j
in range(cols):
360 location.append([float(i), float(j)])
364 def __initialize_distances(self, size, location):
366 @brief Initialize distance matrix in SOM grid.
368 @param[in] size (uint): Amount of neurons in the network.
369 @param[in] location (list): List of coordinates of each neuron in the network.
371 @return (list) Distance matrix between neurons in the network.
374 sqrt_distances = [[[]
for i
in range(size)]
for j
in range(size)]
375 for i
in range(size):
376 for j
in range(i, size, 1):
377 dist = euclidean_distance_square(location[i], location[j])
378 sqrt_distances[i][j] = dist
379 sqrt_distances[j][i] = dist
381 return sqrt_distances
383 def _create_initial_weights(self, init_type):
385 @brief Creates initial weights for neurons in line with the specified initialization.
387 @param[in] init_type (type_init): Type of initialization of initial neuron weights (random, random in center of the input data, random distributed in data, ditributed in line with uniform grid).
391 dim_info = dimension_info(self.
_data)
393 step_x = dim_info.get_center()[0]
395 step_x = dim_info.get_width()[0] / (self.
_rows - 1)
398 if dim_info.get_dimensions() > 1:
399 step_y = dim_info.get_center()[1]
401 step_y = dim_info.get_width()[1] / (self.
_cols - 1)
404 random.seed(self.
_params.random_state)
407 if init_type == type_init.uniform_grid:
409 self.
_weights = [[[]
for i
in range(dim_info.get_dimensions())]
for j
in range(self.
_size)]
410 for i
in range(self.
_size):
412 for dim
in range(dim_info.get_dimensions()):
415 self.
_weights[i][dim] = dim_info.get_minimum_coordinate()[dim] + step_x * location[dim]
417 self.
_weights[i][dim] = dim_info.get_center()[dim]
421 self.
_weights[i][dim] = dim_info.get_minimum_coordinate()[dim] + step_y * location[dim]
423 self.
_weights[i][dim] = dim_info.get_center()[dim]
425 self.
_weights[i][dim] = dim_info.get_center()[dim]
427 elif init_type == type_init.random_surface:
430 [random.uniform(dim_info.get_minimum_coordinate()[i], dim_info.get_maximum_coordinate()[i])
for i
in
431 range(dim_info.get_dimensions())]
for _
in range(self.
_size)]
433 elif init_type == type_init.random_centroid:
435 self.
_weights = [[(random.random() + dim_info.get_center()[i])
for i
in range(dim_info.get_dimensions())]
436 for _
in range(self.
_size)]
440 self.
_weights = [[random.random()
for i
in range(dim_info.get_dimensions())]
for _
in range(self.
_size)]
442 def _create_connections(self, conn_type):
444 @brief Create connections in line with input rule (grid four, grid eight, honeycomb, function neighbour).
446 @param[in] conn_type (type_conn): Type of connection between oscillators in the network.
452 for index
in range(0, self.
_size, 1):
453 upper_index = index - self.
_cols
454 upper_left_index = index - self.
_cols - 1
455 upper_right_index = index - self.
_cols + 1
457 lower_index = index + self.
_cols
458 lower_left_index = index + self.
_cols - 1
459 lower_right_index = index + self.
_cols + 1
461 left_index = index - 1
462 right_index = index + 1
464 node_row_index = math.floor(index / self.
_cols)
465 upper_row_index = node_row_index - 1
466 lower_row_index = node_row_index + 1
468 if (conn_type == type_conn.grid_eight)
or (conn_type == type_conn.grid_four):
472 if lower_index < self.
_size:
475 if (conn_type == type_conn.grid_eight)
or (conn_type == type_conn.grid_four)
or (
476 conn_type == type_conn.honeycomb):
477 if (left_index >= 0)
and (math.floor(left_index / self.
_cols) == node_row_index):
480 if (right_index < self.
_size)
and (math.floor(right_index / self.
_cols) == node_row_index):
483 if conn_type == type_conn.grid_eight:
484 if (upper_left_index >= 0)
and (math.floor(upper_left_index / self.
_cols) == upper_row_index):
485 self.
_neighbors[index].append(upper_left_index)
487 if (upper_right_index >= 0)
and (math.floor(upper_right_index / self.
_cols) == upper_row_index):
488 self.
_neighbors[index].append(upper_right_index)
490 if (lower_left_index < self.
_size)
and (math.floor(lower_left_index / self.
_cols) == lower_row_index):
491 self.
_neighbors[index].append(lower_left_index)
493 if (lower_right_index < self.
_size)
and (math.floor(lower_right_index / self.
_cols) == lower_row_index):
494 self.
_neighbors[index].append(lower_right_index)
496 if conn_type == type_conn.honeycomb:
497 if (node_row_index % 2) == 0:
498 upper_left_index = index - self.
_cols
499 upper_right_index = index - self.
_cols + 1
501 lower_left_index = index + self.
_cols
502 lower_right_index = index + self.
_cols + 1
504 upper_left_index = index - self.
_cols - 1
505 upper_right_index = index - self.
_cols
507 lower_left_index = index + self.
_cols - 1
508 lower_right_index = index + self.
_cols
510 if (upper_left_index >= 0)
and (math.floor(upper_left_index / self.
_cols) == upper_row_index):
511 self.
_neighbors[index].append(upper_left_index)
513 if (upper_right_index >= 0)
and (math.floor(upper_right_index / self.
_cols) == upper_row_index):
514 self.
_neighbors[index].append(upper_right_index)
516 if (lower_left_index < self.
_size)
and (math.floor(lower_left_index / self.
_cols) == lower_row_index):
517 self.
_neighbors[index].append(lower_left_index)
519 if (lower_right_index < self.
_size)
and (math.floor(lower_right_index / self.
_cols) == lower_row_index):
520 self.
_neighbors[index].append(lower_right_index)
522 def _competition(self, x):
524 @brief Calculates neuron winner (distance, neuron index).
526 @param[in] x (list): Input pattern from the input data set, for example it can be coordinates of point.
528 @return (uint) Returns index of neuron that is winner.
533 minimum = euclidean_distance_square(self.
_weights[0], x)
535 for i
in range(1, self.
_size, 1):
536 candidate = euclidean_distance_square(self.
_weights[i], x)
537 if candidate < minimum:
543 def _adaptation(self, index, x):
545 @brief Change weight of neurons in line with won neuron.
547 @param[in] index (uint): Index of neuron-winner.
548 @param[in] x (list): Input pattern from the input data set.
554 if self.
_conn_type == type_conn.func_neighbor:
555 for neuron_index
in range(self.
_size):
559 influence = math.exp(-(distance / (2.0 * self.
_local_radius)))
561 for i
in range(dimension):
564 x[i] - self.
_weights[neuron_index][i])
567 for i
in range(dimension):
573 influence = math.exp(-(distance / (2.0 * self.
_local_radius)))
575 for i
in range(dimension):
578 x[i] - self.
_weights[neighbor_index][i])
580 def train(self, data, epochs, autostop=False):
582 @brief Trains self-organized feature map (SOM).
584 @param[in] data (list): Input data - list of points where each point is represented by list of features, for example coordinates.
585 @param[in] epochs (uint): Number of epochs for training.
586 @param[in] autostop (bool): Automatic termination of learning process when adaptation is not occurred.
588 @return (uint) Number of learning iterations.
599 for i
in range(self.
_size):
606 previous_weights =
None
608 for epoch
in range(1, epochs + 1):
615 for i
in range(self.
_size):
619 for i
in range(len(self.
_data)):
627 if (autostop
is True)
or (epoch == epochs):
633 if previous_weights
is not None:
635 if maximal_adaptation < self.
_params.adaptation_threshold:
638 previous_weights = [item[:]
for item
in self.
_weights]
644 @brief Processes input pattern (no learining) and returns index of neuron-winner.
645 Using index of neuron winner catched object can be obtained using property capture_objects.
647 @param[in] input_pattern (list): Input pattern.
649 @return (uint) Returns index of neuron-winner.
660 def _get_maximal_adaptation(self, previous_weights):
662 @brief Calculates maximum changes of weight in line with comparison between previous weights and current weights.
664 @param[in] previous_weights (list): Weights from the previous step of learning process.
666 @return (double) Value that represents maximum changes of weight after adaptation process.
670 dimension = len(self.
_data[0])
671 maximal_adaptation = 0.0
673 for neuron_index
in range(self.
_size):
674 for dim
in range(dimension):
675 current_adaptation = previous_weights[neuron_index][dim] - self.
_weights[neuron_index][dim]
677 if current_adaptation < 0:
678 current_adaptation = -current_adaptation
680 if maximal_adaptation < current_adaptation:
681 maximal_adaptation = current_adaptation
683 return maximal_adaptation
687 @brief Calculates number of winner at the last step of learning process.
689 @return (uint) Number of winner.
697 for i
in range(self.
_size):
705 @brief Shows gray visualization of U-matrix (distance matrix).
707 @see get_distance_matrix()
712 plt.imshow(distance_matrix, cmap=plt.get_cmap(
'hot'), interpolation=
'kaiser')
713 plt.title(
"U-Matrix")
719 @brief Calculates distance matrix (U-matrix).
720 @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
722 @return (list) Distance matrix (U-matrix).
724 @see show_distance_matrix()
725 @see get_density_matrix()
731 if self.
_conn_type != type_conn.func_neighbor:
734 distance_matrix = [[0.0] * self.
_cols for i
in range(self.
_rows)]
736 for i
in range(self.
_rows):
737 for j
in range(self.
_cols):
738 neuron_index = i * self.
_cols + j
740 if self.
_conn_type == type_conn.func_neighbor:
743 for neighbor_index
in self.
_neighbors[neuron_index]:
744 distance_matrix[i][j] += euclidean_distance_square(self.
_weights[neuron_index],
747 distance_matrix[i][j] /= len(self.
_neighbors[neuron_index])
749 return distance_matrix
753 @brief Show density matrix (P-matrix) using kernel density estimation.
755 @param[in] surface_divider (double): Divider in each dimension that affect radius for density measurement.
757 @see show_distance_matrix()
762 plt.imshow(density_matrix, cmap=plt.get_cmap(
'hot'), interpolation=
'kaiser')
763 plt.title(
"P-Matrix")
769 @brief Calculates density matrix (P-Matrix).
771 @param[in] surface_divider (double): Divider in each dimension that affect radius for density measurement.
773 @return (list) Density matrix (P-Matrix).
775 @see get_distance_matrix()
782 density_matrix = [[0] * self.
_cols for i
in range(self.
_rows)]
785 dim_max = [float(
'-Inf')] * dimension
786 dim_min = [float(
'Inf')] * dimension
789 for index_dim
in range(dimension):
790 if weight[index_dim] > dim_max[index_dim]:
791 dim_max[index_dim] = weight[index_dim]
793 if weight[index_dim] < dim_min[index_dim]:
794 dim_min[index_dim] = weight[index_dim]
796 radius = [0.0] * len(self.
_weights[0])
797 for index_dim
in range(dimension):
798 radius[index_dim] = (dim_max[index_dim] - dim_min[index_dim]) / surface_divider
801 for point
in self.
_data:
802 for index_neuron
in range(len(self)):
805 for index_dim
in range(dimension):
806 if abs(point[index_dim] - self.
_weights[index_neuron][index_dim]) > radius[index_dim]:
807 point_covered =
False
810 row = int(math.floor(index_neuron / self.
_cols))
811 col = index_neuron - row * self.
_cols
813 if point_covered
is True:
814 density_matrix[row][col] += 1
816 return density_matrix
820 @brief Show a winner matrix where each element corresponds to neuron and value represents
821 amount of won objects from input data-space at the last training iteration.
823 @see show_distance_matrix()
830 (fig, ax) = plt.subplots()
831 winner_matrix = [[0] * self.
_cols for _
in range(self.
_rows)]
833 for i
in range(self.
_rows):
834 for j
in range(self.
_cols):
835 neuron_index = i * self.
_cols + j
837 winner_matrix[i][j] = self.
_award[neuron_index]
838 ax.text(i, j, str(winner_matrix[i][j]), va=
'center', ha=
'center')
840 ax.imshow(winner_matrix, cmap=plt.get_cmap(
'cool'), interpolation=
'none')
843 plt.title(
"Winner Matrix")
846 def show_network(self, awards=False, belongs=False, coupling=True, dataset=True, marker_type='o'):
848 @brief Shows neurons in the dimension of data.
850 @param[in] awards (bool): If True - displays how many objects won each neuron.
851 @param[in] belongs (bool): If True - marks each won object by according index of neuron-winner (only when
852 dataset is displayed too).
853 @param[in] coupling (bool): If True - displays connections between neurons (except case when function neighbor
855 @param[in] dataset (bool): If True - displays inputs data set.
856 @param[in] marker_type (string): Defines marker that is used to denote neurons on the plot.
871 if (dimension == 1)
or (dimension == 2):
872 axes = fig.add_subplot(111)
874 axes = fig.gca(projection=
'3d')
876 raise NotImplementedError(
'Impossible to show network in data-space that is differ from 1D, 2D or 3D.')
878 if (self.
_data is not None)
and (dataset
is True):
881 axes.plot(x[0], 0.0,
'b|', ms=30)
884 axes.plot(x[0], x[1],
'b.')
887 axes.scatter(x[0], x[1], x[2], c=
'b', marker=
'.')
890 for index
in range(self.
_size):
892 if self.
_award[index] == 0:
896 axes.plot(self.
_weights[index][0], 0.0, color + marker_type)
899 location =
'{0}'.format(self.
_award[index])
900 axes.text(self.
_weights[index][0], 0.0, location, color=
'black', fontsize=10)
902 if belongs
and self.
_data is not None:
903 location =
'{0}'.format(index)
904 axes.text(self.
_weights[index][0], 0.0, location, color=
'black', fontsize=12)
907 axes.text(point[0], 0.0, location, color=
'blue', fontsize=10)
910 axes.plot(self.
_weights[index][0], self.
_weights[index][1], color + marker_type)
913 location =
'{0}'.format(self.
_award[index])
914 axes.text(self.
_weights[index][0], self.
_weights[index][1], location, color=
'black', fontsize=10)
916 if belongs
and self.
_data is not None:
917 location =
'{0}'.format(index)
918 axes.text(self.
_weights[index][0], self.
_weights[index][1], location, color=
'black', fontsize=12)
921 axes.text(point[0], point[1], location, color=
'blue', fontsize=10)
923 if (self.
_conn_type != type_conn.func_neighbor)
and (coupling
is True):
934 if (self.
_conn_type != type_conn.func_neighbor)
and (coupling !=
False):
942 plt.title(
"Network Structure")
946 def __get_dump_from_python(self, ccore_usage):
947 return {
'ccore': ccore_usage,
948 'state': {
'cols': self.
_cols,
961 def __download_dump_from_ccore(self):
967 def __upload_common_part(self, state_dump):
968 self.
_cols = state_dump[
'cols']
969 self.
_rows = state_dump[
'rows']
970 self.
_size = state_dump[
'size']
975 self.
_params = state_dump[
'params']
978 def __upload_dump_to_python(self, state_dump):
984 self.
_weights = state_dump[
'weights']
985 self.
_award = state_dump[
'award']
991 def __upload_dump_to_ccore(self, state_dump):
995 state_dump[
'capture_objects'])