Completed
Push — master ( 0ab444...af3192 )
by Andrei
01:25
created

mbsas.__process_by_ccore()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
dl 0
loc 3
rs 10
c 1
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: MBSAS (Modified Basic Sequential Algorithmic Scheme).
4
@details Implementation based on book:
5
         - Sergios Theodoridis, Konstantinos Koutroumbas. Pattern Recognition, 2nd Edition. Elsevier Academic Press, 2003.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2018
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
29
from pyclustering.core.mbsas_wrapper import mbsas as mbsas_wrapper;
30
from pyclustering.core.metric_wrapper import metric_wrapper;
31
32
from pyclustering.cluster.bsas import bsas;
33
34
35
class mbsas(bsas):
    """!
    @brief Class represents MBSAS (Modified Basic Sequential Algorithmic Scheme).
    @details Interface of MBSAS algorithm is the same as for BSAS. This algorithm performs clustering in two steps.
              The first step determines the amount of clusters. The second step assigns the points that were not
              marked as cluster representatives to clusters.

    Code example of MBSAS usage:
    @code
        # Read data sample from 'Simple02.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2)

        # Prepare algorithm's parameters.
        max_clusters = 2
        threshold = 1.0

        # Create instance of MBSAS algorithm.
        mbsas_instance = mbsas(sample, max_clusters, threshold)
        mbsas_instance.process()

        # Get clustering results.
        clusters = mbsas_instance.get_clusters()
        representatives = mbsas_instance.get_representatives()

        # Display results.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.ttsas

    """

    def __init__(self, data, maximum_clusters, threshold, ccore=True, **kwargs):
        """!
        @brief Creates MBSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] maximum_clusters: Maximum allowable number of clusters that can be allocated during processing.
        @param[in] threshold: Threshold of dissimilarity (maximum distance) between points.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        Keyword Args:
            metric (distance_metric): Metric that is used for distance calculation between two points.

        """
        # All state is set up by the BSAS base class; MBSAS only changes the processing procedure.
        super().__init__(data, maximum_clusters, threshold, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of MBSAS algorithm.
        @details Clustering is delegated to CCORE when the instance was configured to use it, otherwise
                  the pure Python implementation is used.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper.
        @details Wraps the configured metric for CCORE and stores the clusters and representatives
                  returned by the wrapper.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = mbsas_wrapper(self._data, self._amount, self._threshold,
                                                              ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the pure Python implementation.
        @details The first pass allocates clusters: a point opens a new cluster when its distance to the
                  nearest existing cluster exceeds the threshold and the maximum amount of clusters is not
                  reached yet; every other point is postponed. The second pass appends each postponed
                  point to its nearest cluster and updates the representative of that cluster.

        @remark Results are appended to the cluster and representative containers of the instance, which
                 are presumably created empty by the BSAS base class (not visible here).

        """
        # The very first point always forms the first cluster and is its representative.
        self._clusters.append([0])
        self._representatives.append(self._data[0])

        skipped_objects = []

        # Step 1: determine clusters, postponing points that do not open a new cluster.
        for i in range(1, len(self._data)):
            point = self._data[i]
            _, distance = self._find_nearest_cluster(point)

            if (distance > self._threshold) and (len(self._clusters) < self._amount):
                self._representatives.append(point)
                self._clusters.append([i])
            else:
                skipped_objects.append(i)

        # Step 2: assign postponed points; only now the full set of clusters is known.
        for i in skipped_objects:
            point = self._data[i]
            index_cluster, _ = self._find_nearest_cluster(point)

            self._clusters[index_cluster].append(i)
            self._update_representative(index_cluster, point)