Completed
Push — 0.8.dev ( 7e1bf5...af3192 )
by Andrei
01:19
created

ttsas.__prcess_by_python()   A

Complexity

Conditions 2

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 7
rs 9.4285
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
4
@details Implementation based on book:
5
         - Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2018
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
29
from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper;
30
from pyclustering.core.metric_wrapper import metric_wrapper;
31
32
from pyclustering.cluster.bsas import bsas;
33
34
35
class ttsas(bsas):
    """!
    @brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
    @details Clustering results of BSAS and MBSAS are strongly dependent on the order in which the points are
              presented in data. TTSAS helps to overcome this shortcoming by using two threshold parameters.
              The first - if the distance to the nearest cluster is less than the first threshold then point is
              assigned to the cluster. The second - if distance to the nearest cluster is greater than the second
              threshold then new cluster is allocated. A point whose distance lies between the two thresholds is
              left undecided and is reconsidered on a later pass over the data.

    Code example of TTSAS usage:
    @code
        # Read data sample from 'Simple03.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Prepare algorithm's parameters.
        threshold1 = 1.0
        threshold2 = 2.0

        # Create instance of TTSAS algorithm (False - use the Python implementation instead of CCORE).
        ttsas_instance = ttsas(sample, threshold1, threshold2, False)
        ttsas_instance.process()

        # Get clustering results.
        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        # Display results using BSAS visualizer.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.mbsas

    """

    def __init__(self, data, threshold1, threshold2, ccore, **kwargs):
        """!
        @brief Creates TTSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    less than 'threshold1' value then point is assigned to the cluster.
        @param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    greater than 'threshold2' value then point is considered as a new cluster.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        Keyword Args:
            metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self._threshold2 = threshold2

        # Every point starts as "skipped" (not yet assigned to any cluster); the counter mirrors the
        # number of True flags so that the main loop can stop without scanning the flag list.
        self._amount_skipped_objects = len(data)
        self._skipped_objects = [True] * len(data)

        # 'len(data)' is passed as the second argument of the base constructor - presumably the maximum
        # amount of clusters, so that TTSAS is never limited by it (confirm against bsas.__init__).
        # 'threshold1' is forwarded as the base class threshold and is read back below as 'self._threshold'.
        super().__init__(data, len(data), threshold1, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of TTSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()


    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using the CCORE wrapper and stores returned clusters and representatives.

        """

        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = ttsas_wrapper(self._data, self._threshold, self._threshold2,
                                                              ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the Python implementation.
        @details Passes over the data are repeated until there are no skipped (unassigned) points. The amount of
                  points that were assigned during a pass is handed over to the next pass: when it is zero the
                  next pass allocates a new cluster for the first skipped point, which guarantees progress.

        """

        changes = 0
        while self._amount_skipped_objects != 0:
            previous_amount = self._amount_skipped_objects
            self.__process_objects(changes)

            changes = previous_amount - self._amount_skipped_objects


    def __process_objects(self, changes):
        """!
        @brief Performs one pass over points that are still skipped.

        @param[in] changes (uint): Amount of points that were assigned during the previous pass.

        """

        # The caller guarantees that at least one point is still skipped, so 'index' always succeeds.
        index_point = self._skipped_objects.index(True)

        if changes == 0:
            # Nothing was assigned during the previous pass (or this is the very first pass): the first
            # skipped point becomes a new cluster, otherwise the pass could leave the state unchanged.
            self.__allocate_cluster(index_point, self._data[index_point])
            index_point += 1

        for i in range(index_point, len(self._data)):
            if self._skipped_objects[i]:
                self.__process_skipped_object(i)


    def __process_skipped_object(self, index_point):
        """!
        @brief Tries to assign the skipped point to the nearest cluster or to allocate a new cluster for it.
        @details The point stays skipped when the distance to the nearest cluster lies between two thresholds.

        @param[in] index_point (uint): Index of the point in the input data.

        """

        point = self._data[index_point]

        # '_find_nearest_cluster' is not defined in this class - it is expected to come from 'bsas'.
        index_cluster, distance = self._find_nearest_cluster(point)

        if distance <= self._threshold:
            self.__append_to_cluster(index_cluster, index_point, point)
        elif distance > self._threshold2:
            self.__allocate_cluster(index_point, point)


    def __append_to_cluster(self, index_cluster, index_point, point):
        """!
        @brief Appends the point to the existing cluster and marks the point as processed.

        @param[in] index_cluster (uint): Index of the cluster where the point is placed.
        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): Coordinates of the point.

        """

        self._clusters[index_cluster].append(index_point)

        # '_update_representative' is not defined in this class - it is expected to come from 'bsas'.
        self._update_representative(index_cluster, point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False


    def __allocate_cluster(self, index_point, point):
        """!
        @brief Allocates new cluster that consists of the point only and marks the point as processed.
        @details The point itself is stored as the representative of the new cluster.

        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): Coordinates of the point.

        """

        self._clusters.append([index_point])
        self._representatives.append(point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False
161