ttsas.__prcess_by_python() - Code Metrics - Inspection of "#398: BSAS, MBSAS, TTSAS final changes." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.8.dev ( 7e1bf5...af3192 )

by Andrei

created 2018-04-24 11:45 UTC

ttsas.__prcess_by_python() A

↳ Parent: ttsas

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
dl	0
loc	7
rs	9.4285
c	0
b	0
f	0

"""!

@brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
@details Implementation based on book:
         - Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.

@authors Andrei Novikov ([email protected])
@date 2014-2018
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper;
from pyclustering.core.metric_wrapper import metric_wrapper;

from pyclustering.cluster.bsas import bsas;


class ttsas(bsas):
    """!
    @brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
    @details Clustering results of BSAS and MBSAS are strongly dependent on the order of the points in data.
              TTSAS helps to overcome this shortcoming by using two threshold parameters. The first - if the
              distance to the nearest cluster does not exceed the first threshold then the point is assigned to
              that cluster. The second - if the distance to the nearest cluster is greater than the second
              threshold then a new cluster is allocated. A point whose distance lies between the two thresholds
              is left unassigned and is considered again during a later pass over the data.

    Code example of TTSAS usage:
    @code
        # Read data sample from 'Simple03.data'.
        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)

        # Prepare algorithm's parameters.
        threshold1 = 1.0
        threshold2 = 2.0

        # Create instance of TTSAS algorithm (ccore = False selects the Python implementation).
        ttsas_instance = ttsas(sample, threshold1, threshold2, False)
        ttsas_instance.process()

        # Get clustering results.
        clusters = ttsas_instance.get_clusters()
        representatives = ttsas_instance.get_representatives()

        # Display results using BSAS visualizer.
        bsas_visualizer.show_clusters(sample, clusters, representatives)
    @endcode

    @see pyclustering.cluster.bsas, pyclustering.cluster.mbsas

    """

    def __init__(self, data, threshold1, threshold2, ccore, **kwargs):
        """!
        @brief Creates TTSAS algorithm.

        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
        @param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance
                    does not exceed 'threshold1' value then point is assigned to the cluster.
        @param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance is
                    greater than 'threshold2' value then point is considered as a new cluster.
        @param[in] ccore (bool): If True then DLL CCORE (C++ solution) will be used for solving.
        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').

        Keyword Args:
            metric (distance_metric): Metric that is used for distance calculation between two points.

        """

        self._threshold2 = threshold2

        # Every point starts as "skipped" (not yet assigned to any cluster).
        self._amount_skipped_objects = len(data)
        self._skipped_objects = [True] * len(data)

        # 'len(data)' is passed in the second position of the parent constructor
        # (presumably the maximum amount of clusters - confirm against bsas), so the
        # amount of clusters is effectively bounded only by the amount of points.
        super().__init__(data, len(data), threshold1, ccore, **kwargs)


    def process(self):
        """!
        @brief Performs cluster analysis in line with rules of TTSAS algorithm.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_representatives()

        """

        # Strict identity check is kept on purpose: only the exact value True selects CCORE.
        if self._ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()


    def __process_by_ccore(self):
        """!
        @brief Delegates clustering to the CCORE wrapper and stores returned clusters and representatives.

        """
        ccore_metric = metric_wrapper.create_instance(self._metric)
        self._clusters, self._representatives = ttsas_wrapper(self._data, self._threshold, self._threshold2, ccore_metric.get_pointer())


    def __process_by_python(self):
        """!
        @brief Runs passes over the data until there are no unassigned (skipped) points left.
        @details Amount of points assigned during the previous pass is forwarded to the next pass; when it is zero
                  the next pass forcibly allocates a cluster, so every two consecutive passes assign at least one
                  point and the loop terminates.

        """
        changes = 0
        while self._amount_skipped_objects != 0:
            previous_amount = self._amount_skipped_objects
            self.__process_objects(changes)

            changes = previous_amount - self._amount_skipped_objects


    def __process_objects(self, changes):
        """!
        @brief Performs a single pass over the points that are still unassigned.

        @param[in] changes (int): Amount of points that were assigned during the previous pass.

        """
        index_point = self._skipped_objects.index(True)

        if changes == 0:
            # Nothing was assigned on the previous pass (or this is the very first pass):
            # the first unassigned point becomes a new cluster to guarantee progress.
            self.__allocate_cluster(index_point, self._data[index_point])
            index_point += 1

        for i in range(index_point, len(self._data)):
            if self._skipped_objects[i]:
                self.__process_skipped_object(i)


    def __process_skipped_object(self, index_point):
        """!
        @brief Tries to assign one unassigned point using both thresholds.
        @details The point is left unassigned when the distance to its nearest cluster is greater than the first
                  threshold but does not exceed the second one.

        @param[in] index_point (uint): Index of the point in the input data.

        """
        point = self._data[index_point]

        index_cluster, distance = self._find_nearest_cluster(point)

        if distance <= self._threshold:
            self.__append_to_cluster(index_cluster, index_point, point)
        elif distance > self._threshold2:
            self.__allocate_cluster(index_point, point)


    def __append_to_cluster(self, index_cluster, index_point, point):
        """!
        @brief Appends point to the existing cluster and marks the point as assigned.

        @param[in] index_cluster (uint): Index of the cluster where the point should be placed.
        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point itself; it is forwarded to the inherited '_update_representative'.

        """
        self._clusters[index_cluster].append(index_point)
        self._update_representative(index_cluster, point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False


    def __allocate_cluster(self, index_point, point):
        """!
        @brief Allocates new cluster that consists of the single point and marks the point as assigned.
        @details The point itself is stored as the representative of the new cluster.

        @param[in] index_point (uint): Index of the point in the input data.
        @param[in] point (list): The point that is used as the representative of the new cluster.

        """
        self._clusters.append([index_point])
        self._representatives.append(point)

        self._amount_skipped_objects -= 1
        self._skipped_objects[index_point] = False


1			"""!
2
3			@brief Cluster analysis algorithm: TTSAS (Two-Threshold Sequential Algorithmic Scheme).
4			@details Implementation based on book:
5			- Theodoridis, Koutroumbas, Konstantinos. Elsevier Academic Press - Pattern Recognition - 2nd Edition. 2003.
6
7			@authors Andrei Novikov ([email protected])
8			@date 2014-2018
9			@copyright GNU Public License
10
11			@cond GNU_PUBLIC_LICENSE
12			PyClustering is free software: you can redistribute it and/or modify
13			it under the terms of the GNU General Public License as published by
14			the Free Software Foundation, either version 3 of the License, or
15			(at your option) any later version.
16
17			PyClustering is distributed in the hope that it will be useful,
18			but WITHOUT ANY WARRANTY; without even the implied warranty of
19			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20			GNU General Public License for more details.
21
22			You should have received a copy of the GNU General Public License
23			along with this program. If not, see <http://www.gnu.org/licenses/>.
24			@endcond
25
26			"""
27
28
29			from pyclustering.core.ttsas_wrapper import ttsas as ttsas_wrapper;
30			from pyclustering.core.metric_wrapper import metric_wrapper;
31
32			from pyclustering.cluster.bsas import bsas;
33
34
35			class ttsas(bsas):
36			"""!
37			@brief Class represents TTSAS (Two-Threshold Sequential Algorithmic Scheme).
38			@details Clustering results of BSAS and MBSAS are strongly dependent on the order of the points in data.
39			TTSAS helps to overcome this shortcoming by using two threshold parameters. The first - if the distance
40			to the nearest cluster is less than the first threshold then point is assigned to the cluster. The
41			second - if distance to the nearest cluster is greater than the second threshold then new cluster is
42			allocated.
43
44			Code example of TTSAS usage:
45			@code
46			# Read data sample from 'Simple03.data'.
47			sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3);
48
49			# Prepare algorithm's parameters.
50			threshold1 = 1.0;
51			threshold2 = 2.0;
52
53			# Create instance of TTSAS algorithm.
54			ttsas_instance = ttsas(sample, threshold1, threshold2, False);
55			ttsas_instance.process();
56
57			# Get clustering results.
58			clusters = ttsas_instance.get_clusters();
59			representatives = ttsas_instance.get_representatives();
60
61			# Display results using BSAS visualizer.
62			bsas_visualizer.show_clusters(sample, clusters, representatives);
63			@endcode
64
65			@see pyclustering.cluster.bsas, pyclustering.cluster.mbsas
66
67			"""
68
69			def __init__(self, data, threshold1, threshold2, ccore, **kwargs):
70			"""!
71			@brief Creates TTSAS algorithm.
72
73			@param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
74			@param[in] threshold1: Dissimilarity level (distance) between point and its closest cluster, if the distance is
75			less than 'threshold1' value then point is assigned to the cluster.
76			@param[in] threshold2: Dissimilarity level (distance) between point and its closest cluster, if the distance is
77			greater than 'threshold2' value then point is considered as a new cluster.
78			@param[in] ccore (bool): If True than DLL CCORE (C++ solution) will be used for solving.
79			@param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric').
80
81			Keyword Args:
82			metric (distance_metric): Metric that is used for distance calculation between two points.
83
84			"""
85
86			self._threshold2 = threshold2;
87			self._amount_skipped_objects = len(data);
88			self._skipped_objects = [ True ] * len(data);
89
90			super().__init__(data, len(data), threshold1, ccore, **kwargs);
91
92
93			def process(self):
94			"""!
95			@brief Performs cluster analysis in line with rules of TTSAS algorithm.
96
97			@remark Results of clustering can be obtained using corresponding get methods.
98
99			@see get_clusters()
100			@see get_representatives()
101
102			"""
103
104			if self._ccore is True:
105			self.__process_by_ccore();
106			else:
107			self.__prcess_by_python();
108
109
110			def __process_by_ccore(self):
111			ccore_metric = metric_wrapper.create_instance(self._metric);
112			self._clusters, self._representatives = ttsas_wrapper(self._data, self._threshold, self._threshold2, ccore_metric.get_pointer());
113
114
115			def __prcess_by_python(self):
116			changes = 0;
117			while self._amount_skipped_objects != 0:
118			previous_amount = self._amount_skipped_objects;
119			self.__process_objects(changes);
120
121			changes = previous_amount - self._amount_skipped_objects;
122
123
124			def __process_objects(self, changes):
125			index_point = self._skipped_objects.index(True);
126
127			if changes == 0:
128			self.__allocate_cluster(index_point, self._data[index_point]);
129			index_point += 1;
130
131			for i in range(index_point, len(self._data)):
132			if self._skipped_objects[i] is True:
133			self.__process_skipped_object(i);
134
135
136			def __process_skipped_object(self, index_point):
137			point = self._data[index_point];
138
139			index_cluster, distance = self._find_nearest_cluster(point);
140
141			if distance <= self._threshold:
142			self.__append_to_cluster(index_cluster, index_point, point);
143			elif distance > self._threshold2:
144			self.__allocate_cluster(index_point, point);
145
146
147			def __append_to_cluster(self, index_cluster, index_point, point):
148			self._clusters[index_cluster].append(index_point);
149			self._update_representative(index_cluster, point);
150
151			self._amount_skipped_objects -= 1;
152			self._skipped_objects[index_point] = False;
153
154
155			def __allocate_cluster(self, index_point, point):
156			self._clusters.append( [index_point] );
157			self._representatives.append(point);
158
159			self._amount_skipped_objects -= 1;
160			self._skipped_objects[index_point] = False;
161

annoviko / pyclustering

Push — 0.8.dev ( 7e1bf5...af3192 )

ttsas.__prcess_by_python() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like