ChineseWhispers::find_neighbor_ranges() - Code Metrics - matiasdelellis/facerecognition - Measure and Improve Code Quality continuously with Scrutinizer

ChineseWhispers::find_neighbor_ranges() A
last analyzed 2025-04-04 20:37 UTC

↳ Parent: ChineseWhispers

Complexity

Conditions	5
Paths	12

Size

Total Lines	18
Code Lines	11

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	0
CRAP Score	30

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	5
eloc	11
c	1
b	0
f	0
nc	12
nop	2
dl	0
loc	18
ccs	0
cts	12
cp	0
crap	30
rs	9.6111

<?php
declare(strict_types=1);
/**
 * @copyright Copyright (c) 2023, Matias De lellis
 *
 * @author Matias De lellis <[email protected]>
 *
 * @license AGPL-3.0-or-later
 *
 * This code is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License, version 3,
 * along with this program. If not, see <http://www.gnu.org/licenses/>
 *
 */

namespace OCA\FaceRecognition\Clusterer;


/**
 * This class implements the graph clustering algorithm described in the
 * paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
 * Application to Natural Language Processing Problems by Chris Biemann.
 *
 * In particular, it tries to be a shameless copy of the original dlib
 * implementation.
 *  - https://github.com/davisking/dlib/blob/master/dlib/clustering/chinese_whispers.h
 */
class ChineseWhispers {

	/**
	 * Cluster the nodes of a graph by assigning a label to each node, based
	 * only on the edges between them.
	 *
	 * Every node starts with its own label. Then, repeatedly, a pseudo-random
	 * node adopts the label that occurs most often amongst its neighbors (on a
	 * tie, the label seen first in edge order wins). Finally the labels are
	 * renumbered into the contiguous range 0..N-1.
	 *
	 * Note that this reseeds PHP's global Mersenne Twister generator with a
	 * fixed seed, so the result is reproducible for the same input.
	 *
	 * @param array $edges List of [from, to] node index pairs. All edges of a
	 *        node must be stored contiguously (e.g. sorted by "from"), and an
	 *        undirected link must be present in both directions; see
	 *        convert_unordered_to_ordered().
	 * @param array $labels Output. Overwritten with one label per node,
	 *        indexed by node index.
	 * @param int $num_iterations Number of label updates performed per node.
	 * @return int The number of distinct labels (clusters) found.
	 */
	public static function predict(array &$edges, array &$labels, int $num_iterations = 100): int
	{
		// To improve the stability of the clusters, we must
		// iterate the neighbors in a pseudo-random way.
		mt_srand(2023);

		$labels = [];
		if (count($edges) === 0)
			return 0;

		$neighbors = [];
		self::find_neighbor_ranges($edges, $neighbors);
		$num_nodes = count($neighbors);

		// Initialize the labels, each node gets a different label.
		for ($i = 0; $i < $num_nodes; ++$i)
			$labels[$i] = $i;

		$total_iterations = $num_nodes * $num_iterations;
		for ($iter = 0; $iter < $total_iterations; ++$iter)
		{
			// Pick a random node.
			$idx = mt_rand() % $num_nodes;

			// Count how many times each label happens amongst our neighbors.
			// Keys are created in edge order, which is what breaks ties below.
			$labels_to_counts = [];
			$begin = $neighbors[$idx][0];
			$end = $neighbors[$idx][1];
			for ($i = $begin; $i < $end; ++$i)
			{
				$neighbor_label = $labels[$edges[$i][1]];
				$labels_to_counts[$neighbor_label] = ($labels_to_counts[$neighbor_label] ?? 0) + 1;
			}

			// Find the most common label. A node without neighbors keeps
			// its current label.
			$best_score = PHP_INT_MIN;
			$best_label = $labels[$idx];
			foreach ($labels_to_counts as $label => $score)
			{
				if ($score > $best_score)
				{
					$best_score = $score;
					$best_label = $label;
				}
			}

			$labels[$idx] = $best_label;
		}

		// Remap the labels into a contiguous range.  First we find the
		// mapping: each label gets the next free id on its first appearance.
		$label_remap = [];
		$num_labels = count($labels);
		for ($i = 0; $i < $num_labels; ++$i)
		{
			if (!isset($label_remap[$labels[$i]]))
				$label_remap[$labels[$i]] = count($label_remap);
		}
		// Now apply the mapping to all the labels.
		for ($i = 0; $i < $num_labels; ++$i)
		{
			$labels[$i] = $label_remap[$labels[$i]];
		}

		return count($label_remap);
	}

	/**
	 * Compute, for every node, the range of positions within $edges that holds
	 * the edges leaving that node.
	 *
	 * After the call, [$neighbors[$i][0], $neighbors[$i][1]) is the half-open
	 * range within $edges that contains all of node $i's edges. Nodes without
	 * outgoing edges get the empty range [0, 0).
	 *
	 * A range is recorded each time the "from" node changes, so the edges of a
	 * node must be contiguous; otherwise only its last run is kept.
	 *
	 * @param array $edges List of [from, to] node index pairs.
	 * @param array $neighbors Output. Overwritten with one [begin, end] pair
	 *        per node.
	 */
	public static function find_neighbor_ranges (&$edges, &$neighbors) {

		// Always start from a clean slate so that stale entries of a reused
		// output array cannot leak into the result.
		$neighbors = [];

		$num_nodes = self::max_index_plus_one($edges);
		for ($i = 0; $i < $num_nodes; ++$i)
			$neighbors[$i] = [0, 0];

		$num_edges = count($edges);
		$cur_node = 0;
		$start_idx = 0;
		for ($i = 0; $i < $num_edges; ++$i)
		{
			if ($edges[$i][0] != $cur_node)
			{
				// The run of edges of $cur_node ended just before $i.
				$neighbors[$cur_node] = [$start_idx, $i];
				$start_idx = $i;
				$cur_node = $edges[$i][0];
			}
		}
		// Close the range of the last node seen.
		if (count($neighbors) !== 0)
			$neighbors[$cur_node] = [$start_idx, $num_edges];
	}

	/**
	 * Return the number of nodes implied by a list of edges, that is, the
	 * largest node index found on either end of any pair, plus one.
	 *
	 * @param array $pairs List of [from, to] node index pairs.
	 * @return int 0 for an empty list, otherwise the largest index + 1.
	 */
	public static function max_index_plus_one ($pairs): int {

		if (count($pairs) === 0)
			return 0;

		$max_idx = 0;
		foreach ($pairs as $pair)
			$max_idx = max($max_idx, $pair[0], $pair[1]);

		return $max_idx + 1;
	}

	/**
	 * Expand a list of undirected edges into directed ones.
	 *
	 * Each pair [a, b] is emitted as [a, b] followed by [b, a]; self loops
	 * (a == b) are emitted only once. The result keeps the input order and is
	 * not sorted by this method.
	 *
	 * @param array $edges List of [a, b] node index pairs.
	 * @param array $out_edges Output. Overwritten with the directed pairs.
	 */
	public static function convert_unordered_to_ordered (&$edges, &$out_edges)
	{
		$out_edges = [];
		foreach ($edges as $edge)
		{
			$out_edges[] = [$edge[0], $edge[1]];
			if ($edge[0] != $edge[1])
				$out_edges[] = [$edge[1], $edge[0]];
		}
	}
}


1			<?php
2			declare(strict_types=1);
3			/**
4			* @copyright Copyright (c) 2023, Matias De lellis
5			*
6			* @author Matias De lellis <[email protected]>
7			*
8			* @license AGPL-3.0-or-later
9			*
10			* This code is free software: you can redistribute it and/or modify
11			* it under the terms of the GNU Affero General Public License, version 3,
12			* as published by the Free Software Foundation.
13			*
14			* This program is distributed in the hope that it will be useful,
15			* but WITHOUT ANY WARRANTY; without even the implied warranty of
16			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17			* GNU Affero General Public License for more details.
18			*
19			* You should have received a copy of the GNU Affero General Public License, version 3,
20			* along with this program. If not, see <http://www.gnu.org/licenses/>
21			*
22			*/
23
24			namespace OCA\FaceRecognition\Clusterer;
25
26
27			/**
28			* This class implements the graph clustering algorithm described in the
29			* paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
30			* Application to Natural Language Processing Problems by Chris Biemann.
31			*
32			* In particular, it tries to be a shameless copy of the original dlib
33			* implementation.
34			* - https://github.com/davisking/dlib/blob/master/dlib/clustering/chinese_whispers.h
35			*/
36			class ChineseWhispers {
37
38			/**
39			* Cluster the dataset by assigning a label to each sample.from the edges
40			*/
41			static public function predict(array &$edges, array &$labels, int $num_iterations = 100)
42			{
43			// To improve the stability of the clusters, we must
44			// iterate the neighbors in a pseudo-random way.
45			mt_srand(2023);
46
47			$labels = [];
48			if (count($edges) == 0)
49			return 0;
50
51			$neighbors = [];
52			self::find_neighbor_ranges($edges, $neighbors);
53
54			// Initialize the labels, each node gets a different label.
55			for ($i = 0; $i < count($neighbors); ++$i)
			0 ignored issues – show Performance Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It seems like you are calling the size function `count()` as part of the test condition. You might want to compute the size beforehand, and not on each iteration. If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: for ($i=0; $i<count($array); $i++) { // calls count() on each iteration } // Better for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once } Loading history...
56			$labels[$i] = $i;
57
58			for ($iter = 0; $iter < count($neighbors)*$num_iterations; ++$iter)
59			{
60			// Pick a random node.
61			$idx = mt_rand()%count($neighbors);
62
63			// Count how many times each label happens amongst our neighbors.
64			$labels_to_counts = [];
65			$end = $neighbors[$idx][1];
66
67			for ($i = $neighbors[$idx][0]; $i != $end; ++$i)
68			{
69			$iLabelFirst = $edges[$i][1];
70			$iLabel = $labels[$iLabelFirst];
71			if (isset($labels_to_counts[$iLabel]))
72			$labels_to_counts[$iLabel]++;
73			else
74			$labels_to_counts[$iLabel] = 1;
75			}
76
77			// find the most common label
78			// std::map<unsigned long, double>::iterator i;
79			$best_score = PHP_INT_MIN;
80			$best_label = $labels[$idx];
81			foreach ($labels_to_counts as $key => $value)
82			{
83			if ($value > $best_score)
84			{
85			$best_score = $value;
86			$best_label = $key;
87			}
88			}
89
90			$labels[$idx] = $best_label;
91			}
92
93			// Remap the labels into a contiguous range. First we find the
94			// mapping.
95			$label_remap = [];
96			for ($i = 0; $i < count($labels); ++$i)
			0 ignored issues – show Performance Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It seems like you are calling the size function `count()` as part of the test condition. You might want to compute the size beforehand, and not on each iteration. If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: for ($i=0; $i<count($array); $i++) { // calls count() on each iteration } // Better for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once } Loading history...
97			{
98			$next_id = count($label_remap);
99			if (!isset($label_remap[$labels[$i]]))
100			$label_remap[$labels[$i]] = $next_id;
101			}
102			// now apply the mapping to all the labels.
103			for ($i = 0; $i < count($labels); ++$i)
			0 ignored issues – show Performance Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It seems like you are calling the size function `count()` as part of the test condition. You might want to compute the size beforehand, and not on each iteration. If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: for ($i=0; $i<count($array); $i++) { // calls count() on each iteration } // Better for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once } Loading history...
104			{
105			$labels[$i] = $label_remap[$labels[$i]];
106			}
107
108			return count($label_remap);
109			}
110
111			static function find_neighbor_ranges (&$edges, &$neighbors) {
			0 ignored issues – show Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It is generally recommended to explicitly declare the visibility for methods. Adding explicit visibility (`private`, `protected`, or `public`) is generally recommended to communicate to other developers how, and from where this method is intended to be used. Loading history...
112			// setup neighbors so that [neighbors[i].first, neighbors[i].second) is the range
113			// within edges that contains all node i's edges.
114			$num_nodes = self::max_index_plus_one($edges);
115			for ($i = 0; $i < $num_nodes; ++$i) $neighbors[$i] = [0, 0];
116			$cur_node = 0;
117			$start_idx = 0;
118			for ($i = 0; $i < count($edges); ++$i)
			0 ignored issues – show Performance Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It seems like you are calling the size function `count()` as part of the test condition. You might want to compute the size beforehand, and not on each iteration. If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: for ($i=0; $i<count($array); $i++) { // calls count() on each iteration } // Better for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once } Loading history...
119			{
120			if ($edges[$i][0] != $cur_node)
121			{
122			$neighbors[$cur_node] = [$start_idx, $i];
123			$start_idx = $i;
124			$cur_node = $edges[$i][0];
125			}
126			}
127			if (count($neighbors) !== 0)
128			$neighbors[$cur_node] = [$start_idx, count($edges)];
129			}
130
131			static function max_index_plus_one ($pairs): int {
			0 ignored issues – show Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It is generally recommended to explicitly declare the visibility for methods. Adding explicit visibility (`private`, `protected`, or `public`) is generally recommended to communicate to other developers how, and from where this method is intended to be used. Loading history...
132			if (count($pairs) === 0)
133			{
134			return 0;
135			}
136			else {
137			$max_idx = 0;
138			for ($i = 0; $i < count($pairs); ++$i)
			0 ignored issues – show Performance Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It seems like you are calling the size function `count()` as part of the test condition. You might want to compute the size beforehand, and not on each iteration. If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: for ($i=0; $i<count($array); $i++) { // calls count() on each iteration } // Better for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once } Loading history...
139			{
140			if ($pairs[$i][0] > $max_idx)
141			$max_idx = $pairs[$i][0];
142			if ($pairs[$i][1] > $max_idx)
143			$max_idx = $pairs[$i][1];
144			}
145			return $max_idx + 1;
146			}
147			}
148
149			static function convert_unordered_to_ordered (&$edges, &$out_edges)
			0 ignored issues – show Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It is generally recommended to explicitly declare the visibility for methods. Adding explicit visibility (`private`, `protected`, or `public`) is generally recommended to communicate to other developers how, and from where this method is intended to be used. Loading history...
150			{
151			$out_edges = [];
152			for ($i = 0; $i < count($edges); ++$i)
			0 ignored issues – show Performance Best Practice introduced 2023-08-22 23:32 UTC by Report Bug Copy Issue Report It seems like you are calling the size function `count()` as part of the test condition. You might want to compute the size beforehand, and not on each iteration. If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: for ($i=0; $i<count($array); $i++) { // calls count() on each iteration } // Better for ($i=0, $c=count($array); $i<$c; $i++) { // calls count() just once } Loading history...
153			{
154			$out_edges[] = [$edges[$i][0], $edges[$i][1]];
155			if ($edges[$i][0] != $edges[$i][1])
156			$out_edges[] = [$edges[$i][1], $edges[$i][0]];
157			}
158			}
159			}
160

matiasdelellis / facerecognition

ChineseWhispers::find_neighbor_ranges() A last analyzed 2025-04-04 20:37 UTC

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like

ChineseWhispers::find_neighbor_ranges() A
last analyzed 2025-04-04 20:37 UTC