CreateClustersTask::createClusterIfNeeded()   C
last analyzed

Complexity

Conditions 11
Paths 34

Size

Total Lines 121
Code Lines 64

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 47
CRAP Score 12.7127

Importance

Changes 4
Bugs 0 Features 0
Metric Value
cc 11
eloc 64
c 4
b 0
f 0
nc 34
nop 1
dl 0
loc 121
ccs 47
cts 62
cp 0.7581
crap 12.7127
rs 6.6387

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that, for each eligible user, creates or updates the person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Mapper used to count, merge and clean up persons (clusters). */
	private $personMapper;

	/** @var ImageMapper Mapper used to count the images of a user. */
	private $imageMapper;

	/** @var FaceMapper Mapper used to count and fetch the detected faces. */
	private $faceMapper;

	/** @var SettingsService Source of the clustering settings and per-user flags. */
	private $settingsService;

	/**
	 * Stores the collaborators this task works with.
	 *
	 * @param PersonMapper $personMapper
	 * @param ImageMapper $imageMapper
	 * @param FaceMapper $faceMapper
	 * @param SettingsService $settingsService
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
75
76
	/**
	 * @inheritdoc
	 *
	 * Returns the short, human-readable summary of this task.
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';

		return $summary;
	}
82
83
	/**
	 * @inheritdoc
	 *
	 * Runs the cluster (re)creation check for every eligible user of the
	 * context and yields once after each user has been handled.
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		foreach ($this->context->getEligibleUsers() as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
96
97
	/**
	 * Creates or recreates the person clusters of one user when it is worthwhile.
	 *
	 * The method first decides whether clustering has to run at all. If so, it
	 * clusters the groupable faces (optionally in batches), appends one
	 * single-face cluster per non groupable face, merges the outcome with the
	 * clusters currently stored, removes orphaned persons and finally resets
	 * the per-user flags that request a (re)creation.
	 *
	 * @param string $userId ID of the user whose clusters are processed
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		if (!$this->shouldBuildClusters($userId, $modelId)) {
			return;
		}

		// Ok. If we are here, the clusters must be (re)created.
		$min_face_size = $this->settingsService->getMinimumFaceSize();
		$min_confidence = $this->settingsService->getMinimumConfidence();

		$faces = $this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence);
		$this->logInfo('There are ' . count($faces) . ' faces for clustering.');

		$newClusters = $this->clusterFacesInBatches($faces);

		// Append non groupable faces on a single step.
		$nonGroupables = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence);
		$this->logInfo('We will add '. count($nonGroupables) . ' faces that cannot be grouped.');
		$newClusters = array_merge($newClusters, $this->getFakeClusters($nonGroupables));

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever the clustering step decides to identify them.
		$currentClusters = $this->getCurrentClusters(array_merge($faces, $nonGroupables));
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// Discard variables aggressively to improve memory consumption.
		unset($faces);
		unset($nonGroupables);

		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the clusters of the user must be created or recreated,
	 * logging the reason for the decision.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the current face model
	 * @return bool true when clustering has to run, false when it can be skipped
	 */
	private function shouldBuildClusters(string $userId, $modelId): bool {
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;

		if ($hasPersons) {
			// All three checks are evaluated up front, exactly as before, so
			// their debug output does not depend on the outcome of the others.
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
				return true;
			}
			if ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
				return true;
			}

			// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return false;
		}

		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}
		if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of you images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}

	/**
	 * Clusters the given groupable faces, splitting the work in several
	 * batches when a clustering batch size is configured.
	 *
	 * @param array $faces List of face rows exposing at least the 'id' key
	 * @return array List of clusters, each one being a list of face IDs
	 */
	private function clusterFacesInBatches(array $faces): array {
		$facesCount = count($faces);

		// The default slice is just one for the total.
		$noSlices = 1;
		$sliceSize = $facesCount;

		// Now calculate it if there is a batch size configured.
		$batchSize = $this->settingsService->getClusterigBatchSize();
		if ($facesCount > 0 && $batchSize > 0) {
			// The minimum batch size is 2000 faces.
			$batchSize = max($batchSize, 2000);
			// The maximum batch size is the faces count.
			$batchSize = min($batchSize, $facesCount);

			// Calculate the number of slices and their sizes.
			$noSlices = intval($facesCount / $batchSize) + 1;
			// ceil() returns a float, while array_slice() expects an integer
			// offset and length, so convert it explicitly.
			$sliceSize = (int) ceil($facesCount / $noSlices);
		}

		$this->logDebug('We will cluster these with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces.');

		$newClusters = [];
		// Obtain the clusters in batches and append them.
		for ($i = 0; $i < $noSlices; $i++) {
			$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);

			// Get the indices, obtain the partial clusters and incorporate them.
			$faceIds = array_map(function ($face) { return $face['id']; }, $facesSliced);
			$facesDescripted = $this->faceMapper->findDescriptorsBathed($faceIds);
			$newClusters = array_merge($newClusters, $this->getNewClusters($facesDescripted));

			// Discard variables aggressively to improve memory consumption.
			unset($facesDescripted);
			unset($facesSliced);
		}

		return $newClusters;
	}
222
223
	/**
	 * Tells whether the faces that are not yet associated with a person
	 * justify recreating the clusters. That is the case when there are:
	 *   - 25 or more such faces, or
	 *   - fewer than 25, but the oldest of them was created more than 2 hours ago.
	 *
	 * (basically, we want to avoid recreating clusters for each new face being uploaded,
	 *  however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool true when the clusters should be recreated because of new faces
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$unassignedFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf(
			'Found %d faces without associated persons for user %s and model %d',
			$unassignedFaces,
			$userId,
			$modelId
		));

		// todo: get rid of magic numbers (move to config)
		if ($unassignedFaces === 0) {
			return false;
		}
		if ($unassignedFaces >= 25) {
			return true;
		}

		// We have some faces, but not that many, let's see when the oldest one was generated.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime->getTimestamp();
		$now = time();
		$this->logDebug(sprintf(
			'Oldest face without persons for user %s and model %d is from %s',
			$userId,
			$modelId,
			$oldestFace->creationTime->format('Y-m-d H:i:s')
		));

		// todo: get rid of magic numbers (move to config)
		$ageInSeconds = $now - $createdAt;

		return $ageInSeconds > 2 * 60 * 60;
	}
260
261
	/**
	 * Tells whether the person mapper reports at least one cluster for the
	 * user and model when queried with its third argument set to true.
	 * NOTE(review): judging by the method name that flag presumably restricts
	 * the count to stale/invalid persons - confirm against PersonMapper.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool true when that count is greater than zero
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleCount = $this->personMapper->countClusters($userId, $modelId, true);

		return $staleCount > 0;
	}
264
265
	/**
	 * Reads the per-user "need recreate clusters" flag from the settings.
	 *
	 * @param string $userId ID of the user
	 * @return bool the value of the flag
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);

		return $recreateRequested;
	}
268
269 1
	/**
	 * Tells whether enough data has been collected to create the clusters of
	 * a user for the first time.
	 *
	 * The answer is yes when the forced-creation flag is set, or when the user
	 * has processed images and either more than 1000 faces or more than 95%
	 * of the images processed.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool true when the clusters should be created
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);

		return $processedRatio > 0.95;
	}
295
296 1
	/**
	 * Groups the IDs of the given faces by the person they are assigned to.
	 *
	 * Faces whose 'person' entry is null are left out.
	 *
	 * @param array $faces Face rows exposing at least the 'id' and 'person' keys
	 * @return array Map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];

		foreach ($faces as $face) {
			if ($face['person'] === null) {
				continue;
			}
			// Appending creates the per-person list on first use.
			$clustersByPerson[$face['person']][] = $face['id'];
		}

		return $clustersByPerson;
	}
308
309 1
	/**
	 * Wraps every given face in a cluster of its own.
	 *
	 * The faces are iterated directly instead of by numeric index, so the
	 * result no longer depends on the input having contiguous zero-based keys.
	 *
	 * @param array $faces Face rows exposing at least the 'id' key
	 * @return array List of single-element clusters, one per face, in input order
	 */
	private function getFakeClusters(array $faces): array {
		$newClusters = [];

		foreach ($faces as $face) {
			$newClusters[] = [$face['id']];
		}

		return $newClusters;
	}
318
319 1
	/**
	 * Clusters the given faces with the Chinese Whispers algorithm.
	 *
	 * Two faces are connected by an edge when the distance between their
	 * descriptors is below the configured sensitivity. When pdlib is loaded
	 * its native functions are used for both the distance and the clustering,
	 * otherwise the PHP implementations (Euclidean, ChineseWhispers) are used.
	 *
	 * @param array $faces List (zero-based) of face rows exposing the 'id' and 'descriptor' keys
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();
		$usePdlib = Requirements::pdlibLoaded();

		// Create edges (neighbors) for Chinese Whispers. The inner loop starts
		// at $i, so every face is also compared with itself.
		$edges = [];
		$faces_count = count($faces);
		for ($i = 0; $i < $faces_count; $i++) {
			$face1 = $faces[$i];
			for ($j = $i; $j < $faces_count; $j++) {
				$face2 = $faces[$j];
				$distance = $usePdlib
					? dlib_vector_length($face1['descriptor'], $face2['descriptor'])
					: Euclidean::distance($face1['descriptor'], $face2['descriptor']);
				if ($distance < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		// Given the edges get the list of labels (found clusters) for each face.
		if ($usePdlib) {
			$newChineseClustersByIndex = dlib_chinese_whispers($edges);
		} else {
			// The clustering algorithm actually expects ordered lists.
			$oedges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $oedges);
			usort($oedges, function ($a, $b) {
				// Order by first node, then by second node.
				return ($a[0] <=> $b[0]) ?: ($a[1] <=> $b[1]);
			});

			$newChineseClustersByIndex = [];
			ChineseWhispers::predict($oedges, $newChineseClustersByIndex);
		}

		// Label at position $i belongs to the face at position $i.
		$newClusters = [];
		for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
			$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]['id'];
		}

		return $newClusters;
	}
378
379
	/**
	 * Relabels freshly computed clusters so they keep the IDs of the existing
	 * persons they overlap with the most.
	 *
	 * For every face of the new clusters the transition "old person -> new
	 * cluster" is recorded and counted. Going from the most to the least
	 * frequent transition, each new cluster takes over the ID of an old person
	 * that has not been claimed yet. New clusters left without an old person
	 * (including clusters with an empty face list) get fresh IDs, starting
	 * right after the highest old person ID, or at 1 when there are no old
	 * clusters.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of resulting person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index every old face by the first old person that contains it, so
		// each new face is resolved with one lookup instead of scanning all
		// old clusters.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A face without old person yields a key like ":3".
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]); // 0 means "no old person"
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already claimed by a stronger transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition; treat it as new.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
442
}
443