Completed
Push — master ( 31eb51...a3fb7f )
by Matias
16s queued 15s
created

CreateClustersTask::getNewClusters()   B

Complexity

Conditions 11
Paths 24

Size

Total Lines 58
Code Lines 35

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 23.7398

Importance

Changes 4
Bugs 0 Features 1
Metric Value
cc 11
eloc 35
c 4
b 0
f 1
nc 24
nop 1
dl 0
loc 58
ccs 19
cts 36
cp 0.5278
crap 23.7398
rs 7.3166

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that, for each user, creates person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Person mapper*/
46
	private $personMapper;
47
48
	/** @var ImageMapper Image mapper*/
49
	private $imageMapper;
50
51
	/** @var FaceMapper Face mapper*/
52
	private $faceMapper;
53
54
	/** @var SettingsService Settings service*/
55
	private $settingsService;
56
57
	/**
58
	 * @param PersonMapper $personMapper
59
	 * @param ImageMapper $imageMapper
60
	 * @param FaceMapper $faceMapper
61
	 * @param SettingsService $settingsService
62
	 */
63 3
	public function __construct(
		PersonMapper    $personMapper,
		ImageMapper     $imageMapper,
		FaceMapper      $faceMapper,
		SettingsService $settingsService
	) {
		// Initialise the parent task first, then keep the injected collaborators.
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper      = $faceMapper;
		$this->imageMapper     = $imageMapper;
		$this->personMapper    = $personMapper;
	}
75
76
	/**
77
	 * @inheritdoc
78
	 */
79 1
	public function description() {
		// Human-readable summary of what this task does.
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
82
83
	/**
84
	 * @inheritdoc
85
	 */
86 1
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// Handle the eligible users one at a time and yield after each of them,
		// which makes this method a generator that is resumed per user.
		foreach ($this->context->getEligibleUsers() as $eligibleUser) {
			$this->createClusterIfNeeded($eligibleUser);
			yield;
		}

		return true;
	}
96
97
	/**
98
	 * @return void
99
	 */
100 1
	private function createClusterIfNeeded(string $userId) {
		// Decide whether the person clusters of $userId have to be created or recreated
		// for the current face model. If so: cluster the groupable faces (optionally in
		// batches), add every non-groupable face as its own cluster, merge the outcome
		// with the existing persons and store it through the person mapper.
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		if ($hasPersons) {
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			}
			else if ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
			}
			else {
				// If there are no invalid persons, and there are no recent new faces, no need to recreate clusters
				$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
				return;
			}
		}
		else {
			// User should not be able to use this directly, used in tests
			$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
			$needCreate = $this->needCreateFirstTime($userId, $modelId);

			if ($forceTestCreation) {
				$this->logInfo('Force the creation of clusters for testing');
			}
			else if ($needCreate) {
				$this->logInfo('Face clustering will be created for the first time.');
			}
			else {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo('* have 1000 faces already processed');
				$this->logInfo('* or you need to have 95% of you images processed');
				$this->logInfo('Use stats command to track progress');
				return;
			}
		}

		// Ok. If we are here, the clusters must be recreated.
		$min_face_size = $this->settingsService->getMinimumFaceSize();
		$min_confidence = $this->settingsService->getMinimumConfidence();

		$faces = $this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence);
		$nonGroupables = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence);

		$facesCount = count($faces);
		$this->logInfo('There are ' . $facesCount . ' faces for clustering and '. count($nonGroupables) . ' that cannot be grouped.');

		// Default: a single batch holding every groupable face.
		$noSlices = 1;
		$sliceSize = $facesCount;

		$defaultSlice = $this->settingsService->getClusterigBatchSize();
		// Only split into batches when batching is enabled (> 0) AND there are faces.
		// With zero faces the clamp below would turn the batch size into 0 and the
		// division would fail, so that case keeps the single (empty) batch.
		if ($defaultSlice > 0 && $facesCount > 0) {
			// The minimum batch size is 2000 faces
			$defaultSlice = max($defaultSlice, 2000);
			// The maximum batch size is the faces count.
			$defaultSlice = min($defaultSlice, $facesCount);
			$noSlices = intval($facesCount / $defaultSlice) + 1;
			// ceil() returns a float while array_slice() expects integer offset/length.
			$sliceSize = (int) ceil($facesCount / $noSlices);
		}

		$this->logDebug('We will cluster these with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces.');

		$newClusters = [];
		// Obtain the clusters in batches and append them.
		for ($i = 0; $i < $noSlices ; $i++) {
			// Get the batch.
			$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
			// Get the ids, load their descriptors, obtain the partial clusters and incorporate them.
			$faceIds = array_map(function ($face) { return $face['id']; }, $facesSliced);
			$facesDescripted = $this->faceMapper->findDescriptorsBathed($faceIds);
			$newClusters = array_merge($newClusters, $this->getNewClusters($facesDescripted));
			// Discard variables aggressively to improve memory consumption.
			unset($facesDescripted);
			unset($facesSliced);
		}

		// Append non groupable faces in a single step.
		$newClusters = array_merge($newClusters, $this->getFakeClusters($nonGroupables));

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters(array_merge($faces, $nonGroupables));

		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// Map the new clusters onto the existing person ids and persist the result.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);

		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}
214
215
	/**
216
	 * Evaluate whether we want to recreate clusters. We want to recreate clusters/persons if:
217
	 * - Some cluster/person is invalidated (is_valid is false for someone)
218
	 *   - This means some image that belonged to this user is changed, deleted etc.
219
	 * - There are some new faces. Now, we don't want to jump the gun here. We want to either have:
220
	 *   - more than 25 new faces, or
221
	 *   - less than 25 new faces, but they are older than 2h
222
	 *
223
	 * (basically, we want to avoid recreating cluster for each new face being uploaded,
224
	 *  however, we don't want to wait too much as clusters could be changed a lot)
225
	 */
226
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$unassignedFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf(
			'Found %d faces without associated persons for user %s and model %d',
			$unassignedFaces,
			$userId,
			$modelId
		));

		// todo: get rid of magic numbers (move to config)
		if ($unassignedFaces === 0) {
			// Nothing new at all.
			return false;
		}
		if ($unassignedFaces >= 25) {
			// Enough new faces to justify a new clustering right away.
			return true;
		}

		// Only a few new faces: the decision depends on how long the oldest one has been waiting.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime->getTimestamp();
		$now = (new \DateTime())->getTimestamp();
		$this->logDebug(sprintf(
			'Oldest face without persons for user %s and model %d is from %s',
			$userId,
			$modelId,
			$oldestFace->creationTime->format('Y-m-d H:i:s')
		));

		// todo: get rid of magic numbers (move to config)
		// Recreate once the oldest unassigned face is more than two hours old.
		return ($now - $createdAt) > 2 * 60 * 60;
	}
252
253
	/**
	 * Tell whether PersonMapper::countClusters() reports at least one cluster for
	 * the user and model when called with its third argument set to true.
	 *
	 * NOTE(review): the third argument presumably restricts the count to
	 * invalidated (stale) clusters - confirm against PersonMapper.
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);
		return $staleClusters > 0;
	}
256
257
	/**
	 * Return the value SettingsService::getNeedRecreateClusters() holds for the user.
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);
		return $recreateRequested;
	}
260
261 1
	/**
	 * Decide whether enough data has been collected to create clusters for the first time.
	 *
	 * Returns true when creation is forced, when more than 1000 faces exist, or when
	 * more than 95% of the user's images are processed; false otherwise (including
	 * when the user has no images or no processed images).
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);
		return $processedRatio > 0.95;
	}
287
288 1
	/**
	 * Group the ids of the given faces by the person they are currently assigned to.
	 * Faces whose 'person' is null are left out.
	 *
	 * @param array $faces Faces, each with an 'id' and a 'person' entry
	 * @return array Map of person id => list of face ids
	 */
	private function getCurrentClusters(array $faces): array {
		$facesByPerson = [];
		foreach ($faces as $face) {
			$personId = $face['person'];
			if ($personId === null) {
				continue;
			}
			// The inner list is created implicitly on the first append.
			$facesByPerson[$personId][] = $face['id'];
		}
		return $facesByPerson;
	}
300
301 1
	/**
	 * Wrap every given face in a cluster of its own.
	 *
	 * @param array $faces List of faces (read by index 0..count-1), each with an 'id'
	 * @return array List of single-element lists of face ids
	 */
	private function getFakeClusters(array $faces): array {
		$singleFaceClusters = [];
		$total = count($faces);
		$index = 0;
		while ($index < $total) {
			$singleFaceClusters[] = [$faces[$index]['id']];
			$index++;
		}
		return $singleFaceClusters;
	}
310
311 1
	/**
	 * Cluster the given faces with Chinese Whispers.
	 *
	 * Two faces are linked when the distance between their descriptors is below the
	 * configured sensitivity. When Requirements::pdlibLoaded() is true the dlib_*
	 * functions are used, otherwise the Euclidean / ChineseWhispers PHP classes.
	 *
	 * @param array $faces List of faces (read by index 0..count-1), each with an 'id' and a 'descriptor'
	 * @return array Map of cluster label => list of face ids
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();

		if (Requirements::pdlibLoaded()) {
			$edges = $this->buildNeighborEdges($faces, $sensitivity, static function ($first, $second) {
				return dlib_vector_length($first, $second);
			});
			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = dlib_chinese_whispers($edges);
		} else {
			$edges = $this->buildNeighborEdges($faces, $sensitivity, static function ($first, $second) {
				return Euclidean::distance($first, $second);
			});
			$labelsByIndex = $this->predictLabelsWithPhpClusterer($edges);
		}

		return $this->groupFaceIdsByLabel($faces, $labelsByIndex);
	}

	/**
	 * Create the edges (neighbors) for Chinese Whispers: one [i, j] pair (i <= j) for
	 * every pair of faces whose descriptor distance is below the sensitivity.
	 *
	 * The inner loop starts at $i, so each face is also compared with itself.
	 *
	 * @param array $faces List of faces, each with a 'descriptor'
	 * @param mixed $sensitivity Distance threshold
	 * @param callable $distance Receives two descriptors and returns their distance
	 * @return array List of [i, j] index pairs
	 */
	private function buildNeighborEdges(array $faces, $sensitivity, callable $distance): array {
		$edges = array();
		$facesCount = count($faces);
		for ($i = 0; $i < $facesCount; $i++) {
			$descriptor = $faces[$i]['descriptor'];
			for ($j = $i; $j < $facesCount; $j++) {
				if ($distance($descriptor, $faces[$j]['descriptor']) < $sensitivity) {
					$edges[] = array($i, $j);
				}
			}
		}
		return $edges;
	}

	/**
	 * Run the PHP Chinese Whispers implementation on the given edges.
	 *
	 * @param array $edges List of [i, j] index pairs
	 * @return array Labels filled in by ChineseWhispers::predict()
	 */
	private function predictLabelsWithPhpClusterer(array $edges): array {
		// The clustering algorithm actually expects ordered lists.
		$orderedEdges = [];
		ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
		usort($orderedEdges, function($a, $b) {
			if ($a[0] === $b[0]) return $a[1] - $b[1];
			return $a[0] - $b[0];
		});

		// Given the edges get the list of labels (found clusters) for each face.
		$labelsByIndex = [];
		ChineseWhispers::predict($orderedEdges, $labelsByIndex);
		return $labelsByIndex;
	}

	/**
	 * Turn the per-index labels into clusters of face ids.
	 *
	 * @param array $faces List of faces, each with an 'id'
	 * @param mixed $labelsByIndex Countable list where entry i is the label of $faces[i]
	 * @return array Map of label => list of face ids
	 */
	private function groupFaceIdsByLabel(array $faces, $labelsByIndex): array {
		$clusters = array();
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			// The inner list is created implicitly on the first append.
			$clusters[$labelsByIndex[$i]][] = $faces[$i]['id'];
		}
		return $clusters;
	}
370
371
	/**
372
	 * todo: only reason this is public is because of tests. Go figure it out better.
373
	 */
374 3
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Maps the labels of the new clusters onto existing person ids: every new
		// cluster inherits the old person it shares the most faces with (each old
		// person is handed out once), all remaining clusters get fresh ids above
		// the highest old person id.
		// Both arguments and the result are maps of person id => list of face ids.

		// Reverse index face id => old person id, so each new face is resolved with a
		// single lookup. Only the first old person owning a face is recorded.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face id => [old person or null, new person]
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions. A missing old person (null) gives a key like ":5",
		// which is read back as old person 0 below.
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent first.
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// This new person already got its best match.
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a stronger match: treat as new person.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition and therefore has no
			// mapping; handle it as a brand new person instead of reading a missing key.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
434
}
435