Completed
Push — master ( 31eb51...a3fb7f )
by Matias
16s queued 15s
created

CreateClustersTask::getFakeClusters()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2.3149

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 6
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 8
ccs 4
cts 7
cp 0.5714
crap 2.3149
rs 10
1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that, for each user, creates the person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Person mapper*/
46
	private $personMapper;
47
48
	/** @var ImageMapper Image mapper*/
49
	private $imageMapper;
50
51
	/** @var FaceMapper Face mapper*/
52
	private $faceMapper;
53
54
	/** @var SettingsService Settings service*/
55
	private $settingsService;
56
57
	/**
	 * Build the task with the mappers and the settings service it works with.
	 *
	 * @param PersonMapper $personMapper Mapper used to count, merge and clean up persons
	 * @param ImageMapper $imageMapper Mapper used to count the images of a user
	 * @param FaceMapper $faceMapper Mapper used to count and fetch faces
	 * @param SettingsService $settingsService Source of the face model, thresholds and flags
	 */
	public function __construct(
		PersonMapper    $personMapper,
		ImageMapper     $imageMapper,
		FaceMapper      $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper      = $faceMapper;
		$this->imageMapper     = $imageMapper;
		$this->personMapper    = $personMapper;
	}
75
76
	/**
	 * @inheritdoc
	 *
	 * Returns the fixed, human-readable description of this task.
	 */
	public function description() {
		return 'Create new persons or update existing persons';
	}
82
83
	/**
	 * @inheritdoc
	 *
	 * Stores the context, then goes through its eligible users and creates or
	 * recreates the clusters of each one when needed. The method is a generator:
	 * it yields once per user and finally returns true.
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		foreach ($this->context->getEligibleUsers() as $userId) {
			$this->createClusterIfNeeded($userId);
			// Hand control back to the caller after each user.
			yield;
		}

		return true;
	}
96
97
	/**
	 * Create or recreate the person clusters of one user, but only when it is worth it.
	 *
	 * When the user already has persons, the clusters are rebuilt if the settings
	 * request it, or if hasNewFacesToRecreate() or hasStalePersonsToRecreate()
	 * say so. When the user has no persons yet, they are built if creation is
	 * forced (used in tests) or needCreateFirstTime() agrees. In any other case
	 * the reason is logged and the method returns without changing anything.
	 *
	 * The groupable faces are clustered in one or more batches, the non groupable
	 * faces are appended as single-face clusters, and the result is merged with
	 * the current clusters and written through the person mapper.
	 *
	 * @param string $userId User whose faces are clustered
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		//
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		if ($hasPersons) {
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			}
			else if ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
			}
			else {
				// If there are no invalid persons, and there are no recent new faces, no need to recreate the clusters
				$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
				return;
			}
		}
		else {
			// User should not be able to use this directly, used in tests
			$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
			$needCreate = $this->needCreateFirstTime($userId, $modelId);

			if ($forceTestCreation) {
				$this->logInfo('Force the creation of clusters for testing');
			}
			else if ($needCreate) {
				$this->logInfo('Face clustering will be created for the first time.');
			}
			else {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo('* have 1000 faces already processed');
				$this->logInfo('* or you need to have 95% of you images processed');
				$this->logInfo('Use stats command to track progress');
				return;
			}
		}

		// Ok. If we are here, the clusters must be recreated.
		//

		$min_face_size = $this->settingsService->getMinimumFaceSize();
		$min_confidence = $this->settingsService->getMinimumConfidence();

		$faces = $this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence);
		$nonGroupables = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence);

		$facesCount = count($faces);
		$this->logInfo('There are ' . $facesCount . ' faces for clustering and '. count($nonGroupables) . ' that cannot be grouped.');

		// By default everything is clustered in a single batch.
		$noSlices = 1;
		$sliceSize = $facesCount;

		$defaultSlice = $this->settingsService->getClusterigBatchSize();
		// Batching only applies when there is something to split. Without the
		// faces count check the batch size would be capped to 0 and the
		// divisions below would divide by zero.
		if ($defaultSlice > 0 && $facesCount > 0) {
			// The minimum batch size is 2000 faces.
			$defaultSlice = max($defaultSlice, 2000);
			// The maximum batch size is the faces count.
			$defaultSlice = min($defaultSlice, $facesCount);
			// Smallest number of batches that respects the batch size. Rounding up
			// (instead of always adding one) keeps a set that fits in one batch whole.
			$noSlices = (int) ceil($facesCount / $defaultSlice);
			// Spread the faces evenly; cast because array_slice() expects integers.
			$sliceSize = (int) ceil($facesCount / $noSlices);
		}

		$this->logDebug('We will cluster these with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces.');

		$newClusters = [];
		// Obtain the clusters in batches and append them.
		for ($i = 0; $i < $noSlices ; $i++) {
			// Get the batches.
			$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
			// Get the indices, obtain the partial clusters and incorporate them.
			$faceIds = array_map(function ($face) { return $face['id']; }, $facesSliced);
			$facesDescripted = $this->faceMapper->findDescriptorsBathed($faceIds);
			$newClusters = array_merge($newClusters, $this->getNewClusters($facesDescripted));
			// Discard variables aggressively to improve memory consumption.
			unset($facesDescripted);
			unset($facesSliced);
		}

		// Append non groupable faces on a single step.
		$newClusters = array_merge($newClusters, $this->getFakeClusters($nonGroupables));

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		//
		$currentClusters = $this->getCurrentClusters(array_merge($faces, $nonGroupables));

		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// New merge
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);

		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Clear both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}
214
215
	/**
	 * Tell whether the faces that still have no person justify recreating the clusters.
	 *
	 * We don't want to jump the gun here, so the answer is yes only when there are:
	 *   - 25 or more faces without a person, or
	 *   - fewer than 25, but the oldest of them was created more than 2 hours ago.
	 *
	 * (basically, we want to avoid recreating the clusters for each new face being uploaded,
	 *  however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId User whose faces are counted
	 * @param int $modelId Face model the faces belong to
	 * @return bool True when the clusters should be recreated because of new faces
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$pendingFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $pendingFaces, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($pendingFaces === 0) {
			return false;
		}
		if ($pendingFaces >= 25) {
			return true;
		}

		// We have some faces, but not that many, let's see when the oldest one was generated.
		$oldest = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldest->creationTime->getTimestamp();
		$now = (new \DateTime())->getTimestamp();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $oldest->creationTime->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		return ($now - $createdAt) > 2 * 60 * 60;
	}
252
253
	/**
	 * Tell whether the person mapper counts at least one cluster for the user and
	 * model when called with its third argument set to true.
	 *
	 * NOTE(review): presumably that flag restricts the count to invalidated
	 * (stale) clusters - confirm against PersonMapper::countClusters().
	 *
	 * @param string $userId User whose clusters are counted
	 * @param int $modelId Face model the clusters belong to
	 * @return bool True when that count is greater than zero
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);

		return $staleClusters > 0;
	}
256
257
	/**
	 * Read from the settings whether a recreation of the clusters was requested for the user.
	 *
	 * @param string $userId User whose flag is read
	 * @return bool Value of the "need recreate clusters" setting
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);

		return $recreateRequested;
	}
260
261 1
	/**
	 * Decide whether enough data was collected to create the clusters for the first time.
	 *
	 * Creation is accepted when it is forced through the settings (used in tests),
	 * when there are more than 1000 faces, or when more than 95% of the images
	 * of the user are processed. Without images, or without processed images,
	 * it is always rejected.
	 *
	 * @param string $userId User to evaluate
	 * @param int $modelId Face model to evaluate
	 * @return bool True when the clusters should be created
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forced = $this->settingsService->_getForceCreateClusters($userId);
		if ($forced) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// Such clusters would be small and not "stable" enough, so it is better to wait for more images.
		// todo: get rid of magic numbers (move to config)
		$knownFaces = $this->faceMapper->countFaces($userId, $modelId);
		if ($knownFaces > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);

		return $processedRatio > 0.95;
	}
287
288 1
	/**
	 * Group the given faces by the person they are currently assigned to.
	 *
	 * Faces whose 'person' entry is null are left out.
	 *
	 * @param array $faces Faces as arrays with at least the 'id' and 'person' keys
	 * @return array Map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			$personId = $face['person'];
			if ($personId === null) {
				continue;
			}
			// The list of a person is created on its first face.
			$clustersByPerson[$personId][] = $face['id'];
		}

		return $clustersByPerson;
	}
300
301 1
	/**
	 * Wrap every given face in a cluster of its own.
	 *
	 * @param array $faces Faces as arrays with at least the 'id' key
	 * @return array List of clusters, each one holding the ID of a single face
	 */
	private function getFakeClusters(array $faces): array {
		$newClusters = [];
		// Iterate by value instead of by position, so arrays with gaps or
		// non-numeric keys are handled as well as plain lists.
		foreach ($faces as $face) {
			$newClusters[] = [$face['id']];
		}

		return $newClusters;
	}
310
311 1
	/**
	 * Cluster the given faces with Chinese Whispers, based on the distance between descriptors.
	 *
	 * Two faces are connected when the distance between their descriptors is below
	 * the configured sensitivity. When pdlib is loaded its native functions do the
	 * work, otherwise the PHP implementations (Euclidean and ChineseWhispers) are used.
	 *
	 * @param array $faces List of faces (keys 0..n-1) as arrays with the 'id' and 'descriptor' keys
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$threshold = $this->settingsService->getSensitivity();
		$usePdlib = Requirements::pdlibLoaded();

		// Create edges (neighbors) for Chinese Whispers. Every face is also compared
		// with itself, as the inner loop starts at the index of the outer one.
		$edges = [];
		$total = count($faces);
		for ($a = 0; $a < $total; $a++) {
			$left = $faces[$a];
			for ($b = $a; $b < $total; $b++) {
				$right = $faces[$b];
				$distance = $usePdlib
					? dlib_vector_length($left['descriptor'], $right['descriptor'])
					: Euclidean::distance($left['descriptor'], $right['descriptor']);
				if ($distance < $threshold) {
					$edges[] = [$a, $b];
				}
			}
		}

		// Given the edges get the list of labels (found clusters) for each face.
		if ($usePdlib) {
			$labels = dlib_chinese_whispers($edges);
		} else {
			// The clustering algorithm actually expects ordered lists.
			$ordered = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $ordered);
			usort($ordered, function ($x, $y) {
				return ($x[0] === $y[0]) ? $x[1] - $y[1] : $x[0] - $y[0];
			});

			$labels = [];
			ChineseWhispers::predict($ordered, $labels);
		}

		// Labels are read by position: the label at index k belongs to the face at index k.
		$clusters = [];
		$labelCount = count($labels);
		for ($k = 0; $k < $labelCount; $k++) {
			$clusters[$labels[$k]][] = $faces[$k]['id'];
		}

		return $clusters;
	}
370
371
	/**
	 * Merge freshly computed clusters with the existing ones, keeping old person IDs where possible.
	 *
	 * For every face of the new clusters the transition "old person -> new cluster"
	 * is recorded and the transitions are counted. Starting with the most frequent
	 * transition, each new cluster takes over the ID of an old person, as long as
	 * that person was not already taken by another new cluster. Every other new
	 * cluster gets a fresh ID, counting up from the highest old person ID plus one
	 * (or from 1 when there are no old clusters).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Current clusters: person ID => list of face IDs
	 * @param array $newCluster New clusters: cluster ID => list of face IDs
	 * @return array Merged clusters: person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index the old clusters by face once, instead of scanning every old cluster
		// for each new face. The first person that owns a face wins, which is the
		// person the cluster-by-cluster search would have found.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => [old person or null, new person]
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$oldPersonFound = $oldPersonByFace[$newFace] ?? null;
				$transitions[$newFace] = array($oldPersonFound, $newPerson);
			}
		}

		// Count transitions
		$transitionCount = array();
		foreach ($transitions as $transition) {
			// A face without old person gives an empty prefix, read back as 0 below.
			$key = $transition[0] . ':' . $transition[1];
			if (array_key_exists($key, $transitionCount)) {
				$transitionCount[$key]++;
			} else {
				$transitionCount[$key] = 1;
			}
		}

		// Create map of new person -> old person transitions, most frequent first.
		// An old person of 0 means "no old person", so a new ID will be assigned.
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (!array_key_exists($newPerson, $newOldPersonMapping)) {
				if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
					$newOldPersonMapping[$newPerson] = $oldPerson;
					$oldPersonProcessed[$oldPerson] = 0;
				} else {
					// The old person is already taken by another new cluster.
					$newOldPersonMapping[$newPerson] = 0;
				}
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition, so it has no mapping:
			// handle it as a new person instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}

		return $result;
	}
434
}
435