Completed
Push — master ( 76fe59...4ad739 )
by Matias
17s queued 14s
created

CreateClustersTask::getNewClusters()   C

Complexity

Conditions 15
Paths 18

Size

Total Lines 72
Code Lines 45

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 51.1227

Importance

Changes 3
Bugs 0 Features 1
Metric Value
cc 15
eloc 45
c 3
b 0
f 1
nc 18
nop 1
dl 0
loc 72
ccs 21
cts 46
cp 0.4565
crap 51.1227
rs 5.9166

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that, for each user, creates the person clusters of that user.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Person mapper*/
46
	private $personMapper;
47
48
	/** @var ImageMapper Image mapper*/
49
	private $imageMapper;
50
51
	/** @var FaceMapper Face mapper*/
52
	private $faceMapper;
53
54
	/** @var SettingsService Settings service*/
55
	private $settingsService;
56
57
	/**
	 * Stores the collaborators this task works with.
	 *
	 * @param PersonMapper $personMapper Mapper for persons (clusters)
	 * @param ImageMapper $imageMapper Mapper for images
	 * @param FaceMapper $faceMapper Mapper for faces
	 * @param SettingsService $settingsService Access to the app settings
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
75
76
	/**
	 * @inheritdoc
	 *
	 * @return string Short, human-readable summary of what this task does
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';

		return $summary;
	}
82
83
	/**
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// Yielding from inside a closure is not possible, so the users are
		// collected first and iterated afterwards. Only the UID is kept, to
		// avoid holding (or deep copying) the IUser objects.
		$userIds = [];
		$requestedUser = $this->context->user;
		if ($requestedUser !== null) {
			$userIds[] = $requestedUser->getUID();
		} else {
			$collectUid = function (IUser $user) use (&$userIds) {
				$userIds[] = $user->getUID();
			};
			$this->context->userManager->callForSeenUsers($collectUid);
		}

		// Yield after each user so the caller regains control between users.
		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
108
109
	/**
	 * Creates or recreates the person clusters of one user, but only when
	 * shouldBuildClusters() decides it is worth doing.
	 *
	 * When clustering runs, the faces of the current model are clustered again,
	 * merged with the existing persons, stored, and orphaned persons are removed.
	 *
	 * @param string $userId ID of the user whose faces are clustered
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		if (!$this->shouldBuildClusters($userId, $modelId)) {
			return;
		}

		// Ok. If we are here, the clusters must be recreated.
		$min_face_size = $this->settingsService->getMinimumFaceSize();
		$min_confidence = $this->settingsService->getMinimumConfidence();

		$faces = array_merge(
			$this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence),
			$this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence)
		);

		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster list is an associative array where the key is the person ID
		// and the value is an array of face IDs. For old clusters, person IDs are
		// existing person IDs; for new clusters they are whatever Chinese Whispers
		// decided to label them with.
		$currentClusters = $this->getCurrentClusters($faces);

		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// Relabel the new clusters with the existing person IDs where possible.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);

		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the clusters of the user must be created or recreated,
	 * and logs the reason of the decision.
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param int $modelId ID of the current face model
	 * @return bool True when clustering must run, false when it can be skipped
	 */
	private function shouldBuildClusters(string $userId, $modelId): bool {
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;

		if ($hasPersons) {
			// Clusters already exist: decide whether they must be recreated.
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
				return true;
			}
			if ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
				return true;
			}

			// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return false;
		}

		// No clusters yet: decide whether there is enough data to create them.
		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}
		if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of your images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}
197
198
	/**
	 * Tells whether the faces that still have no person justify recreating the clusters.
	 *
	 * Recreation is wanted when either:
	 *  - at least 25 faces have no person, or
	 *  - fewer than 25 faces have no person, but the oldest of them was created
	 *    more than 2 hours ago.
	 *
	 * (Basically, we want to avoid recreating the clusters for each new face being
	 *  uploaded, without waiting too much either, as clusters could change a lot.)
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param int $modelId ID of the face model being evaluated
	 * @return bool True when the pending faces are enough reason to recreate
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$pendingFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $pendingFaces, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($pendingFaces === 0) {
			return false;
		}
		if ($pendingFaces >= 25) {
			return true;
		}

		// We have some faces, but not that many, let's see when oldest one is generated.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime->getTimestamp();
		$now = (new \DateTime())->getTimestamp();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		$ageInSeconds = $now - $createdAt;

		return $ageInSeconds > 2 * 60 * 60;
	}
235
236
	/**
	 * Tells whether PersonMapper::countClusters(), called with its third
	 * argument set to true, reports at least one cluster.
	 *
	 * NOTE(review): presumably that flag restricts the count to invalidated
	 * (stale) clusters - confirm against PersonMapper.
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);

		return $staleClusters > 0;
	}
239
240
	/**
	 * Reads the "need recreate clusters" setting of the given user.
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);

		return $recreateRequested;
	}
243
244 1
	/**
	 * Tells whether enough data was collected to create clusters for the first time.
	 *
	 * Creation is wanted when it is forced by settings, or when the user has
	 * processed images and either more than 1000 faces exist or more than 95%
	 * of the images are processed.
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param int $modelId ID of the face model being evaluated
	 * @return bool True when the first clustering should run
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);

		return $processedRatio > 0.95;
	}
270
271 1
	/**
	 * Groups the IDs of the faces that already have a person by that person.
	 *
	 * @param array $faces Faces exposing the `person` and `id` properties
	 * @return array Map of person ID => list of face IDs; faces whose person is null are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			$personId = $face->person;
			if ($personId === null) {
				continue;
			}
			// Appending to a missing key creates the inner list on the fly.
			$clustersByPerson[$personId][] = $face->id;
		}

		return $clustersByPerson;
	}
283
284 1
	/**
	 * Clusters the given faces with Chinese Whispers and groups the face IDs
	 * by the label assigned to each face.
	 *
	 * The native dlib_* functions are used when Requirements::pdlibLoaded()
	 * reports true; otherwise the Euclidean and ChineseWhispers helpers are used.
	 *
	 * @param array $faces List of faces (indexed 0..n-1) exposing `id` and `descriptor`
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();

		if (Requirements::pdlibLoaded()) {
			$edges = $this->buildFaceEdges($faces, $sensitivity, 'dlib_vector_length');

			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = /** @scrutinizer ignore-call */ dlib_chinese_whispers($edges);
		} else {
			$edges = $this->buildFaceEdges($faces, $sensitivity, [Euclidean::class, 'distance']);

			// The clustering algorithm actually expects ordered lists.
			$orderedEdges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
			usort($orderedEdges, function ($a, $b) {
				if ($a[0] === $b[0]) return $a[1] - $b[1];
				return $a[0] - $b[0];
			});

			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = [];
			ChineseWhispers::predict($orderedEdges, $labelsByIndex);
		}

		// Position $i of the labels corresponds to position $i of $faces.
		$newClusters = [];
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			$newClusters[$labelsByIndex[$i]][] = $faces[$i]->id;
		}

		return $newClusters;
	}

	/**
	 * Builds the edges (pairs of face indexes) given to Chinese Whispers.
	 *
	 * Two faces are connected when the distance between their descriptors is
	 * lower than $sensitivity. A face with a descriptor is also compared with
	 * itself, and a face without descriptor only gets an edge to itself.
	 *
	 * @param array $faces List of faces (indexed 0..n-1) exposing `descriptor`
	 * @param mixed $sensitivity Numeric threshold each distance is compared against
	 * @param callable $distance Receives two descriptors and returns their distance
	 * @return array List of [index, index] pairs
	 */
	private function buildFaceEdges(array $faces, $sensitivity, callable $distance): array {
		$edges = [];
		$facesCount = count($faces);

		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				$edges[] = [$i, $i];
				continue;
			}
			// Starting at $i visits each unordered pair only once.
			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}
				if ($distance($face1->descriptor, $face2->descriptor) < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		return $edges;
	}
357
358
	/**
	 * Relabels freshly computed clusters with the IDs of the existing persons,
	 * so that a person keeps its ID when its faces stay (mostly) together.
	 *
	 * Each (old person, new cluster) pair is counted by the faces it shares, and
	 * pairs are taken from the most to the least shared. A new cluster takes the
	 * old person of its first pair unless another cluster already took it.
	 * Clusters left without an old person get fresh IDs above the highest old one.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Reverse index face ID => old person ID, so each face is resolved with
		// one lookup. The first person listing a face wins.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A face without old person yields the key ":<new>",
		// which is read back below as old person 0 (meaning "none").
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // old persons already taken by some new cluster
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition; handle it as a new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}

		return $result;
	}
421
}
422