Completed
Push — master ( 76fe59...4ad739 )
by Matias
17s queued 14s
created

CreateClustersTask   D

Complexity

Total Complexity 58

Size/Duplication

Total Lines 376
Duplicated Lines 0 %

Test Coverage

Coverage 63.54%

Importance

Changes 5
Bugs 0 Features 1
Metric Value
eloc 191
c 5
b 0
f 1
dl 0
loc 376
ccs 122
cts 192
cp 0.6354
rs 4.5599
wmc 58

11 Methods

Rating   Name   Duplication   Size   Complexity  
F mergeClusters() 0 59 14
A hasNewFacesToRecreate() 0 25 4
A getCurrentClusters() 0 11 4
A needRecreateBySettings() 0 2 1
A needCreateFirstTime() 0 25 6
A __construct() 0 11 1
A description() 0 2 1
A hasStalePersonsToRecreate() 0 2 1
B createClusterIfNeeded() 0 84 8
A execute() 0 21 3
C getNewClusters() 0 72 15

How to fix   Complexity   

Complex Class

Complex classes like CreateClustersTask often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use CreateClustersTask, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that creates person clusters for each user.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Mapper used here to count persons/clusters, persist merged clusters and delete orphaned persons */
46
	private $personMapper;
47
48
	/** @var ImageMapper Mapper used here to count a user's images (all and processed) */
49
	private $imageMapper;
50
51
	/** @var FaceMapper Mapper used here to count faces and to fetch the faces that take part in clustering */
52
	private $faceMapper;
53
54
	/** @var SettingsService Source of the current face model, clustering thresholds and the (re)creation flags */
55
	private $settingsService;
56
57
	/**
	 * Stores the mappers and the settings service this task relies on.
	 *
	 * @param PersonMapper $personMapper Mapper for persons (clusters)
	 * @param ImageMapper $imageMapper Mapper for images
	 * @param FaceMapper $faceMapper Mapper for faces
	 * @param SettingsService $settingsService Access to the application settings
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
75
76
	/**
	 * @inheritdoc
	 *
	 * @return string Short, human-readable summary of what this task does
	 */
	public function description() {
		return 'Create new persons or update existing persons';
	}
82
83
	/**
	 * @inheritdoc
	 *
	 * Runs the clustering check for the single user set on the context, or for
	 * every seen user when the context has no user. Yields once after each user.
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// Yielding from inside a closure is not possible, so the user list is collected
		// first and walked afterwards. Only the UIDs are kept, not the IUser objects.
		$userIds = [];
		$singleUser = $this->context->user;
		if ($singleUser !== null) {
			$userIds[] = $singleUser->getUID();
		} else {
			$this->context->userManager->callForSeenUsers(function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			});
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
108
109
	/**
	 * Creates or recreates the person clusters of one user, unless the checks
	 * below conclude that there is nothing worth doing yet.
	 *
	 * @param string $userId ID of the user whose faces are clustered
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		if ($this->personMapper->countPersons($userId, $modelId) > 0) {
			// Clusters exist already: recreate only on request, on new faces or on stale persons.
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if (!$forceRecreate && !$haveEnoughFaces && !$haveStaled) {
				// No invalid persons and no recent new faces, so the clusters are left alone.
				$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
				return;
			}

			$this->logInfo($forceRecreate
				? 'Clusters already exist, but there was some change that requires recreating the clusters'
				: 'Face clustering will be recreated with new information or changes');
		} else {
			// The force flag is not meant for end users, it is used in tests.
			$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
			$needCreate = $this->needCreateFirstTime($userId, $modelId);

			if (!$forceTestCreation && !$needCreate) {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo('* have 1000 faces already processed');
				$this->logInfo('* or you need to have 95% of you images processed');
				$this->logInfo('Use stats command to track progress');
				return;
			}

			$this->logInfo($forceTestCreation
				? 'Force the creation of clusters for testing'
				: 'Face clustering will be created for the first time.');
		}

		// From here on the clusters are (re)built.
		$minFaceSize = $this->settingsService->getMinimumFaceSize();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		$groupableFaces = $this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence);
		$nonGroupableFaces = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence);
		$faces = array_merge($groupableFaces, $nonGroupableFaces);

		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster map is keyed by person ID and holds a list of face IDs. Current clusters
		// use existing person IDs, new clusters use whatever labels the clustering returned.
		$currentClusters = $this->getCurrentClusters($faces);

		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Persons left without any face are removed. This call is not restricted to the
		// current model, which is not a problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Clear both flags so that the next run does not rebuild the clusters unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}
197
198
	/**
	 * Tells whether enough new faces are waiting to justify recreating the clusters.
	 *
	 * The answer is yes when either:
	 *   - 25 or more counted faces are pending, or
	 *   - fewer are pending, but the oldest of them was created more than 2 hours ago.
	 *
	 * The idea is to avoid a full recreation for every single uploaded face without
	 * leaving new faces unclustered for too long.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool True when the clusters should be recreated because of new faces
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		// NOTE(review): the third argument presumably restricts the count to faces
		// without a person, as the log message below suggests - confirm in FaceMapper.
		$pendingFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $pendingFaces, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($pendingFaces === 0) {
			return false;
		}
		if ($pendingFaces >= 25) {
			return true;
		}

		// Only a few faces are pending, so the age of the oldest one decides.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime;
		$ageInSeconds = (new \DateTime())->getTimestamp() - $createdAt->getTimestamp();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $createdAt->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		return $ageInSeconds > 2 * 60 * 60;
	}
235
236
	/**
	 * Tells whether the cluster count requested from the person mapper is above zero.
	 *
	 * NOTE(review): the `true` flag presumably selects invalidated (stale) clusters,
	 * as the method name suggests - confirm against PersonMapper::countClusters().
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);
		return $staleClusters > 0;
	}
239
240
	/**
	 * Reads the user's "need recreate clusters" flag from the settings.
	 *
	 * @param string $userId ID of the user
	 * @return bool Value of the flag
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);
		return $recreateRequested;
	}
243
244 1
	/**
	 * Tells whether enough data has been collected to build clusters for the first time.
	 *
	 * True when creation is forced (test-only flag), when more than 1000 faces are
	 * counted, or when more than 95% of the user's images are processed. False when
	 * the user has no images or none of them is processed yet.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// The force flag is not meant for end users, it is used in tests.
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// Below these thresholds the clusters would be small and not "stable" enough,
		// so it is better to wait for more images.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);
		return $processedRatio > 0.95;
	}
270
271 1
	/**
	 * Groups the IDs of the faces that already have a person by that person.
	 *
	 * @param array $faces Face objects exposing `person` and `id`
	 * @return array Map of person ID => list of face IDs; faces without a person are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending creates the per-person list on first use.
			$clustersByPerson[$face->person][] = $face->id;
		}
		return $clustersByPerson;
	}
283
284 1
	/**
	 * Clusters the given faces with Chinese Whispers and groups the face IDs by
	 * the label each face received.
	 *
	 * When Requirements::pdlibLoaded() is true the dlib_* functions are used,
	 * otherwise the Euclidean and ChineseWhispers helper classes. Both paths
	 * build their edges the same way, see buildFaceEdges().
	 *
	 * @param array $faces Face objects exposing `descriptor` and `id`; read by
	 *                     index 0..count-1, so it has to be a list
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Two faces are connected when their distance is below this value.
		$sensitivity = $this->settingsService->getSensitivity();

		if (Requirements::pdlibLoaded()) {
			$edges = $this->buildFaceEdges($faces, $sensitivity, static function ($descriptorA, $descriptorB) {
				return dlib_vector_length($descriptorA, $descriptorB);
			});

			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = dlib_chinese_whispers($edges);
		} else {
			$edges = $this->buildFaceEdges($faces, $sensitivity, static function ($descriptorA, $descriptorB) {
				return Euclidean::distance($descriptorA, $descriptorB);
			});

			// The clustering algorithm actually expects ordered lists.
			$orderedEdges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
			usort($orderedEdges, static function ($a, $b) {
				if ($a[0] === $b[0]) {
					return $a[1] <=> $b[1];
				}
				return $a[0] <=> $b[0];
			});

			// Given the edges get the list of labels (found clusters) for each face.
			$labelsByIndex = [];
			ChineseWhispers::predict($orderedEdges, $labelsByIndex);
		}

		// Label at position i belongs to face i.
		$newClusters = [];
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			$newClusters[$labelsByIndex[$i]][] = $faces[$i]->id;
		}
		return $newClusters;
	}

	/**
	 * Builds the edge list (pairs of face indexes) used as clustering input.
	 *
	 * A face without a descriptor only gets an edge to itself. Every other face
	 * is compared with itself and with all later faces that have a descriptor,
	 * and an edge is added whenever the distance is below the sensitivity.
	 *
	 * @param array $faces Face objects, indexed 0..count-1
	 * @param int|float $sensitivity Distances below this value create an edge
	 * @param callable $distance Receives two descriptors and returns their distance
	 * @return array List of [index, index] pairs
	 */
	private function buildFaceEdges(array $faces, $sensitivity, callable $distance): array {
		$edges = [];
		$facesCount = count($faces);

		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				$edges[] = [$i, $i];
				continue;
			}
			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}
				if ($distance($face1->descriptor, $face2->descriptor) < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		return $edges;
	}
357
358
	/**
	 * Merges freshly computed clusters with the existing ones, so that a new
	 * cluster keeps the ID of the old person it shares the most faces with.
	 *
	 * Steps:
	 *  1. For every face of the new clusters, note which old person it came from
	 *     (none if the face is not part of any old cluster).
	 *  2. Count the faces per "old person -> new cluster" transition.
	 *  3. Walk the transitions from most to least frequent. Each new cluster takes
	 *     the first old person offered to it, unless that old person has already
	 *     been given to another new cluster.
	 *  4. New clusters left without an old person get fresh IDs, starting at the
	 *     highest old person ID + 1 (or at 1 when there are no old clusters).
	 *
	 * Face IDs are matched through array keys, so they have to be integers (or
	 * integer-like strings). The ID 0 stands for "no old person" and is never
	 * kept as an old person ID.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of old person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of person ID => list of face IDs, in the order of $newCluster
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index every old face once, instead of scanning all old clusters for each
		// new face. The first old person that lists a face wins, as before.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions. A missing old person gives a key like ":5", which is
		// read back as old person 0 below.
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			if (array_key_exists($key, $transitionCount)) {
				$transitionCount[$key]++;
			} else {
				$transitionCount[$key] = 1;
			}
		}

		// Create map of new person -> old person transitions
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (!array_key_exists($newPerson, $newOldPersonMapping)) {
				if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
					$newOldPersonMapping[$newPerson] = $oldPerson;
					$oldPersonProcessed[$oldPerson] = 0;
				} else {
					// The old person already belongs to a bigger new cluster.
					$newOldPersonMapping[$newPerson] = 0;
				}
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster that contributed no transition (e.g. an empty face list)
			// has no mapping entry; it is handled like any other brand-new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
421
}
422