Passed
Push — find-similar ( d95067...db8a67 )
by Matias
04:49
created

CreateClustersTask   C

Complexity

Total Complexity 57

Size/Duplication

Total Lines 348
Duplicated Lines 0 %

Test Coverage

Coverage 0%

Importance

Changes 5
Bugs 0 Features 1
Metric Value
eloc 184
dl 0
loc 348
ccs 0
cts 182
cp 0
rs 5.04
c 5
b 0
f 1
wmc 57

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 13 1
A description() 0 2 1
A execute() 0 21 3
B fillFaceRelationsFromPersons() 0 34 7
F mergeClusters() 0 59 14
A getCurrentClusters() 0 11 4
C createClusterIfNeeded() 0 111 15
C getNewClusters() 0 52 12

How to fix   Complexity   

Complex Class

Complex classes like CreateClustersTask often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use CreateClustersTask, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Db\Relation;
36
use OCA\FaceRecognition\Db\RelationMapper;
37
38
use OCA\FaceRecognition\Helper\Euclidean;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that creates person clusters for each user.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Person mapper*/
46
	private $personMapper;
47
48
	/** @var ImageMapper Image mapper*/
49
	private $imageMapper;
50
51
	/** @var FaceMapper Face mapper*/
52
	private $faceMapper;
53
54
	/** @var RelationMapper Relation mapper*/
55
	private $relationMapper;
56
57
	/** @var SettingsService Settings service*/
58
	private $settingsService;
59
60
	/**
	 * Keeps references to the mappers and the settings service used by this task.
	 *
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceMapper $faceMapper Face mapper
	 * @param RelationMapper $relationMapper Relation mapper
	 * @param SettingsService $settingsService Settings service
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		RelationMapper $relationMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		// Collaborators are only stored here; nothing is queried at construction time.
		$this->settingsService = $settingsService;
		$this->relationMapper = $relationMapper;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
80
81
	/**
	 * Human readable summary of what this task does.
	 *
	 * @inheritdoc
	 */
	public function description() {
		$summary = "Create new persons or update existing persons";

		return $summary;
	}
87
88
	/**
	 * Runs cluster creation for the context user, or for every seen user when the
	 * context has no user set, yielding after each processed user.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// A closure cannot yield, so the user IDs are collected first and iterated afterwards.
		// Only the UID strings are kept to avoid holding copies of the IUser objects.
		$userIds = array();
		$contextUser = $this->context->user;
		if ($contextUser !== null) {
			$userIds[] = $contextUser->getUID();
		} else {
			$this->context->userManager->callForSeenUsers(function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			});
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
113
114
	/**
	 * Creates or refreshes the person clusters of one user when the collected data warrants it.
	 *
	 * When the user already has persons for the current face model, clusters are only
	 * rebuilt if needsClusterRecreation() says so; otherwise they are only built once
	 * hasEnoughDataForClusterCreation() is satisfied. When clustering goes ahead, the merged
	 * clusters are written through the person mapper, orphaned persons are deleted, relation
	 * suggestions are refreshed and both "force"/"need recreate" flags are reset.
	 *
	 * @param string $userId ID of the user whose clusters are evaluated
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$shouldCluster = $hasPersons
			? $this->needsClusterRecreation($userId, $modelId)
			: $this->hasEnoughDataForClusterCreation($userId, $modelId);
		if (!$shouldCluster) {
			return;
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' persons found after clustering');

		// Map the freshly computed clusters onto the existing person IDs and persist the result.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Fill relation table with new clusters.
		$relations = $this->fillFaceRelationsFromPersons($userId, $modelId);
		$this->logInfo($relations . ' relations added as suggestions');

		// Reset the flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the already existing clusters of a user have to be rebuilt.
	 *
	 * Clusters are rebuilt when recreation was explicitly requested through the settings,
	 * when some person is counted as changed, or when there are new faces. "New faces" means
	 * either at least 10 faces without a person, or fewer than that with the oldest of them
	 * being more than 2 hours old. This avoids rebuilding for every single uploaded face
	 * without waiting indefinitely.
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param mixed $modelId Current face model ID as returned by the settings service
	 * @return bool True when clustering should run again
	 */
	private function needsClusterRecreation(string $userId, $modelId): bool {
		$haveNewFaces = false;
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));
		// todo: get rid of magic numbers (move to config)
		if ($facesWithoutPersons >= 10) {
			$haveNewFaces = true;
		} else if ($facesWithoutPersons > 0) {
			// We have some faces, but not that many, let's see when oldest one is generated.
			$face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
			$oldestFaceTimestamp = $face->creationTime->getTimestamp();
			$currentTimestamp = time();
			$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
				$userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
			// todo: get rid of magic numbers (move to config)
			if ($currentTimestamp - $oldestFaceTimestamp > 2 * 60 * 60) {
				$haveNewFaces = true;
			}
		}

		$stalePersonsCount = $this->personMapper->countPersons($userId, $modelId, true);
		$forceRecreation = $this->settingsService->getNeedRecreateClusters($userId);

		$this->logDebug(sprintf('Found %d changed persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));

		if ($forceRecreation) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			return true;
		}

		$clustersUpToDate = ($stalePersonsCount <= 0) && !$haveNewFaces;
		if ($clustersUpToDate) {
			// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return false;
		}

		return true;
	}

	/**
	 * Decides whether enough data was collected to build the first clusters of a user.
	 *
	 * Creation goes ahead when it was forced through the settings, or when at least one of
	 * these holds: 1000 faces were found, the user has 100 images, or 95% of the user's
	 * images were processed. Otherwise the current numbers are logged and creation is skipped.
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param mixed $modelId Current face model ID as returned by the settings service
	 * @return bool True when the first clustering should run
	 */
	private function hasEnoughDataForClusterCreation(string $userId, $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forceCreation = $this->settingsService->getForceCreateClusters($userId);

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: 2 queries to get these 2 counts, can we do this smarter?
		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
		// Ratio in the range 0..1; stays 0 when the user has no images at all.
		$fractionImagesProcessed = 0;
		if ($imageCount > 0) {
			$fractionImagesProcessed = $imageProcessed / floatval($imageCount);
		}
		$facesCount = $this->faceMapper->countFaces($userId, $modelId);

		// todo: get rid of magic numbers (move to config)
		if (!$forceCreation && ($facesCount < 1000) && ($imageCount < 100) && ($fractionImagesProcessed < 0.95)) {
			$this->logInfo(
				'Skipping cluster creation, not enough data (yet) collected. ' .
				'For cluster creation, you need either one of the following:');
			$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
			$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
			// The ratio is scaled to a percentage so it matches the "%%" unit of the message.
			$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $fractionImagesProcessed * 100));
			return false;
		}

		return true;
	}
226
227
	/**
	 * Groups the IDs of the faces that already have a person by that person.
	 *
	 * @param array $faces Faces exposing the `person` and `id` properties
	 * @return array Map of person ID => list of face IDs; faces whose person is null are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$facesByPerson = array();
		foreach ($faces as $face) {
			$personId = $face->person;
			if ($personId === null) {
				continue;
			}
			// Appending with [] creates the per-person list on first use.
			$facesByPerson[$personId][] = $face->id;
		}
		return $facesByPerson;
	}
239
240
	/**
	 * Clusters the given faces with chinese whispers and returns the resulting groups.
	 *
	 * An edge is created between two faces when the distance between their descriptors is
	 * below the configured sensitivity. Faces whose confidence is below the configured
	 * minimum only get an edge to themselves, so they are never linked to another face.
	 * The distance is computed with dlib_vector_length() when pdlib >= 1.0.2 is available
	 * and with the Euclidean helper otherwise.
	 *
	 * @param array $faces Zero-indexed list of faces exposing `id`, `confidence` and `descriptor`
	 * @return array Map of cluster label (as returned by dlib_chinese_whispers()) => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		$sensitivity = $this->settingsService->getSensitivity();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		// Select the distance implementation once instead of duplicating the whole loop per backend.
		if (version_compare(phpversion('pdlib'), '1.0.2', '>=')) {
			$distanceBetween = static function ($first, $second) {
				return dlib_vector_length($first, $second);
			};
		} else {
			$euclidean = new Euclidean();
			// todo: can't this distance be a method in $face1->distance($face2)?
			$distanceBetween = static function ($first, $second) use ($euclidean) {
				return $euclidean->distance($first, $second);
			};
		}

		// Create edges for chinese whispers
		$edges = array();
		$faceCount = count($faces);
		for ($i = 0; $i < $faceCount; $i++) {
			$face1 = $faces[$i];
			if ($face1->confidence < $minConfidence) {
				// The self-edge keeps the face in the graph as its own isolated node.
				$edges[] = array($i, $i);
				continue;
			}
			for ($j = $i; $j < $faceCount; $j++) {
				$face2 = $faces[$j];
				if ($face2->confidence < $minConfidence) {
					// Without this check a low-confidence face could still be pulled into the
					// cluster of an earlier face, depending only on the order of the list.
					continue;
				}
				if ($distanceBetween($face1->descriptor, $face2->descriptor) < $sensitivity) {
					$edges[] = array($i, $j);
				}
			}
		}

		// The result holds one cluster label per face index; regroup it as label => face IDs.
		$clusterLabels = dlib_chinese_whispers($edges);
		$newClusters = array();
		foreach ($clusterLabels as $faceIndex => $label) {
			$newClusters[$label][] = $faces[$faceIndex]->id;
		}

		return $newClusters;
	}
293
294
	/**
	 * Maps freshly computed clusters onto the already existing person IDs.
	 *
	 * For every new cluster the old person that contributes most of its faces is looked up.
	 * Each old person is handed to at most one new cluster (the pairing with the highest
	 * face count wins). New clusters that cannot reuse an old person get fresh IDs starting
	 * at the highest old person ID plus one, or at 1 when there are no old clusters.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of resulting person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index every old face by its person once, so each new face is resolved with a single
		// lookup. The first person listing a face wins, as with a first-match scan.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => (old person or null, new person)
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions, keyed as "old:new" (the old part is empty for faces without old person)
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent transitions first.
		// 0 is the marker for "no old person can be reused".
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a stronger transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces produced no transition and therefore has no mapping;
			// treat it like any other cluster that cannot reuse an old person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
357
358
	/**
	 * Proposes relations between persons whose representative faces are almost similar.
	 *
	 * One representative face is fetched per person, and every pair whose descriptor
	 * distance is below sensitivity + deviation is turned into a relation in the PROPOSED
	 * state. The relations are then handed to the relation mapper to be merged.
	 * Nothing is done when pdlib is older than 1.0.2 or the configured deviation is 0.0.
	 *
	 * @param string $userId ID of the user whose persons are compared
	 * @param int $modelId Face model the persons belong to
	 * @return int Value returned by RelationMapper::merge() (logged by the caller as the number
	 *             of relations added), or 0 when the step is skipped
	 */
	private function fillFaceRelationsFromPersons(string $userId, int $modelId): int {
		$deviation = $this->settingsService->getDeviation();
		if (!version_compare(phpversion('pdlib'), '1.0.2', '>=') || ($deviation === 0.0)) {
			return 0;
		}

		$sensitivity = $this->settingsService->getSensitivity();

		// Get the representative faces of each person
		$mainFaces = array();
		$persons = $this->personMapper->findAll($userId, $modelId);
		foreach ($persons as $person) {
			$mainFaces[] = $this->faceMapper->findRepresentativeFromPerson($userId, $modelId, $person->getId(), $sensitivity);
		}

		// Get similar faces taking into account the deviation
		$threshold = $sensitivity + $deviation;
		$relations = array();
		$facesCount = count($mainFaces);
		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $mainFaces[$i];
			// Start at $i + 1 so each unordered pair is compared exactly once.
			for ($j = $i + 1; $j < $facesCount; $j++) {
				$face2 = $mainFaces[$j];
				$distance = dlib_vector_length($face1->descriptor, $face2->descriptor);
				if ($distance >= $threshold) {
					continue;
				}
				$relation = new Relation();
				$relation->setFace1($face1->getId());
				$relation->setFace2($face2->getId());
				$relation->setState(Relation::PROPOSED);
				$relations[] = $relation;
			}
		}

		// Merge new suggested relations
		return $this->relationMapper->merge($relations);
	}
393
394
}
395