Passed
Push — moreTests ( 993e7b...a32738 )
by Matias
05:54
created

CreateClustersTask::getNewClusters()   D

Complexity

Conditions 18
Paths 18

Size

Total Lines 63
Code Lines 43

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 25
CRAP Score 41.7652

Importance

Changes 2
Bugs 0 Features 1
Metric Value
cc 18
eloc 43
c 2
b 0
f 1
nc 18
nop 1
dl 0
loc 63
ccs 25
cts 43
cp 0.5814
crap 41.7652
rs 4.8666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
37
use OCA\FaceRecognition\Service\SettingsService;
38
/**
39
 * Task that creates (or recreates) the person clusters of each user.
40
 */
41
class CreateClustersTask extends FaceRecognitionBackgroundTask {
42
	/** @var PersonMapper Person mapper*/
43
	private $personMapper;
44
45
	/** @var ImageMapper Image mapper*/
46
	private $imageMapper;
47
48
	/** @var FaceMapper Face mapper*/
49
	private $faceMapper;
50
51
	/** @var SettingsService Settings service*/
52
	private $settingsService;
53
54
	/**
	 * Stores the collaborators this task works with.
	 *
	 * @param PersonMapper $personMapper Mapper used to count, merge and clean up persons
	 * @param ImageMapper $imageMapper Mapper used to count the images of a user
	 * @param FaceMapper $faceMapper Mapper used to count and fetch faces
	 * @param SettingsService $settingsService Provider of the clustering settings and flags
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
72
73
	/**
	 * Short human readable summary of what this task does.
	 *
	 * @inheritdoc
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
79
80
	/**
	 * Runs the clustering check for the user set on the context or, when the
	 * context has no user, for every user reported by the user manager.
	 *
	 * This is a generator: it yields once after each processed user and
	 * finally returns true.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// Yielding from inside a closure is not possible, so the user IDs are
		// gathered first and iterated afterwards. Only the UID is kept to avoid
		// holding on to full IUser objects.
		$userIds = [];
		if (!is_null($this->context->user)) {
			$userIds[] = $this->context->user->getUID();
		} else {
			$this->context->userManager->callForSeenUsers(function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			});
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
105
106
	/**
	 * Decides whether the clusters of one user have to be (re)built and, if so,
	 * rebuilds them, merges them with the existing persons and stores the result.
	 *
	 * @param string $userId ID of the user whose clusters are evaluated
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		if ($this->personMapper->countPersons($userId, $modelId) > 0) {
			// Persons already exist: look for a reason to recreate them.
			// All three checks are evaluated up front, as each one may log.
			$requestedBySettings = $this->needRecreateBySettings($userId);
			$enoughNewFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$stalePersons = $this->hasStalePersonsToRecreate($userId, $modelId);

			if (!$requestedBySettings && !$enoughNewFaces && !$stalePersons) {
				// No invalid persons and no recent new faces: nothing to do.
				$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
				return;
			}

			if ($requestedBySettings) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			} else {
				$this->logInfo('Face clustering will be recreated with new information or changes');
			}
		} else {
			// No persons yet. The force flag is not meant for end users, it is used in tests.
			$forcedForTests = $this->settingsService->_getForceCreateClusters($userId);
			$readyForFirstRun = $this->needCreateFirstTime($userId, $modelId);

			if (!$forcedForTests && !$readyForFirstRun) {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo('* have 1000 faces already processed');
				$this->logInfo('* or you need to have 95% of you images processed');
				$this->logInfo('Use stats command to track progress');
				return;
			}

			if ($forcedForTests) {
				$this->logInfo('Force the creation of clusters for testing');
			} else {
				$this->logInfo('Face clustering will be created for the first time.');
			}
		}

		// From here on the clusters are going to be (re)created.
		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster set is an associative array: person ID => list of face IDs.
		// Current clusters use the existing person IDs, new clusters use whatever
		// label chinese whispers assigned.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' persons found after clustering');

		// Map the new labels back to existing persons where possible and persist.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Drop persons that ended up without faces.
		// NOTE: this is done for all models, not only the current one, which is not a problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo("Deleted {$orphansDeleted} persons without faces");
		}

		// Reset both flags so the next run does not recreate the clusters needlessly.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}
183
184
	/**
	 * Tells whether the faces that are not yet assigned to a person justify
	 * recreating the clusters. That is the case when there are:
	 *   - at least 25 such faces, or
	 *   - fewer than 25, but the oldest of them was created more than 2 hours ago.
	 *
	 * (The aim is to avoid reclustering for every single uploaded face, without
	 *  waiting so long that the clusters get badly out of date.)
	 *
	 * @param string $userId ID of the user to check
	 * @param int $modelId ID of the face model to check
	 * @return bool true when the clusters should be recreated because of new faces
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$unassignedFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf(
			'Found %d faces without associated persons for user %s and model %d',
			$unassignedFaces, $userId, $modelId
		));

		// todo: get rid of magic numbers (move to config)
		if ($unassignedFaces === 0) {
			return false;
		}
		if ($unassignedFaces >= 25) {
			return true;
		}

		// Only a few faces are pending: decide based on the age of the oldest one.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime->getTimestamp();
		$now = (new \DateTime())->getTimestamp();
		$this->logDebug(sprintf(
			'Oldest face without persons for user %s and model %d is from %s',
			$userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')
		));

		// todo: get rid of magic numbers (move to config)
		return ($now - $createdAt) > 2 * 60 * 60;
	}
221
222
	/**
	 * Tells whether the person mapper counts at least one person for this user
	 * and model when queried with its third argument set to true.
	 * NOTE(review): presumably that flag selects invalidated persons only - confirm in PersonMapper.
	 *
	 * @param string $userId ID of the user to check
	 * @param int $modelId ID of the face model to check
	 * @return bool true when that count is greater than zero
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleCount = $this->personMapper->countPersons($userId, $modelId, true);
		return $staleCount > 0;
	}
225
226
	/**
	 * Reads the "need recreate clusters" flag of the user from the settings service.
	 *
	 * @param string $userId ID of the user to check
	 * @return bool value of the flag
	 */
	private function needRecreateBySettings(string $userId): bool {
		$flag = $this->settingsService->getNeedRecreateClusters($userId);
		return $flag;
	}
229
230 1
	/**
	 * Tells whether enough data has been collected to build the clusters of a
	 * user for the first time.
	 *
	 * Returns true when creation is forced through the settings, when more than
	 * 1000 faces are counted, or when more than 95% of the images are processed.
	 * Returns false when the user has no images or no processed images.
	 *
	 * @param string $userId ID of the user to check
	 * @param int $modelId ID of the face model to check
	 * @return bool true when the first clustering should run
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// The force flag is not meant for end users, it is used in tests.
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// Basic thresholds: below them the clusters would be small and not
		// "stable" enough, so it is better to wait for more images.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);
		return $processedRatio > 0.95;
	}
256
257 1
	/**
	 * Groups the IDs of the given faces by the person they are assigned to.
	 * Faces whose person is null are left out.
	 *
	 * @param array $faces Face objects exposing the `person` and `id` properties
	 * @return array Map of person ID => list of face IDs, in input order
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// PHP creates the inner list on first use of the key.
			$clustersByPerson[$face->person][] = $face->id;
		}
		return $clustersByPerson;
	}
269
270 1
	/**
	 * Clusters the given faces with chinese whispers.
	 *
	 * An edge is created between two faces when both pass the confidence and
	 * size filters and the distance between their descriptors is below the
	 * configured sensitivity. A face that fails the filters only gets an edge
	 * to itself, so it is not linked to any other face.
	 *
	 * @param array $faces List of face objects, indexed from 0, exposing
	 *                     `confidence`, `descriptor`, `id`, `height()` and `width()`
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();
		$minConfidence = $this->settingsService->getMinimumConfidence();
		$minFaceSize = $this->settingsService->getMinimumFaceSize();

		// pdlib >= 1.0.2 ships a native distance function; older versions fall
		// back to the PHP implementation. Decided once instead of duplicating the loops.
		$useNativeDistance = version_compare(phpversion('pdlib'), '1.0.2', '>=');

		// Evaluate the filters once per face instead of once per pair.
		// NOTE(review): assumes height() and width() have no side effects - confirm in Face.
		$facesCount = count($faces);
		$eligible = [];
		for ($i = 0; $i < $facesCount; $i++) {
			$eligible[$i] = $this->isFaceEligibleForClustering($faces[$i], $minConfidence, $minFaceSize);
		}

		// Create edges for chinese whispers
		$edges = [];
		for ($i = 0; $i < $facesCount; $i++) {
			if (!$eligible[$i]) {
				$edges[] = [$i, $i];
				continue;
			}
			$face1 = $faces[$i];
			// Starting at $i also compares the face with itself, as the original loops did.
			for ($j = $i; $j < $facesCount; $j++) {
				if (!$eligible[$j]) {
					continue;
				}
				$face2 = $faces[$j];
				$distance = $useNativeDistance
					? dlib_vector_length($face1->descriptor, $face2->descriptor)
					: Euclidean::distance($face1->descriptor, $face2->descriptor);
				if ($distance < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		// The result is read as: position = face index, value = cluster label.
		$labelByFaceIndex = dlib_chinese_whispers($edges);
		$newClusters = [];
		for ($i = 0, $c = count($labelByFaceIndex); $i < $c; $i++) {
			$newClusters[$labelByFaceIndex[$i]][] = $faces[$i]->id;
		}

		return $newClusters;
	}

	/**
	 * Tells whether a face passes the confidence and size filters.
	 *
	 * @param object $face Face exposing `confidence`, `height()` and `width()`
	 * @param mixed $minConfidence Lowest accepted confidence
	 * @param mixed $minFaceSize Lowest accepted size of the longest side
	 * @return bool false when the confidence or the longest side is below its minimum
	 */
	private function isFaceEligibleForClustering($face, $minConfidence, $minFaceSize): bool {
		return !(($face->confidence < $minConfidence) ||
		         (max($face->height(), $face->width()) < $minFaceSize));
	}
334
335
	/**
	 * Maps freshly computed clusters onto the existing persons.
	 *
	 * For every new cluster the old person that contributes most of its faces
	 * is searched. Each old person is reused by at most one new cluster (the
	 * one with the highest number of shared faces); every other new cluster
	 * gets a fresh ID, starting right after the highest old person ID (or at 1
	 * when there are no old persons).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of resulting person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index each old face by its owner once, so the lookup below does not
		// have to scan every old cluster for every new face. The first cluster
		// holding a face wins, matching the order of a linear search.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => [old person or null, new person]
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A missing old person yields an empty prefix (":<new>"),
		// which intval() turns into 0 further down.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent first.
		// 0 stands for "no old person, allocate a new ID".
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // keyed by person ID, so the lookup is cheap
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a bigger cluster.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition; treat it as a new
			// person instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
398
}
399