Passed
Push — master ( a86ea0...028966 )
by Matias
05:02
created

CreateClustersTask::createClusterIfNeeded()   B

Complexity

Conditions 8
Paths 10

Size

Total Lines 84
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 32
CRAP Score 9.0711

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
eloc 44
c 1
b 0
f 0
nc 10
nop 1
dl 0
loc 84
ccs 32
cts 43
cp 0.7442
crap 9.0711
rs 7.9715

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that creates (or updates) the person clusters of each eligible user.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Person mapper*/
46
	private $personMapper;
47
48
	/** @var ImageMapper Image mapper*/
49
	private $imageMapper;
50
51
	/** @var FaceMapper Face mapper*/
52
	private $faceMapper;
53
54
	/** @var SettingsService Settings service*/
55
	private $settingsService;
56
57
	/**
	 * Keep references to the mappers and the settings service used by this task.
	 *
	 * @param PersonMapper $personMapper Mapper used to count, merge and clean up persons
	 * @param ImageMapper $imageMapper Mapper used to count the images of a user
	 * @param FaceMapper $faceMapper Mapper used to count and fetch faces
	 * @param SettingsService $settingsService Source of the model, thresholds and recreate flags
	 */
	public function __construct(
		PersonMapper    $personMapper,
		ImageMapper     $imageMapper,
		FaceMapper      $faceMapper,
		SettingsService $settingsService
	) {
		// The parent task must be initialized before this one stores its collaborators.
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper      = $faceMapper;
		$this->imageMapper     = $imageMapper;
		$this->personMapper    = $personMapper;
	}
75
76
	/**
	 * Human readable summary of what this task does.
	 *
	 * @inheritdoc
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
82
83
	/**
	 * Run the clustering check for every eligible user of the given context.
	 *
	 * This is a generator: it yields once after each processed user and
	 * finally returns true.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		foreach ($this->context->getEligibleUsers() as $userId) {
			$this->createClusterIfNeeded($userId);
			// Hand control back to the caller between users.
			yield;
		}

		return true;
	}
96
97
	/**
	 * Create or recreate the person clusters of a user, but only when it is worth doing.
	 *
	 * Which checks apply depends on whether the user already has persons for the
	 * current face model: existing clusters are only rebuilt on a relevant change,
	 * missing clusters are only built once enough data was collected.
	 *
	 * @param string $userId ID of the user to process
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		$mustBuild = $hasPersons
			? $this->shouldRecreateExistingClusters($userId, $modelId)
			: $this->shouldCreateFirstClusters($userId, $modelId);

		if ($mustBuild) {
			$this->rebuildClusters($userId, $modelId);
		}
	}

	/**
	 * Decide whether already existing clusters must be recreated, logging the reason.
	 *
	 * The checks run lazily, cheapest first: once one of them demands a recreation
	 * the remaining database queries are skipped.
	 *
	 * @param string $userId ID of the user to process
	 * @param int $modelId ID of the current face model
	 * @return bool true when the clusters must be recreated
	 */
	private function shouldRecreateExistingClusters(string $userId, int $modelId): bool {
		if ($this->needRecreateBySettings($userId)) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			return true;
		}

		if ($this->hasNewFacesToRecreate($userId, $modelId) || $this->hasStalePersonsToRecreate($userId, $modelId)) {
			$this->logInfo('Face clustering will be recreated with new information or changes');
			return true;
		}

		// No stale persons and no relevant amount of new faces: keep the clusters as they are.
		$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
		return false;
	}

	/**
	 * Decide whether clusters must be created for a user that has none yet, logging the reason.
	 *
	 * @param string $userId ID of the user to process
	 * @param int $modelId ID of the current face model
	 * @return bool true when the clusters must be created
	 */
	private function shouldCreateFirstClusters(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}

		if ($this->needCreateFirstTime($userId, $modelId)) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of you images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}

	/**
	 * Cluster the faces of the user, merge the result with the stored clusters and persist it.
	 *
	 * Afterwards persons left without faces are deleted and the recreate/force
	 * flags of the user are reset.
	 *
	 * @param string $userId ID of the user to process
	 * @param int $modelId ID of the current face model
	 * @return void
	 */
	private function rebuildClusters(string $userId, int $modelId) {
		$min_face_size = $this->settingsService->getMinimumFaceSize();
		$min_confidence = $this->settingsService->getMinimumConfidence();

		$faces = array_merge(
			$this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence),
			$this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence)
		);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster set is an associative array: key is the person ID, value is the list of
		// face IDs. For old clusters the keys are existing person IDs, for new clusters they
		// are whatever labels chinese whispers decided to use.
		$currentClusters = $this->getCurrentClusters($faces);

		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset the flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}
185
186
	/**
	 * Tell whether the faces not yet assigned to a person justify recreating the clusters.
	 *
	 * We don't want to jump the gun and recluster for every uploaded face, but we
	 * don't want to wait too long either. So the answer is yes when there are:
	 *   - at least 25 faces without a person, or
	 *   - fewer than 25 such faces, but the oldest one was created more than 2h ago.
	 *
	 * @param string $userId ID of the user whose faces are inspected
	 * @param int $modelId ID of the face model the faces belong to
	 * @return bool true when the new faces alone call for a recreation
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$unassignedFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $unassignedFaces, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($unassignedFaces === 0) {
			return false;
		}
		if ($unassignedFaces >= 25) {
			return true;
		}

		// Only a few faces are waiting: decide by the age of the oldest one.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$ageInSeconds = (new \DateTime())->getTimestamp() - $oldestFace->creationTime->getTimestamp();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		return $ageInSeconds > 2 * 60 * 60;
	}
223
224
	/**
	 * Tell whether the person mapper reports at least one cluster for the
	 * user and model when queried with its third argument set to true.
	 * NOTE(review): presumably that flag selects invalidated (stale) clusters - confirm in PersonMapper.
	 *
	 * @param string $userId ID of the user to check
	 * @param int $modelId ID of the face model to check
	 * @return bool true when at least one such cluster exists
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);
		return $staleClusters > 0;
	}
227
228
	/**
	 * Read the per-user "need recreate clusters" flag from the settings.
	 *
	 * @param string $userId ID of the user to check
	 * @return bool value of the flag stored for the user
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);
		return $recreateRequested;
	}
231
232 1
	/**
	 * Tell whether enough data was collected to create the clusters of a user for the first time.
	 *
	 * Creation is allowed when it is forced through the settings (used in tests), when
	 * more than 1000 faces were found, or when more than 95% of the images were processed.
	 * Users without images, or without any processed image, never qualify.
	 *
	 * @param string $userId ID of the user to check
	 * @param int $modelId ID of the face model to check
	 * @return bool true when the first clusters should be created
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		// NOTE(review): the extra `true` presumably restricts the count to processed images - confirm in ImageMapper.
		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// Below these thresholds the clusters would be small and not "stable" enough,
		// so it is better to wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		return ($processedImages / floatval($totalImages)) > 0.95;
	}
258
259 1
	/**
	 * Group the IDs of the faces that already belong to a person by that person.
	 *
	 * Faces whose `person` is null are left out.
	 *
	 * @param array $faces face objects exposing `id` and `person`
	 * @return array map of person ID => list of face IDs, in input order
	 */
	private function getCurrentClusters(array $faces): array {
		$facesByPerson = array();
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending to a missing key creates the inner list on the fly.
			$facesByPerson[$face->person][] = $face->id;
		}
		return $facesByPerson;
	}
271
272 1
	/**
	 * Cluster the given faces with the Chinese Whispers algorithm.
	 *
	 * Two faces are linked by an edge when the distance between their descriptors is
	 * below the configured sensitivity. A face without descriptor is only linked to
	 * itself. When pdlib is loaded its native functions are used, otherwise the PHP
	 * implementations shipped with the app.
	 *
	 * @param array $faces list of face objects (indexed 0..n-1) exposing `id` and `descriptor`
	 * @return array map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();

		// Resolved once: it selects both the distance function and the clustering backend.
		$usePdlib = Requirements::pdlibLoaded();

		// Create edges (neighbors) for Chinese Whispers. Edges reference faces by their
		// position in $faces, the labels returned below use the same positions.
		$edges = array();
		$facesCount = count($faces);
		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				$edges[] = array($i, $i);
				continue;
			}
			// Starting at $i also compares the face with itself.
			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}
				$distance = $usePdlib
					? /** @scrutinizer ignore-call */ dlib_vector_length($face1->descriptor, $face2->descriptor)
					: Euclidean::distance($face1->descriptor, $face2->descriptor);
				if ($distance < $sensitivity) {
					$edges[] = array($i, $j);
				}
			}
		}

		// Given the edges get the list of labels (found clusters) for each face.
		if ($usePdlib) {
			$labelsByIndex = /** @scrutinizer ignore-call */ dlib_chinese_whispers($edges);
		} else {
			// The clustering algorithm actually expects ordered lists.
			$orderedEdges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
			usort($orderedEdges, function($a, $b) {
				// Order by first node, then by second node.
				if ($a[0] === $b[0]) {
					return $a[1] <=> $b[1];
				}
				return $a[0] <=> $b[0];
			});

			$labelsByIndex = [];
			ChineseWhispers::predict($orderedEdges, $labelsByIndex);
		}

		// Turn the per-face labels into label => face IDs.
		$newClusters = array();
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			$newClusters[$labelsByIndex[$i]][] = $faces[$i]->id;
		}
		return $newClusters;
	}
345
346
	/**
	 * Merge freshly computed clusters with the existing ones, reusing old person IDs where possible.
	 *
	 * For every face the transition "old person -> new cluster" is recorded and the
	 * transitions are counted. Going from the most to the least frequent transition,
	 * each new cluster is decided the first time it shows up: it takes over the old
	 * person ID unless that ID was already handed to another new cluster. Every new
	 * cluster left without an old person gets a fresh ID, counting up from the highest
	 * old person ID plus one (or from 1 when there are no old clusters).
	 *
	 * Old person ID 0 is used internally as "no old person".
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster map of old person ID => list of face IDs
	 * @param array $newCluster map of new cluster label => list of face IDs
	 * @return array map of final person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index the old faces by owner once, instead of scanning every old cluster
		// for every new face. The first owner wins, as with a sequential search.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions. A missing old person yields the key ":<new>", which
		// is read back as old person 0 below.
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // used as a set, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// Already decided by a more frequent transition.
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was taken by a bigger cluster: this one becomes a new person.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition: treat it as a new person
			// instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
409
}
410