Passed
Push — master ( b89a89...58e3e0 )
by Matias
09:16
created

CreateClustersTask::createClusterIfNeeded()   B

Complexity

Conditions 10
Paths 34

Size

Total Lines 104
Code Lines 57

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 41
CRAP Score 11.9227

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 10
eloc 57
c 1
b 0
f 0
nc 34
nop 1
dl 0
loc 104
ccs 41
cts 56
cp 0.7321
crap 11.9227
rs 7.0715

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
use OCA\FaceRecognition\Helper\Requirements;
37
38
use OCA\FaceRecognition\Clusterer\ChineseWhispers;
39
40
use OCA\FaceRecognition\Service\SettingsService;
41
/**
42
 * Task that, for each user, creates person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var PersonMapper Person mapper*/
46
	private $personMapper;
47
48
	/** @var ImageMapper Image mapper*/
49
	private $imageMapper;
50
51
	/** @var FaceMapper Face mapper*/
52
	private $faceMapper;
53
54
	/** @var SettingsService Settings service*/
55
	private $settingsService;
56
57
	/**
	 * Stores the collaborators this task works with and runs the parent constructor.
	 *
	 * @param PersonMapper $personMapper kept in $this->personMapper
	 * @param ImageMapper $imageMapper kept in $this->imageMapper
	 * @param FaceMapper $faceMapper kept in $this->faceMapper
	 * @param SettingsService $settingsService kept in $this->settingsService
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
75
76
	/**
	 * @inheritdoc
	 *
	 * Returns the fixed, human readable summary of what this task does.
	 */
	public function description() {
		$summary = "Create new persons or update existing persons";

		return $summary;
	}
82
83
	/**
	 * @inheritdoc
	 *
	 * Implemented as a generator: after the context is stored, every eligible
	 * user reported by the context is processed in turn, yielding once after
	 * each user. The generator's return value is true.
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		foreach ($this->context->getEligibleUsers() as $userId) {
			$this->createClusterIfNeeded($userId);
			// Hand control back to the caller between users.
			yield;
		}

		return true;
	}
96
97
	/**
	 * Creates or recreates the person clusters of one user, when worthwhile.
	 *
	 * The method first decides whether clustering should run at all (see
	 * shouldRecreateExistingClusters() and shouldCreateInitialClusters()) and
	 * returns early, after logging the reason, when it should not. Otherwise it:
	 * - loads the groupable and non-groupable faces of the current model,
	 * - clusters them in one or more batches,
	 * - merges the result with the clusters the faces already belong to,
	 * - hands both to PersonMapper::mergeClusterToDatabase(),
	 * - deletes orphaned persons and resets the "need recreate" and
	 *   "force create" settings of the user.
	 *
	 * @param string $userId ID of the user whose clusters are handled
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		$shouldCluster = $hasPersons
			? $this->shouldRecreateExistingClusters($userId, $modelId)
			: $this->shouldCreateInitialClusters($userId, $modelId);
		if (!$shouldCluster) {
			return;
		}

		// Ok. If we are here, the clusters must be recreated.
		$minFaceSize = $this->settingsService->getMinimumFaceSize();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		$faces = array_merge(
			$this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence),
			$this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence)
		);
		$this->logInfo('There are ' . count($faces) . ' faces for clustering');

		$newClusters = $this->clusterFacesInBatches($faces);

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters($faces);

		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides, for a user that already has persons, whether to recluster, and logs why.
	 *
	 * @return bool true when the clusters must be recreated
	 */
	private function shouldRecreateExistingClusters(string $userId, int $modelId): bool {
		// All three checks are evaluated eagerly so their debug output is always emitted.
		$forceRecreate = $this->needRecreateBySettings($userId);
		$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
		$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

		if ($forceRecreate) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			return true;
		}
		if ($haveEnoughFaces || $haveStaled) {
			$this->logInfo('Face clustering will be recreated with new information or changes');
			return true;
		}

		// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
		$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
		return false;
	}

	/**
	 * Decides, for a user without persons, whether to cluster for the first time, and logs why.
	 *
	 * @return bool true when the clusters must be created
	 */
	private function shouldCreateInitialClusters(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}
		if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of you images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}

	/**
	 * Runs getNewClusters() over the faces, split into batches when a batch size is configured.
	 *
	 * @param array $faces list of faces to cluster
	 * @return array the clusters of all batches appended to one another
	 */
	private function clusterFacesInBatches(array $faces): array {
		$facesCount = count($faces);
		$noSlices = 1;
		$sliceSize = $facesCount;

		$batchSize = $this->settingsService->getClusterigBatchSize();
		// Without faces there is nothing to split, and dividing by a batch size
		// clamped to the (zero) faces count would raise a division by zero.
		if ($batchSize > 0 && $facesCount > 0) {
			// The minimum batch size enforced here is 2000 faces.
			$batchSize = max($batchSize, 2000);
			// The maximum batch size is the faces count.
			$batchSize = min($batchSize, $facesCount);
			// Round up so an exact multiple (including "everything fits in one
			// batch") does not get an extra, needless batch.
			$noSlices = (int) ceil($facesCount / $batchSize);
			// array_slice() expects integers, ceil() returns a float.
			$sliceSize = (int) ceil($facesCount / $noSlices);
		}

		$this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces');

		$newClusters = [];
		for ($i = 0; $i < $noSlices; $i++) {
			$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
			$newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced));
		}
		return $newClusters;
	}
205
206
	/**
	 * Tells whether the faces counted by FaceMapper::countFaces() (third argument
	 * true; per the log message, faces without an associated person) justify
	 * recreating the clusters.
	 *
	 * The answer is:
	 * - false when that count is 0,
	 * - true when it is 25 or more,
	 * - otherwise true only if the oldest such face was created more than 2 hours ago.
	 *
	 * (basically, we want to avoid recreating clusters for each new face being uploaded,
	 *  however, we don't want to wait too much as clusters could be changed a lot)
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$pendingFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $pendingFaces, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($pendingFaces === 0) {
			return false;
		}
		if ($pendingFaces >= 25) {
			return true;
		}

		// Only a few faces are waiting: look at how long the oldest one has been waiting.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime->getTimestamp();
		$now = (new \DateTime())->getTimestamp();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		$ageInSeconds = $now - $createdAt;
		return $ageInSeconds > 2 * 60 * 60;
	}
243
244
	/**
	 * Reports whether PersonMapper::countClusters(), called with its third
	 * argument set to true, finds at least one cluster for the user and model.
	 *
	 * NOTE(review): presumably that flag selects invalidated (stale) clusters;
	 * confirm against PersonMapper.
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);

		return $staleClusters > 0;
	}
247
248
	/**
	 * Returns the user's "need recreate clusters" setting as reported by the settings service.
	 */
	private function needRecreateBySettings(string $userId): bool {
		$needRecreate = $this->settingsService->getNeedRecreateClusters($userId);

		return $needRecreate;
	}
251
252 1
	/**
	 * Tells whether enough data has been collected to cluster a user for the first time.
	 *
	 * True when the force-create setting is on. Otherwise false while the user
	 * has no images, or none counted by countUserImages() with its third argument
	 * true (named "processed" here). Beyond that, true when more than 1000 faces
	 * exist or when more than 95% of the images are processed.
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// Clusters built on less would be small and not "stable" enough; better wait for more images.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);
		return $processedRatio > 0.95;
	}
278
279 1
	/**
	 * Groups the IDs of the given faces by the person they currently belong to.
	 *
	 * Faces whose person is null are left out.
	 *
	 * @param array $faces objects exposing ->person and ->id
	 * @return array map of person => list of face IDs, in input order
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending creates the per-person list on first use.
			$clustersByPerson[$face->person][] = $face->id;
		}
		return $clustersByPerson;
	}
291
292 1
	/**
	 * Clusters the given faces with Chinese Whispers and returns the clusters found.
	 *
	 * Two faces are linked when the distance between their descriptors is below
	 * the configured sensitivity. When pdlib is loaded, its native functions do
	 * the distance and clustering work; otherwise the PHP implementations
	 * (Euclidean, ChineseWhispers) are used.
	 *
	 * @param array $faces 0-based list of face objects exposing ->id and, optionally, ->descriptor
	 * @return array map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Nothing to cluster: do not hand an empty graph to the clusterer.
		if (count($faces) === 0) {
			return [];
		}

		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();

		if (Requirements::pdlibLoaded()) {
			$edges = $this->buildSimilarityEdges($faces, $sensitivity, static function ($a, $b) {
				return dlib_vector_length($a, $b);
			});

			// Given the edges get the list of labels (found clusters) for each face.
			$newChineseClustersByIndex = dlib_chinese_whispers($edges);
		} else {
			$edges = $this->buildSimilarityEdges($faces, $sensitivity, static function ($a, $b) {
				return Euclidean::distance($a, $b);
			});

			// The clustering algorithm actually expects ordered lists.
			$oedges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $oedges);
			usort($oedges, static function ($a, $b) {
				if ($a[0] === $b[0]) {
					return $a[1] <=> $b[1];
				}
				return $a[0] <=> $b[0];
			});

			// Given the edges get the list of labels (found clusters) for each face.
			$newChineseClustersByIndex = [];
			ChineseWhispers::predict($oedges, $newChineseClustersByIndex);
		}

		// Label at index $i belongs to face at index $i: group the face IDs by label.
		$newClusters = [];
		for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
			$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id;
		}
		return $newClusters;
	}

	/**
	 * Builds the edges (pairs of face indexes) used as neighbors by Chinese Whispers.
	 *
	 * A face without descriptor only gets an edge to itself. Every other face
	 * is compared with itself and with each later face that has a descriptor;
	 * an edge is added whenever the distance is below the sensitivity.
	 *
	 * @param array $faces 0-based list of face objects
	 * @param mixed $sensitivity distance threshold, as returned by the settings service
	 * @param callable $distance receives two descriptors and returns their distance
	 * @return array list of [index, index] pairs
	 */
	private function buildSimilarityEdges(array $faces, $sensitivity, callable $distance): array {
		$edges = [];
		$facesCount = count($faces);
		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				$edges[] = [$i, $i];
				continue;
			}
			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}
				if ($distance($face1->descriptor, $face2->descriptor) < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}
		return $edges;
	}
365
366
	/**
	 * Maps freshly computed clusters onto the existing person IDs.
	 *
	 * Each face of a new cluster "votes" for the transition from the old person
	 * it belonged to (if any) to its new cluster. Transitions are then taken from
	 * the most to the least voted: a new cluster inherits an old person ID unless
	 * that old person was already given to another new cluster. New clusters left
	 * without an old person get fresh IDs, starting one above the largest old
	 * person ID (or at 1 when there are no old clusters).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster map of old person ID => list of face IDs
	 * @param array $newCluster map of new cluster label => list of face IDs
	 * @return array map of resulting person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index each old face by the first old person holding it, so the lookup
		// below is a hash access instead of a scan over every old cluster.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A face without old person yields the key ":<new>",
		// which reads back as old person 0 below.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// Old person already taken by a more voted transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition: treat it as a new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
429
}
430