Passed
Push — master ( 2ba779...e5f4e6 )
by Matias
03:43
created

CreateClustersTask::getNewClusters()   C

Complexity

Conditions 14
Paths 18

Size

Total Lines 57
Code Lines 37

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 29.8457

Importance

Changes 3
Bugs 0 Features 1
Metric Value
cc 14
eloc 37
c 3
b 0
f 1
nc 18
nop 1
dl 0
loc 57
ccs 21
cts 37
cp 0.5676
crap 29.8457
rs 6.2666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
37
use OCA\FaceRecognition\Service\SettingsService;
38
/**
39
 * Task that, for each user, creates that user's person clusters.
40
 */
41
class CreateClustersTask extends FaceRecognitionBackgroundTask {
42
	/** @var PersonMapper Person mapper*/
43
	private $personMapper;
44
45
	/** @var ImageMapper Image mapper*/
46
	private $imageMapper;
47
48
	/** @var FaceMapper Face mapper*/
49
	private $faceMapper;
50
51
	/** @var SettingsService Settings service*/
52
	private $settingsService;
53
54
	/**
	 * Stores the mappers and the settings service this task works with.
	 *
	 * @param PersonMapper $personMapper Mapper used to count, merge and clean up persons
	 * @param ImageMapper $imageMapper Mapper used to count the user's images
	 * @param FaceMapper $faceMapper Mapper used to count and fetch faces
	 * @param SettingsService $settingsService Source of the clustering settings and flags
	 */
	public function __construct(
		PersonMapper    $personMapper,
		ImageMapper     $imageMapper,
		FaceMapper      $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper      = $faceMapper;
		$this->imageMapper     = $imageMapper;
		$this->personMapper    = $personMapper;
	}
72
73
	/**
	 * Short, human-readable summary of what this task does.
	 *
	 * @inheritdoc
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';

		return $summary;
	}
79
80
	/**
	 * Runs cluster creation for the context user, or for every seen user when the
	 * context carries no user, yielding once after each processed user.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// A closure cannot yield on behalf of this generator, so the users are
		// collected first and iterated afterwards. Only the UIDs are kept, which
		// avoids holding on to (or copying) IUser objects.
		$userIds = [];
		if ($this->context->user !== null) {
			$userIds[] = $this->context->user->getUID();
		} else {
			$collectUid = function (IUser $user) use (&$userIds) {
				$userIds[] = $user->getUID();
			};
			$this->context->userManager->callForSeenUsers($collectUid);
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
105
106
	/**
	 * Creates or recreates the person clusters of one user when the collected data
	 * warrants it, and persists the merged result.
	 *
	 * @param string $userId ID of the user whose faces are clustered
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		$mustBuild = $hasPersons
			? $this->shouldRecreateClusters($userId, $modelId)
			: $this->shouldCreateFirstClusters($userId, $modelId);
		if (!$mustBuild) {
			return;
		}

		// Ok. If we are here, the clusters must be (re)created.
		$minFaceSize = $this->settingsService->getMinimumFaceSize();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		$faces = array_merge(
			$this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence),
			$this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence)
		);

		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster set is an associative array whose keys are person IDs and whose
		// values are lists of face IDs. For the current clusters the keys are existing
		// person IDs; for the new clusters they are whatever labels chinese whispers
		// decided to use.
		$currentClusters = $this->getCurrentClusters($faces);

		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// Map the new labels back onto existing person IDs before persisting.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);

		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces).
		// NOTE: this is done for all models, not just the current one, which is not a problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Clear both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the already existing clusters of a user must be recreated,
	 * logging the reason for the decision.
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId Current face model, as returned by the settings service
	 * @return bool true when the clusters must be recreated
	 */
	private function shouldRecreateClusters(string $userId, $modelId): bool {
		// All three checks are evaluated up front, so their queries and debug
		// logging happen regardless of which one ends up deciding.
		$forceRecreate = $this->needRecreateBySettings($userId);
		$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
		$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

		if ($forceRecreate) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			return true;
		}

		if ($haveEnoughFaces || $haveStaled) {
			$this->logInfo('Face clustering will be recreated with new information or changes');
			return true;
		}

		// No invalid persons and no recent new faces: nothing to recreate.
		$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
		return false;
	}

	/**
	 * Decides whether clusters must be created for a user that has none yet,
	 * logging the reason for the decision.
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId Current face model, as returned by the settings service
	 * @return bool true when the clusters must be created
	 */
	private function shouldCreateFirstClusters(string $userId, $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}

		if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of your images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}
194
195
	/**
	 * Tells whether the faces that have no person yet justify recreating the clusters.
	 *
	 * Recreation is wanted when either:
	 *  - at least 25 faces have no associated person, or
	 *  - fewer than 25 such faces exist, but the oldest one was created more than
	 *    2 hours ago.
	 *
	 * (Basically, this avoids recreating the clusters for each newly uploaded face,
	 *  without waiting so long that the clusters drift a lot.)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId Face model the faces belong to
	 * @return bool true when the new faces warrant a recreation
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$unassignedFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf(
			'Found %d faces without associated persons for user %s and model %d',
			$unassignedFaces,
			$userId,
			$modelId
		));

		// todo: get rid of magic numbers (move to config)
		if ($unassignedFaces === 0) {
			return false;
		}
		if ($unassignedFaces >= 25) {
			return true;
		}

		// Some faces, but not that many: decide on the age of the oldest one.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$ageInSeconds = (new \DateTime())->getTimestamp() - $oldestFace->creationTime->getTimestamp();
		$this->logDebug(sprintf(
			'Oldest face without persons for user %s and model %d is from %s',
			$userId,
			$modelId,
			$oldestFace->creationTime->format('Y-m-d H:i:s')
		));

		// todo: get rid of magic numbers (move to config)
		return $ageInSeconds > 2 * 60 * 60;
	}
232
233
	/**
	 * Tells whether PersonMapper::countClusters(), called with its third argument
	 * set to true, reports at least one cluster for the user and model.
	 * NOTE(review): presumably that flag selects invalidated (stale) clusters;
	 * confirm against the mapper.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId Face model the clusters belong to
	 * @return bool true when at least one such cluster exists
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);

		return $staleClusters > 0;
	}
236
237
	/**
	 * Reads the per-user "need recreate clusters" flag from the settings service.
	 *
	 * @param string $userId ID of the user
	 * @return bool The value of the flag
	 */
	private function needRecreateBySettings(string $userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);

		return $recreateRequested;
	}
240
241 1
	/**
	 * Tells whether enough data has been collected to create clusters for the first time.
	 *
	 * Creation is wanted when the (test-only) force flag is set, or when the user has
	 * images, some of them counted by countUserImages() with its third argument set to
	 * true (processed images, going by how the result is used), and either more than
	 * 1000 faces exist or more than 95% of the images are in that processed count.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId Face model the images and faces belong to
	 * @return bool true when the first clusters should be created
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// Basic criteria without which creating clusters is not even considered:
		// the clusters would be small and not "stable" enough, so it is better to
		// wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / (float) $totalImages;

		return $processedRatio > 0.95;
	}
267
268 1
	/**
	 * Groups the IDs of the faces that already belong to a person by that person.
	 *
	 * @param array $faces Faces exposing `person` and `id` properties
	 * @return array Map of person ID => list of face IDs; faces without person are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$facesByPerson = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending to a key that does not exist yet creates the list on the fly.
			$facesByPerson[$face->person][] = $face->id;
		}

		return $facesByPerson;
	}
280
281 1
	/**
	 * Clusters the given faces with chinese whispers.
	 *
	 * Faces whose descriptor distance is below the configured sensitivity are linked
	 * by an edge; the edge list is handed to dlib_chinese_whispers(), and the label it
	 * returns for each face index is used to group the face IDs.
	 *
	 * @param array $faces Zero-indexed list of faces exposing `id` and `descriptor`
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();

		// Create edges for chinese whispers
		$edges = $this->buildSimilarityEdges($faces, $sensitivity);

		$labelsByIndex = /** @scrutinizer ignore-call */ dlib_chinese_whispers($edges);

		return $this->groupFaceIdsByLabel($faces, $labelsByIndex);
	}

	/**
	 * Builds the edge list for chinese whispers: one [i, j] pair (i <= j) for every
	 * two faces whose descriptor distance is below the sensitivity.
	 *
	 * @param array $faces Zero-indexed list of faces exposing `descriptor`
	 * @param mixed $sensitivity Distance threshold, as returned by the settings service
	 * @return array List of [index, index] pairs
	 */
	private function buildSimilarityEdges(array $faces, $sensitivity): array {
		// With pdlib >= 1.0.2 the extension's dlib_vector_length() is used; otherwise
		// the Euclidean helper. The code treats both as interchangeable distance
		// measures. The check is done once instead of duplicating the whole loop.
		$useNativeDistance = version_compare(phpversion('pdlib'), '1.0.2', '>=');

		$edges = array();
		$facesCount = count($faces);
		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				// Without descriptor the face cannot be compared: it only gets a
				// self-edge so that it still appears in the graph.
				$edges[] = array($i, $i);
				continue;
			}
			// $j starts at $i, so a face with descriptor is also compared to itself.
			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}
				$distance = $useNativeDistance
					? /** @scrutinizer ignore-call */ dlib_vector_length($face1->descriptor, $face2->descriptor)
					: Euclidean::distance($face1->descriptor, $face2->descriptor);
				if ($distance < $sensitivity) {
					$edges[] = array($i, $j);
				}
			}
		}

		return $edges;
	}

	/**
	 * Groups face IDs by the cluster label assigned to their index.
	 *
	 * @param array $faces Zero-indexed list of faces exposing `id`
	 * @param array $labelsByIndex Cluster label for each face index
	 * @return array Map of cluster label => list of face IDs
	 */
	private function groupFaceIdsByLabel(array $faces, array $labelsByIndex): array {
		$clusters = array();
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			$label = $labelsByIndex[$i];
			if (!isset($clusters[$label])) {
				$clusters[$label] = array();
			}
			$clusters[$label][] = $faces[$i]->id;
		}

		return $clusters;
	}
339
340
	/**
	 * Maps freshly computed clusters onto the existing persons so that person IDs
	 * stay stable across reclustering.
	 *
	 * For every face of the new clusters the "old person -> new cluster" transition is
	 * recorded and the transitions are counted. Going from the most to the least
	 * frequent transition, each new cluster takes the ID of the old person it was
	 * first paired with, unless that old person was already given to another new
	 * cluster. New clusters left without an old person get fresh IDs, starting after
	 * the largest old person ID (or at 1 when there are no old clusters).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index the old faces once instead of scanning every old cluster for every
		// new face. The first old person listing a face wins, which is what a
		// first-match scan over the old clusters would find.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions (null old person: the face is new)
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$oldPersonFound = $oldPersonByFace[$newFace] ?? null;
				$transitions[$newFace] = array($oldPersonFound, $newPerson);
			}
		}

		// Count transitions, keyed as "old:new" (the old part is empty for new faces)
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent first.
		// 0 stands for "no old person".
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // keyed by old person, so lookups need no in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a more frequent transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces produced no transition and therefore has
			// no mapping entry; treat it as a new person instead of reading an
			// undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}

		return $result;
	}
403
}
404