Passed
Pull Request — master (#616)
by Matias
05:31 queued 03:30
created

CreateClustersTask::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 11
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 5
nc 1
nop 4
dl 0
loc 11
ccs 6
cts 6
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
/**
 * @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
 *
 * @author Branko Kokanovic <[email protected]>
 *
 * @license GNU AGPL version 3 or any later version
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
37
use OCA\FaceRecognition\Service\SettingsService;
38
/**
 * Task that, for each user, creates person clusters or refreshes the existing ones.
 */
class CreateClustersTask extends FaceRecognitionBackgroundTask {
	// todo: the thresholds below are still hard-coded; move them to config.

	/** Number of faces without a person at (or above) which clusters are recreated right away. */
	const MIN_NEW_FACES_TO_RECREATE = 25;

	/** Age in seconds (2 hours) past which even a few faces without a person trigger a recreation. */
	const MAX_UNASSIGNED_FACE_AGE = 2 * 60 * 60;

	/** Number of faces that must be exceeded before clusters are created for the first time. */
	const MIN_FACES_FOR_FIRST_CLUSTERING = 1000;

	/** Ratio of processed images that must be exceeded before clusters are created for the first time. */
	const MIN_PROCESSED_RATIO_FOR_FIRST_CLUSTERING = 0.95;

	/** @var PersonMapper Person mapper */
	private $personMapper;

	/** @var ImageMapper Image mapper */
	private $imageMapper;

	/** @var FaceMapper Face mapper */
	private $faceMapper;

	/** @var SettingsService Settings service */
	private $settingsService;

	/**
	 * @param PersonMapper $personMapper
	 * @param ImageMapper $imageMapper
	 * @param FaceMapper $faceMapper
	 * @param SettingsService $settingsService
	 */
	public function __construct(PersonMapper    $personMapper,
	                            ImageMapper     $imageMapper,
	                            FaceMapper      $faceMapper,
	                            SettingsService $settingsService)
	{
		parent::__construct();

		$this->personMapper    = $personMapper;
		$this->imageMapper     = $imageMapper;
		$this->faceMapper      = $faceMapper;
		$this->settingsService = $settingsService;
	}

	/**
	 * @inheritdoc
	 */
	public function description() {
		return "Create new persons or update existing persons";
	}

	/**
	 * @inheritdoc
	 *
	 * Runs as a generator: it yields once after each processed user and
	 * finally returns true.
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// We cannot yield from inside a closure, so the users are collected first and
		// iterated afterwards. Only the UID is kept, to avoid holding on to IUser objects.
		$eligibleUsers = [];
		if ($this->context->user === null) {
			$this->context->userManager->callForSeenUsers(function (IUser $user) use (&$eligibleUsers) {
				$eligibleUsers[] = $user->getUID();
			});
		} else {
			$eligibleUsers[] = $this->context->user->getUID();
		}

		foreach ($eligibleUsers as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}

	/**
	 * Decides whether the clusters of the given user have to be (re)created and,
	 * if so, clusters the user's faces and stores the merged result.
	 *
	 * @param string $userId ID of the user to process
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		if ($hasPersons) {
			// All three checks are evaluated up front, exactly as before, so their
			// debug logging does not depend on which one ends up deciding.
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			} elseif ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
			} else {
				// If there are no invalid persons and no recent new faces, there is no need to recreate the clusters.
				$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
				return;
			}
		} else {
			// User should not be able to use this directly, used in tests
			$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
			$needCreate = $this->needCreateFirstTime($userId, $modelId);

			if ($forceTestCreation) {
				$this->logInfo('Force the creation of clusters for testing');
			} elseif ($needCreate) {
				$this->logInfo('Face clustering will be created for the first time.');
			} else {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo('* have 1000 faces already processed');
				$this->logInfo('* or you need to have 95% of you images processed');
				$this->logInfo('Use stats command to track progress');
				return;
			}
		}

		// Ok. If we are here, the clusters must be (re)created.
		$minFaceSize = $this->settingsService->getMinimumFaceSize();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		$faces = array_merge(
			$this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence),
			$this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence)
		);

		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster set is an associative array where the key is a person ID and the value
		// is an array of face IDs. For old clusters the person IDs are existing person IDs;
		// for new clusters they are whatever chinese whispers decides to label them.
		$currentClusters = $this->getCurrentClusters($faces);

		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// Map the new labels back onto existing persons where possible, then persist.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Evaluate whether new faces justify recreating the clusters. We don't want to jump
	 * the gun here, so a recreation is requested only when there are either:
	 *   - at least MIN_NEW_FACES_TO_RECREATE faces without a person, or
	 *   - fewer than that, but the oldest of them is older than MAX_UNASSIGNED_FACE_AGE.
	 *
	 * (basically, we want to avoid recreating clusters for each new face being uploaded,
	 *  however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool true when the pending faces justify a recreation
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $facesWithoutPersons, $userId, $modelId));

		if ($facesWithoutPersons === 0) {
			return false;
		}

		if ($facesWithoutPersons >= self::MIN_NEW_FACES_TO_RECREATE) {
			return true;
		}

		// We have some faces, but not that many, let's see when the oldest one was created.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$oldestFaceTimestamp = $oldestFace->creationTime->getTimestamp();
		$currentTimestamp = time();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		return ($currentTimestamp - $oldestFaceTimestamp) > self::MAX_UNASSIGNED_FACE_AGE;
	}

	/**
	 * Tells whether the person mapper reports clusters that call for a recreation.
	 *
	 * NOTE(review): the `true` flag presumably restricts the count to invalidated
	 * (stale) clusters - confirm against PersonMapper::countClusters().
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool true when at least one such cluster is counted
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		return $this->personMapper->countClusters($userId, $modelId, true) > 0;
	}

	/**
	 * Tells whether the settings request a cluster recreation for the user.
	 *
	 * @param string $userId ID of the user
	 * @return bool value of the "need recreate clusters" setting
	 */
	private function needRecreateBySettings(string $userId): bool {
		return $this->settingsService->getNeedRecreateClusters($userId);
	}

	/**
	 * Evaluate whether enough data has been collected to create clusters for the first time.
	 *
	 * Clusters built from too little data would be small and not "stable" enough, so we
	 * wait until either more than MIN_FACES_FOR_FIRST_CLUSTERING faces exist or more than
	 * MIN_PROCESSED_RATIO_FOR_FIRST_CLUSTERING of the user's images are processed.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 * @return bool true when the first clustering should run
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		if ($imageCount === 0) {
			return false;
		}

		$imageProcessed = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($imageProcessed === 0) {
			return false;
		}

		$facesCount = $this->faceMapper->countFaces($userId, $modelId);
		if ($facesCount > self::MIN_FACES_FOR_FIRST_CLUSTERING) {
			return true;
		}

		$percentImagesProcessed = $imageProcessed / floatval($imageCount);
		return $percentImagesProcessed > self::MIN_PROCESSED_RATIO_FOR_FIRST_CLUSTERING;
	}

	/**
	 * Groups the given faces by the person they are currently assigned to.
	 * Faces without a person are left out.
	 *
	 * @param array $faces faces exposing `person` and `id`
	 * @return array map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$currentClusters = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending to a missing key creates the inner list on the fly.
			$currentClusters[$face->person][] = $face->id;
		}
		return $currentClusters;
	}

	/**
	 * Clusters the given faces with chinese whispers.
	 *
	 * An edge is created between two faces whose descriptor distance is below the
	 * configured sensitivity. A face without a descriptor only gets an edge to itself.
	 *
	 * @param array $faces faces exposing `descriptor` and `id`
	 * @return array map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// No faces means no clusters; skip the call into the extension altogether.
		if (count($faces) === 0) {
			return [];
		}

		// The pairing below addresses faces by position, so make sure we have a 0..n-1 list.
		$faces = array_values($faces);
		$facesCount = count($faces);

		$sensitivity = $this->settingsService->getSensitivity();

		// The native distance function is only used when pdlib reports version 1.0.2 or
		// newer; otherwise fall back to the PHP implementation. Checked once, not per pair.
		$usePdlibDistance = version_compare(phpversion('pdlib'), '1.0.2', '>=');

		// Create edges for chinese whispers
		$edges = [];
		for ($i = 0; $i < $facesCount; $i++) {
			$face1 = $faces[$i];
			if (!isset($face1->descriptor)) {
				$edges[] = [$i, $i];
				continue;
			}
			// $j starts at $i on purpose: the face is also compared with itself.
			for ($j = $i; $j < $facesCount; $j++) {
				$face2 = $faces[$j];
				if (!isset($face2->descriptor)) {
					continue;
				}
				$distance = $usePdlibDistance
					? dlib_vector_length($face1->descriptor, $face2->descriptor)
					: Euclidean::distance($face1->descriptor, $face2->descriptor);
				if ($distance < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		// The result is read as one label per face index.
		$labelsByIndex = dlib_chinese_whispers($edges);

		$newClusters = [];
		foreach ($labelsByIndex as $index => $label) {
			$newClusters[$label][] = $faces[$index]->id;
		}

		return $newClusters;
	}

	/**
	 * Re-labels freshly computed clusters with the IDs of existing persons where possible.
	 *
	 * Each (old person, new cluster) pair is weighted by the number of faces moving
	 * between them and the pairs are visited from the heaviest down. A new cluster takes
	 * the old person of its heaviest pair unless that person is already taken. Every
	 * other cluster gets a fresh ID above the highest old person ID.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster map of existing person ID => list of face IDs
	 * @param array $newCluster map of new cluster label => list of face IDs
	 * @return array map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index the old clusters by face once, instead of searching every old cluster
		// for every face. The first person listing a face wins, as with an in-order search.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => [old person or null, new person]
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A missing old person yields a key such as ":7".
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person, heaviest transitions first.
		// An old person of 0 means "no old person": intval('') evaluates to 0.
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // keyed by old person, so the lookup needs no in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person is already taken by a heavier transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with the new clusters, replace new person IDs with old person IDs.
		$nextPersonId = 1;
		if (count($oldCluster) > 0) {
			$nextPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition; treat it as a new person
			// instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$nextPersonId] = $newFaces;
				$nextPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
}
406