Completed
Push — master ( 158178...7360c8 )
by Matias
23s queued 12s
created

CreateClustersTask::createClusterIfNeeded()   B

Complexity

Conditions 8
Paths 10

Size

Total Lines 73
Code Lines 40

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 26
CRAP Score 9.6817

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
eloc 40
c 1
b 0
f 0
nc 10
nop 1
dl 0
loc 73
ccs 26
cts 37
cp 0.7027
crap 9.6817
rs 8.0355

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

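Commonly applied refactorings include Extract Method; when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object and Preserve Whole Object can help as well.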

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
37
use OCA\FaceRecognition\Service\SettingsService;
38
/**
39
 * Task that creates person clusters for each user.
40
 */
41
class CreateClustersTask extends FaceRecognitionBackgroundTask {
42
	/** @var PersonMapper Person mapper*/
43
	private $personMapper;
44
45
	/** @var ImageMapper Image mapper*/
46
	private $imageMapper;
47
48
	/** @var FaceMapper Face mapper*/
49
	private $faceMapper;
50
51
	/** @var SettingsService Settings service*/
52
	private $settingsService;
53
54
	/**
	 * Stores the mappers and the settings service used by this task.
	 *
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceMapper $faceMapper Face mapper
	 * @param SettingsService $settingsService Settings service
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
72
73
	/**
	 * Short human-readable summary of what this task does.
	 *
	 * @inheritdoc
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
79
80
	/**
	 * Runs cluster creation for the user set in the context, or for every seen user
	 * when the context has no user. Yields once after each processed user.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// A closure cannot yield, so the user IDs are collected first and iterated afterwards.
		// Only the UID is kept, to avoid holding (or deep copying) IUser objects.
		$userIds = [];
		if (!is_null($this->context->user)) {
			$userIds[] = $this->context->user->getUID();
		} else {
			$collectUid = function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			};
			$this->context->userManager->callForSeenUsers($collectUid);
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
105
106 1
	/**
	 * Creates or recreates the person clusters of a user, but only when it is worth doing.
	 *
	 * When the user already has persons for the current model, the clusters are recreated only
	 * if the settings request it, there are new faces, or some persons are stale. When there
	 * are no persons yet, the clusters are created only once enough data has been collected
	 * (or creation is forced, which is meant for tests).
	 *
	 * @param string $userId ID of the user whose faces are clustered
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
		$mustCluster = $hasPersons
			? $this->shouldRecreateExistingClusters($userId, $modelId)
			: $this->shouldCreateFirstClusters($userId, $modelId);
		if (!$mustCluster) {
			return;
		}

		// Ok. If we are here, the clusters must be (re)created.
		$this->rebuildClusters($userId, $modelId);

		// Clear both request flags so the next run does not create/recreate the clusters unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the already existing clusters of a user must be recreated and logs why.
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId ID of the current face model, as returned by the settings service
	 * @return bool true when the clusters must be recreated
	 */
	private function shouldRecreateExistingClusters(string $userId, $modelId): bool {
		// All three checks run unconditionally; hasNewFacesToRecreate() also emits debug logging.
		$forceRecreate = $this->needRecreateBySettings($userId);
		$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
		$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

		if ($forceRecreate) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			return true;
		}

		if ($haveEnoughFaces || $haveStaled) {
			$this->logInfo('Face clustering will be recreated with new information or changes');
			return true;
		}

		// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
		$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
		return false;
	}

	/**
	 * Decides whether clusters must be created for a user that has no persons yet and logs why.
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId ID of the current face model, as returned by the settings service
	 * @return bool true when the clusters must be created
	 */
	private function shouldCreateFirstClusters(string $userId, $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}

		if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of you images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}

	/**
	 * Clusters all faces of the user, merges the result with the existing clusters,
	 * stores it and removes persons that end up without faces.
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId ID of the current face model, as returned by the settings service
	 */
	private function rebuildClusters(string $userId, $modelId) {
		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' persons found after clustering');

		// Map the new clusters onto existing person IDs where possible, then persist.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}
	}
180
181
	/**
	 * Tells whether the faces that have no person yet justify recreating the clusters.
	 *
	 * Clusters are recreated for new faces when either:
	 *   - at least 25 faces are waiting for a person, or
	 *   - fewer than 25 are waiting, but the oldest of them was created more than 2 hours ago.
	 *
	 * (basically, we want to avoid recreating clusters for each new face being uploaded,
	 *  however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId ID of the face model
	 * @return bool true when the clusters should be recreated because of new faces
	 */
	private function hasNewFacesToRecreate($userId, $modelId): bool {
		// todo: get rid of magic numbers (move to config)
		$enoughNewFaces = 25;
		$maxWaitSeconds = 2 * 60 * 60;

		$pendingFaces = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
		                $pendingFaces, $userId, $modelId));

		if ($pendingFaces === 0) {
			return false;
		}
		if ($pendingFaces >= $enoughNewFaces) {
			return true;
		}

		// We have some faces, but not that many, let's see when oldest one is generated.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$createdAt = $oldestFace->creationTime->getTimestamp();
		$now = (new \DateTime())->getTimestamp();
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
		                $userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		return ($now - $createdAt) > $maxWaitSeconds;
	}
218
219
	/**
	 * Tells whether the person mapper counts at least one person for the user and model
	 * when called with its third argument set to true.
	 * NOTE(review): presumably that flag restricts the count to invalidated persons - confirm in PersonMapper.
	 */
	private function hasStalePersonsToRecreate($userId, $modelId): bool {
		$stalePersons = $this->personMapper->countPersons($userId, $modelId, true);
		return $stalePersons > 0;
	}
222
223
	/**
	 * Reads from the settings whether a recreation of the clusters was requested for the user.
	 */
	private function needRecreateBySettings($userId): bool {
		$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);
		return $recreateRequested;
	}
226
227 1
	/**
	 * Tells whether enough data has been collected to create the clusters for the first time.
	 *
	 * Creation is considered worthwhile when it is forced through the settings, when more
	 * than 1000 faces exist, or when more than 95% of the user's images are processed.
	 * It is never worthwhile while the user has no images or no processed images.
	 *
	 * @param string $userId ID of the user
	 * @param mixed $modelId ID of the face model
	 * @return bool true when the clusters should be created
	 */
	private function needCreateFirstTime($userId, $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forced = $this->settingsService->_getForceCreateClusters($userId);
		if ($forced) {
			return true;
		}

		$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
		if ($totalImages === 0) {
			return false;
		}

		$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($processedImages === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// Smaller clusters would not be "stable" enough, so it is better to wait for more images.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$processedRatio = $processedImages / floatval($totalImages);
		return $processedRatio > 0.95;
	}
253
254 1
	/**
	 * Groups the IDs of the faces that already belong to a person by that person.
	 *
	 * @param array $faces Face objects exposing `person` and `id`
	 * @return array Map of person ID => list of face IDs; faces without a person are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending to a missing key creates the list on first use.
			$clustersByPerson[$face->person][] = $face->id;
		}
		return $clustersByPerson;
	}
266
267 1
	/**
	 * Clusters the given faces with chinese whispers.
	 *
	 * Two faces are connected when both pass the minimum confidence and minimum size
	 * settings and the distance between their descriptors is below the sensitivity setting.
	 * Faces failing those filters only get an edge to themselves.
	 *
	 * @param array $faces List of face objects, indexed from 0, exposing `confidence`,
	 *                     `descriptor`, `id`, `width()` and `height()`
	 * @return array Map of cluster label (as returned by dlib_chinese_whispers) => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();
		$minConfidence = $this->settingsService->getMinimumConfidence();
		$minFaceSize = $this->settingsService->getMinimumFaceSize();

		// Pick the distance function once: pdlib >= 1.0.2 provides a native one,
		// otherwise fall back to the PHP implementation.
		$pdlibVersion = phpversion('pdlib');
		if ($pdlibVersion !== false && version_compare($pdlibVersion, '1.0.2', '>=')) {
			$distanceBetween = static function ($descriptorA, $descriptorB) {
				return /** @scrutinizer ignore-call */ dlib_vector_length($descriptorA, $descriptorB);
			};
		} else {
			// todo: can't this distance be a method in $face1->distance($face2)?
			$euclidean = new Euclidean();
			$distanceBetween = static function ($descriptorA, $descriptorB) use ($euclidean) {
				return $euclidean->distance($descriptorA, $descriptorB);
			};
		}

		// Decide once per face whether it may be linked to other faces,
		// instead of repeating the check for every pair.
		$facesCount = count($faces);
		$canBeLinked = [];
		for ($i = 0; $i < $facesCount; $i++) {
			$face = $faces[$i];
			$canBeLinked[$i] = !(($face->confidence < $minConfidence) ||
			                     (max($face->height(), $face->width()) < $minFaceSize));
		}

		// Create edges for chinese whispers
		$edges = [];
		for ($i = 0; $i < $facesCount; $i++) {
			// Every face gets an edge to itself, so it is part of the graph even when it is
			// linked to no other face; the result is then expected to hold a label per face.
			$edges[] = [$i, $i];
			if (!$canBeLinked[$i]) {
				continue;
			}
			for ($j = $i + 1; $j < $facesCount; $j++) {
				if (!$canBeLinked[$j]) {
					continue;
				}
				$distance = $distanceBetween($faces[$i]->descriptor, $faces[$j]->descriptor);
				if ($distance < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}

		// The result holds one cluster label per face index.
		$labelsByIndex = /** @scrutinizer ignore-call */ dlib_chinese_whispers($edges);
		$newClusters = [];
		for ($i = 0, $c = count($labelsByIndex); $i < $c; $i++) {
			$newClusters[$labelsByIndex[$i]][] = $faces[$i]->id;
		}

		return $newClusters;
	}
333
334
	/**
	 * Maps freshly computed clusters onto the existing persons.
	 *
	 * Each new cluster takes over the ID of the old person it shares the most faces with,
	 * as long as that old person was not already taken by a better matching new cluster.
	 * New clusters without such a match (including clusters without any face) get fresh IDs,
	 * counted up from the highest old person ID + 1 (or from 1 when there are no old persons).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster ID => list of face IDs
	 * @return array Map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index the old clusters by face, so that looking up the old person of a face is a
		// single array access. The first cluster listing a face wins.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face => [old person or null, new person]
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A missing old person (null) yields an empty prefix in the key.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent transition first
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // old person IDs as keys, so the lookup needs no in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]); // 0 means "no old person"
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person is already taken, so this cluster becomes a new person.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster that produced no transition (e.g. it has no faces) has no mapping;
			// treat it as a new person instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
397
}
398