Completed
Push — master ( c37bb9...9e9a54 )
by Matias
26s queued 13s
created

CreateClustersTask::mergeClusters()   F

Complexity

Conditions 14
Paths 360

Size

Total Lines 59
Code Lines 41

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 39
CRAP Score 14

Importance

Changes 0
Metric Value
cc 14
eloc 41
nc 360
nop 2
dl 0
loc 59
ccs 39
cts 39
cp 1
crap 14
rs 3.4333
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IUser;
27
28
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
30
31
use OCA\FaceRecognition\Db\FaceMapper;
32
use OCA\FaceRecognition\Db\ImageMapper;
33
use OCA\FaceRecognition\Db\PersonMapper;
34
35
use OCA\FaceRecognition\Helper\Euclidean;
36
37
use OCA\FaceRecognition\Service\SettingsService;
38
/**
39
 * Task that, for each user, creates person clusters.
40
 */
41
class CreateClustersTask extends FaceRecognitionBackgroundTask {
42
	/** @var PersonMapper Person mapper*/
43
	private $personMapper;
44
45
	/** @var ImageMapper Image mapper*/
46
	private $imageMapper;
47
48
	/** @var FaceMapper Face mapper*/
49
	private $faceMapper;
50
51
	/** @var SettingsService Settings service*/
52
	private $settingsService;
53
54
	/**
	 * Stores the mappers and the settings service this task works with.
	 *
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceMapper $faceMapper Face mapper
	 * @param SettingsService $settingsService Settings service
	 */
	public function __construct(
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper,
		SettingsService $settingsService
	) {
		parent::__construct();

		$this->settingsService = $settingsService;
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
	}
72
73
	/**
	 * Short, human readable summary of what this task does.
	 *
	 * @inheritdoc
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
79
80
	/**
	 * Runs cluster creation for the context's user, or for every seen user
	 * when the context names none, yielding after each user.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// A generator cannot yield from inside a closure, so the user IDs are
		// gathered first and iterated afterwards. Only the UID is kept, to
		// avoid holding on to whole IUser objects.
		$userIds = [];
		$requestedUser = $this->context->user;
		if ($requestedUser !== null) {
			$userIds[] = $requestedUser->getUID();
		} else {
			$collectUid = function (IUser $user) use (&$userIds) {
				$userIds[] = $user->getUID();
			};
			$this->context->userManager->callForSeenUsers($collectUid);
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
105
106 1
	/**
	 * Creates or recreates the person clusters of one user, but only when
	 * that is considered worthwhile.
	 *
	 * When the user already has persons, clusters are rebuilt only if
	 * shouldRecreateClusters() says so; otherwise they are built for the
	 * first time only if hasEnoughDataForClustering() says so. After
	 * clustering, orphaned persons are removed and both the "need recreate"
	 * and "force create" flags of the user are reset.
	 *
	 * @param string $userId ID of the user whose faces are clustered
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		if ($hasPersons) {
			if (!$this->shouldRecreateClusters($userId, $modelId)) {
				return;
			}
		} else if (!$this->hasEnoughDataForClustering($userId, $modelId)) {
			return;
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' persons found after clustering');

		// Map the new clusters onto existing person IDs and persist the result.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset both flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the existing clusters of a user must be rebuilt.
	 *
	 * Clusters are rebuilt when any of these hold:
	 * * recreation was explicitly requested for the user,
	 * * some person is invalidated (e.g. one of its images changed or was deleted),
	 * * there are 10 or more faces without a person, or
	 * * there are fewer than 10 such faces, but the oldest is more than 2 hours old
	 *   (avoids reclustering for every uploaded face without waiting forever).
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param mixed $modelId current face model ID, as returned by the settings service
	 * @return bool true when clusters should be recreated
	 */
	private function shouldRecreateClusters(string $userId, $modelId): bool {
		$haveNewFaces = false;
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));
		// todo: get rid of magic numbers (move to config)
		if ($facesWithoutPersons >= 10) {
			$haveNewFaces = true;
		} else if ($facesWithoutPersons > 0) {
			// We have some faces, but not that many, let's see when oldest one is generated.
			$face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
			$oldestFaceTimestamp = $face->creationTime->getTimestamp();
			$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
				$userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
			// todo: get rid of magic numbers (move to config)
			if (time() - $oldestFaceTimestamp > 2 * 60 * 60) {
				$haveNewFaces = true;
			}
		}

		$stalePersonsCount = $this->personMapper->countPersons($userId, $modelId, true);
		$forceRecreation = $this->settingsService->getNeedRecreateClusters($userId);

		$this->logDebug(sprintf('Found %d changed persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));

		if ($forceRecreation) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
			return true;
		}
		if ($stalePersonsCount > 0 || $haveNewFaces) {
			return true;
		}

		// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
		$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
		return false;
	}

	/**
	 * Decides whether enough data has been collected to create the first
	 * clusters of a user.
	 *
	 * Small clusters are not "stable" enough, so creation waits until at
	 * least one of these holds: 1000 faces, 100 images, 95% of the images
	 * processed, or creation being forced for the user (used in tests).
	 *
	 * @param string $userId ID of the user being evaluated
	 * @param mixed $modelId current face model ID, as returned by the settings service
	 * @return bool true when clusters may be created
	 */
	private function hasEnoughDataForClustering(string $userId, $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forceCreation = $this->settingsService->getForceCreateClusters($userId);

		// todo: 2 queries to get these 2 counts, can we do this smarter?
		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
		// Fraction (0..1) of the user's images that are already processed.
		$percentImagesProcessed = 0;
		if ($imageCount > 0) {
			$percentImagesProcessed = $imageProcessed / floatval($imageCount);
		}
		$facesCount = $this->faceMapper->countFaces($userId, $modelId);

		// todo: get rid of magic numbers (move to config)
		if ($forceCreation || $facesCount >= 1000 || $imageCount >= 100 || $percentImagesProcessed >= 0.95) {
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
		$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
		// The stored value is a fraction; scale it so the message really shows a percentage.
		$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
		return false;
	}
215
216 1
	/**
	 * Groups the faces that already belong to a person by that person's ID.
	 *
	 * @param array $faces face objects exposing `person` and `id` properties
	 * @return array map of person ID => list of face IDs; faces whose
	 *               `person` is null are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$clusters = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending to a missing key creates the inner list on the fly.
			$clusters[$face->person][] = $face->id;
		}
		return $clusters;
	}
228
229 1
	/**
	 * Clusters the given faces with chinese whispers.
	 *
	 * An edge is created between every pair of faces whose descriptor
	 * distance is below the configured sensitivity. Faces below the minimum
	 * confidence only get an edge to themselves, so they always end up in a
	 * cluster of their own, whichever position they have in $faces.
	 *
	 * @param array $faces list (0-based, consecutive keys) of face objects
	 *                     exposing `id`, `confidence` and `descriptor`
	 * @return array map of cluster label (as assigned by chinese whispers)
	 *               => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		$faceCount = count($faces);
		if ($faceCount === 0) {
			// Nothing to cluster; do not bother the native extension.
			return array();
		}

		$sensitivity = $this->settingsService->getSensitivity();
		$min_confidence = $this->settingsService->getMinimumConfidence();

		// pdlib >= 1.0.2 ships a native distance function; older versions
		// fall back to the PHP implementation.
		if (version_compare(phpversion('pdlib'), '1.0.2', '>=')) {
			$distanceBetween = static function ($descriptor1, $descriptor2) {
				return /** @scrutinizer ignore-call */ dlib_vector_length($descriptor1, $descriptor2);
			};
		} else {
			$euclidean = new Euclidean();
			// todo: can't this distance be a method in $face1->distance($face2)?
			$distanceBetween = static function ($descriptor1, $descriptor2) use ($euclidean) {
				return $euclidean->distance($descriptor1, $descriptor2);
			};
		}

		// Create edges for chinese whispers
		$edges = array();
		for ($i = 0; $i < $faceCount; $i++) {
			$face1 = $faces[$i];
			if ($face1->confidence < $min_confidence) {
				// Self edge only: keeps the face as a node, but isolated.
				$edges[] = array($i, $i);
				continue;
			}
			for ($j = $i; $j < $faceCount; $j++) {
				$face2 = $faces[$j];
				if ($face2->confidence < $min_confidence) {
					// Low confidence faces must stay isolated even when they
					// come after a confident face in the list.
					continue;
				}
				$distance = $distanceBetween($face1->descriptor, $face2->descriptor);
				if ($distance < $sensitivity) {
					$edges[] = array($i, $j);
				}
			}
		}

		// Entry $i of the result is the cluster label assigned to $faces[$i].
		$newChineseClustersByIndex = /** @scrutinizer ignore-call */ dlib_chinese_whispers($edges);
		$newClusters = array();
		for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
			$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id;
		}

		return $newClusters;
	}
282
283
	/**
	 * Maps freshly computed clusters onto the existing person IDs.
	 *
	 * Every face moves from its old person (or from "no person") to a new
	 * cluster. These transitions are counted, and, starting with the most
	 * frequent one, each new cluster takes the ID of the old person its
	 * faces came from, unless another new cluster already claimed that
	 * person. New clusters left without an old person get fresh IDs, starting
	 * right after the highest existing person ID (or at 1 if there is none).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster existing clusters, person ID => list of face IDs
	 * @param array $newCluster new clusters, cluster label => list of face IDs
	 * @return array merged clusters, person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		$oldPersonByFace = $this->indexFacesByOldPerson($oldCluster);
		$transitionCount = $this->countTransitions($oldPersonByFace, $newCluster);
		$newOldPersonMapping = $this->mapNewPersonsToOldPersons($transitionCount);

		// Starting with new cluster, convert all new person IDs with old person IDs
		$nextPersonId = 1;
		if (count($oldCluster) > 0) {
			$nextPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition; treat it as a new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$nextPersonId] = $newFaces;
				$nextPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}

	/**
	 * Builds a face ID => old person ID lookup, so each face is resolved
	 * with one array access instead of a scan over all old clusters.
	 *
	 * @param array $oldCluster existing clusters, person ID => list of face IDs
	 * @return array face ID => person ID; if a face is listed under several
	 *               persons, the first person wins
	 */
	private function indexFacesByOldPerson(array $oldCluster): array {
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!array_key_exists($oldFace, $oldPersonByFace)) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}
		return $oldPersonByFace;
	}

	/**
	 * Counts how many faces move from each old person to each new cluster.
	 *
	 * @param array $oldPersonByFace face ID => old person ID
	 * @param array $newCluster new clusters, cluster label => list of face IDs
	 * @return array "oldPerson:newPerson" => number of faces; old person 0
	 *               stands for faces that had no person
	 */
	private function countTransitions(array $oldPersonByFace, array $newCluster): array {
		// Keyed by face so that every face contributes exactly one transition.
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = ($oldPersonByFace[$newFace] ?? 0) . ':' . $newPerson;
			}
		}
		return array_count_values($transitions);
	}

	/**
	 * Assigns to every new cluster the old person it inherits, if any.
	 *
	 * Transitions are visited from most to least frequent. Only the first
	 * transition seen for a new cluster is considered: it inherits that old
	 * person if the person is still unclaimed, otherwise it becomes new.
	 *
	 * @param array $transitionCount "oldPerson:newPerson" => number of faces
	 * @return array new cluster label => old person ID, or 0 for a new person
	 */
	private function mapNewPersonsToOldPersons(array $transitionCount): array {
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // keyed by person ID, for cheap lookups
		arsort($transitionCount);
		foreach (array_keys($transitionCount) as $transitionKey) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				$newOldPersonMapping[$newPerson] = 0;
			}
		}
		return $newOldPersonMapping;
	}
346
}
347