1 | <?php |
||||
2 | /** |
||||
3 | * @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]> |
||||
4 | * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]> |
||||
5 | * |
||||
6 | * @author Branko Kokanovic <[email protected]> |
||||
7 | * |
||||
8 | * @license GNU AGPL version 3 or any later version |
||||
9 | * |
||||
10 | * This program is free software: you can redistribute it and/or modify |
||||
11 | * it under the terms of the GNU Affero General Public License as |
||||
12 | * published by the Free Software Foundation, either version 3 of the |
||||
13 | * License, or (at your option) any later version. |
||||
14 | * |
||||
15 | * This program is distributed in the hope that it will be useful, |
||||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
18 | * GNU Affero General Public License for more details. |
||||
19 | * |
||||
20 | * You should have received a copy of the GNU Affero General Public License |
||||
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
||||
22 | * |
||||
23 | */ |
||||
24 | namespace OCA\FaceRecognition\BackgroundJob\Tasks; |
||||
25 | |||||
26 | use OCP\IUser; |
||||
27 | |||||
28 | use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; |
||||
29 | use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; |
||||
30 | |||||
31 | use OCA\FaceRecognition\Db\FaceMapper; |
||||
32 | use OCA\FaceRecognition\Db\ImageMapper; |
||||
33 | use OCA\FaceRecognition\Db\PersonMapper; |
||||
34 | |||||
35 | use OCA\FaceRecognition\Helper\Euclidean; |
||||
36 | use OCA\FaceRecognition\Helper\Requirements; |
||||
37 | |||||
38 | use OCA\FaceRecognition\Clusterer\ChineseWhispers; |
||||
39 | |||||
40 | use OCA\FaceRecognition\Service\SettingsService; |
||||
/**
 * Task that creates (or recreates) the person clusters of every eligible user.
 */
class CreateClustersTask extends FaceRecognitionBackgroundTask {
	/** @var PersonMapper Person mapper */
	private $personMapper;

	/** @var ImageMapper Image mapper */
	private $imageMapper;

	/** @var FaceMapper Face mapper */
	private $faceMapper;

	/** @var SettingsService Settings service */
	private $settingsService;

	/**
	 * @param PersonMapper $personMapper
	 * @param ImageMapper $imageMapper
	 * @param FaceMapper $faceMapper
	 * @param SettingsService $settingsService
	 */
	public function __construct(PersonMapper    $personMapper,
	                            ImageMapper     $imageMapper,
	                            FaceMapper      $faceMapper,
	                            SettingsService $settingsService)
	{
		parent::__construct();

		$this->personMapper    = $personMapper;
		$this->imageMapper     = $imageMapper;
		$this->faceMapper      = $faceMapper;
		$this->settingsService = $settingsService;
	}

	/**
	 * @inheritdoc
	 */
	public function description() {
		return "Create new persons or update existing persons";
	}

	/**
	 * Runs the clustering for every eligible user of the context.
	 *
	 * This is a generator: it yields once after each processed user and
	 * finishes with a generator return value of true.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$eligibleUsers = $this->context->getEligibleUsers();
		foreach ($eligibleUsers as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}

	/**
	 * Creates or recreates the clusters of one user, when that is considered
	 * necessary, and persists the result.
	 *
	 * Steps: decide whether clustering is needed, cluster the groupable faces
	 * (optionally in batches), add every non groupable face as a cluster of its
	 * own, merge the outcome with the clusters already stored, write it to the
	 * database, delete orphaned persons and finally reset the recreate/force flags.
	 *
	 * @param string $userId ID of the user whose faces are clustered
	 *
	 * @return void
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = $this->settingsService->getCurrentFaceModel();

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		if (!$this->needsClustering($userId, $modelId)) {
			return;
		}

		// Ok. If we are here, the clusters must be (re)created.
		$minFaceSize = $this->settingsService->getMinimumFaceSize();
		$minConfidence = $this->settingsService->getMinimumConfidence();

		$faces = $this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence);

		$facesCount = count($faces);
		$this->logInfo('There are ' . $facesCount . ' faces for clustering.');

		// A single slice holding everything, unless a batch size is configured.
		$batchSize = (int) $this->settingsService->getClusterigBatchSize();
		list($noSlices, $sliceSize) = $this->computeSlices($facesCount, $batchSize);

		$this->logDebug('We will cluster these with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces.');

		$newClusters = [];
		// Obtain the clusters in batches and append them.
		for ($i = 0; $i < $noSlices; $i++) {
			// Get the batch.
			$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
			// Get the face IDs, obtain the partial clusters and incorporate them.
			$faceIds = array_map(static function ($face) {
				return $face['id'];
			}, $facesSliced);
			$facesDescripted = $this->faceMapper->findDescriptorsBathed($faceIds);
			$newClusters = array_merge($newClusters, $this->getNewClusters($facesDescripted));
			// Discard variables aggressively to improve memory consumption.
			unset($facesDescripted, $facesSliced);
		}

		// Append non groupable faces on a single step.
		$nonGroupables = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence);
		$this->logInfo('We will add '. count($nonGroupables) . ' faces that cannot be grouped.');
		$newClusters = array_merge($newClusters, $this->getFakeClusters($nonGroupables));

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters(array_merge($faces, $nonGroupables));
		$this->logInfo(count($newClusters) . ' clusters found after clustering');

		// Discard variables aggressively to improve memory consumption.
		unset($faces, $nonGroupables);

		// Keep the existing person IDs wherever a new cluster matches an old one.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);

		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Remove all orphaned persons (those without any faces)
		// NOTE: we will do this for all models, not just for current one, but this is not problem.
		$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
		if ($orphansDeleted > 0) {
			$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
		}

		// Reset the flags so the clusters are not created/recreated again unnecessarily.
		$this->settingsService->setNeedRecreateClusters(false, $userId);
		$this->settingsService->_setForceCreateClusters(false, $userId);
	}

	/**
	 * Decides whether the clusters of a user have to be created or recreated,
	 * and logs the reason for the decision.
	 *
	 * All checks of the applicable branch are evaluated up front, because some
	 * of them write debug log entries of their own.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the current face model
	 *
	 * @return bool true when clustering must run, false when it can be skipped
	 */
	private function needsClustering(string $userId, int $modelId): bool {
		$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;

		if ($hasPersons) {
			$forceRecreate = $this->needRecreateBySettings($userId);
			$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
			$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

			if ($forceRecreate) {
				$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
				return true;
			}
			if ($haveEnoughFaces || $haveStaled) {
				$this->logInfo('Face clustering will be recreated with new information or changes');
				return true;
			}

			// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return false;
		}

		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
			return true;
		}
		if ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo('* have 1000 faces already processed');
		$this->logInfo('* or you need to have 95% of you images processed');
		$this->logInfo('Use stats command to track progress');
		return false;
	}

	/**
	 * Calculates in how many slices, and of which size, the faces are clustered.
	 *
	 * Without faces or without a configured batch size everything goes into one
	 * slice. Otherwise the batch size is raised to at least 2000 faces, capped
	 * at the number of faces, and the faces are spread evenly over the smallest
	 * number of slices that keeps every slice within that batch size.
	 *
	 * @param int $facesCount Number of faces to cluster
	 * @param int $batchSize Configured batch size, 0 or less means "no batches"
	 *
	 * @return int[] Two integers: number of slices and size of each slice
	 */
	private function computeSlices(int $facesCount, int $batchSize): array {
		if ($facesCount <= 0 || $batchSize <= 0) {
			return [1, $facesCount];
		}

		// The minimum batch size is 2000 faces and the maximum is the faces count.
		$batchSize = min(max($batchSize, 2000), $facesCount);

		// Integer ceiling divisions: an exact multiple must not produce an extra
		// slice, and the slice size has to stay an integer for array_slice().
		$noSlices = intdiv($facesCount + $batchSize - 1, $batchSize);
		$sliceSize = intdiv($facesCount + $noSlices - 1, $noSlices);

		return [$noSlices, $sliceSize];
	}

	/**
	 * Evaluates whether there are enough new faces to justify recreating the clusters.
	 *
	 * Judging by the log messages, the faces counted here are the ones without an
	 * associated person. The clusters are recreated when there are:
	 *  - 25 or more of those faces, or
	 *  - fewer than 25, but the oldest one was created more than 2 hours ago.
	 *
	 * (basically, we want to avoid recreating cluster for each new face being uploaded,
	 * however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 *
	 * @return bool true when the new faces justify a recreation
	 */
	private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));

		// todo: get rid of magic numbers (move to config)
		if ($facesWithoutPersons === 0) {
			return false;
		}
		if ($facesWithoutPersons >= 25) {
			return true;
		}

		// We have some faces, but not that many, let's see when oldest one is generated.
		$oldestFace = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
		$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
			$userId, $modelId, $oldestFace->creationTime->format('Y-m-d H:i:s')));

		// todo: get rid of magic numbers (move to config)
		$ageInSeconds = time() - $oldestFace->creationTime->getTimestamp();
		return $ageInSeconds > 2 * 60 * 60;
	}

	/**
	 * Tells whether the person mapper reports at least one cluster for the
	 * flagged count (third argument true; presumably "invalid/stale only",
	 * to be confirmed against PersonMapper::countClusters()).
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 *
	 * @return bool true when that count is greater than zero
	 */
	private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
		$staleClusters = $this->personMapper->countClusters($userId, $modelId, true);
		return $staleClusters > 0;
	}

	/**
	 * Tells whether the settings flag a needed recreation of the user's clusters.
	 *
	 * @param string $userId ID of the user
	 *
	 * @return bool value of the "need recreate clusters" setting of the user
	 */
	private function needRecreateBySettings(string $userId): bool {
		$needRecreate = $this->settingsService->getNeedRecreateClusters($userId);
		return $needRecreate;
	}

	/**
	 * Evaluates whether enough data was collected to create clusters for the first time.
	 *
	 * That is the case when creation is forced (tests only), or when the user has
	 * processed images and either more than 1000 faces or more than 95% of the
	 * images processed.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model
	 *
	 * @return bool true when the first clusters should be created
	 */
	private function needCreateFirstTime(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		if ($this->settingsService->_getForceCreateClusters($userId)) {
			return true;
		}

		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		if ($imageCount === 0) {
			return false;
		}

		$imageProcessed = $this->imageMapper->countUserImages($userId, $modelId, true);
		if ($imageProcessed === 0) {
			return false;
		}

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: get rid of magic numbers (move to config)
		if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
			return true;
		}

		$percentImagesProcessed = $imageProcessed / floatval($imageCount);
		return $percentImagesProcessed > 0.95;
	}

	/**
	 * Groups the given faces by the person they are currently assigned to.
	 *
	 * Faces whose 'person' entry is null are left out.
	 *
	 * @param array $faces Faces, each one readable as ['id' => ..., 'person' => ...]
	 *
	 * @return array Map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$clusters = [];
		foreach ($faces as $face) {
			if ($face['person'] === null) {
				continue;
			}
			// The inner list is created automatically on the first append.
			$clusters[$face['person']][] = $face['id'];
		}
		return $clusters;
	}

	/**
	 * Wraps every given face into a cluster of its own.
	 *
	 * @param array $faces List (keys 0..n-1) of faces, each one readable as ['id' => ...]
	 *
	 * @return array List of clusters, each holding exactly one face ID
	 */
	private function getFakeClusters(array $faces): array {
		$clusters = [];
		for ($i = 0, $total = count($faces); $i < $total; $i++) {
			$clusters[] = [$faces[$i]['id']];
		}
		return $clusters;
	}

	/**
	 * Clusters the given faces with Chinese Whispers.
	 *
	 * Uses the pdlib functions when Requirements::pdlibLoaded() says so, and the
	 * PHP implementations (Euclidean, ChineseWhispers) otherwise.
	 *
	 * @param array $faces List (keys 0..n-1) of faces, each one readable as
	 * ['id' => ..., 'descriptor' => ...]
	 *
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Clustering parameters
		$sensitivity = $this->settingsService->getSensitivity();
		$usePdlib = Requirements::pdlibLoaded();

		// Create edges (neighbors) for Chinese Whispers
		$edges = $this->buildEdges($faces, $sensitivity, $usePdlib);

		// Given the edges get the list of labels (found clusters) for each face.
		if ($usePdlib) {
			$labelsByIndex = dlib_chinese_whispers($edges);
		} else {
			// The clustering algorithm actually expects ordered lists.
			$orderedEdges = [];
			ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
			usort($orderedEdges, static function ($a, $b) {
				return ($a[0] === $b[0]) ? $a[1] <=> $b[1] : $a[0] <=> $b[0];
			});

			$labelsByIndex = [];
			ChineseWhispers::predict($orderedEdges, $labelsByIndex);
		}

		// Position i of the labels belongs to face i: collect the face IDs per label.
		$newClusters = [];
		for ($i = 0, $total = count($labelsByIndex); $i < $total; $i++) {
			$newClusters[$labelsByIndex[$i]][] = $faces[$i]['id'];
		}
		return $newClusters;
	}

	/**
	 * Builds the edges (pairs of face indexes) between faces whose descriptors
	 * are closer to each other than the sensitivity.
	 *
	 * Every face is also compared against itself (the inner loop starts at the
	 * outer index); presumably this yields a self edge, so that each face index
	 * shows up in the edge list. To be confirmed against the clusterers.
	 *
	 * @param array $faces List (keys 0..n-1) of faces with a 'descriptor' entry
	 * @param mixed $sensitivity Distance below which two faces are connected
	 * @param bool $usePdlib true to measure with dlib_vector_length(), false to
	 * measure with Euclidean::distance()
	 *
	 * @return array List of [index, index] pairs
	 */
	private function buildEdges(array $faces, $sensitivity, bool $usePdlib): array {
		$edges = [];
		$facesCount = count($faces);
		for ($i = 0; $i < $facesCount; $i++) {
			$descriptor = $faces[$i]['descriptor'];
			for ($j = $i; $j < $facesCount; $j++) {
				$distance = $usePdlib
					? dlib_vector_length($descriptor, $faces[$j]['descriptor'])
					: Euclidean::distance($descriptor, $faces[$j]['descriptor']);
				if ($distance < $sensitivity) {
					$edges[] = [$i, $j];
				}
			}
		}
		return $edges;
	}

	/**
	 * Merges freshly computed clusters with the existing ones, so that a new
	 * cluster keeps the ID of the old person it shares the most faces with.
	 *
	 * Every old person ID is handed out at most once. New clusters that do not
	 * get an old person ID receive fresh IDs, starting right after the highest
	 * old person ID (or at 1 when there are no old clusters).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 *
	 * @return array Map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index the old clusters by face once, instead of searching all of them
		// for every new face. The first old person owning a face wins.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face => [old person or null, new person]
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A missing old person gives an empty left side in
		// the key, which is read back as person 0 ("no old person") below.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent first.
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a stronger transition.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces has no transition, handle it as a new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
}
||||
443 |