1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]> |
4
|
|
|
* @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]> |
5
|
|
|
* |
6
|
|
|
* @author Branko Kokanovic <[email protected]> |
7
|
|
|
* |
8
|
|
|
* @license GNU AGPL version 3 or any later version |
9
|
|
|
* |
10
|
|
|
* This program is free software: you can redistribute it and/or modify |
11
|
|
|
* it under the terms of the GNU Affero General Public License as |
12
|
|
|
* published by the Free Software Foundation, either version 3 of the |
13
|
|
|
* License, or (at your option) any later version. |
14
|
|
|
* |
15
|
|
|
* This program is distributed in the hope that it will be useful, |
16
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
17
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18
|
|
|
* GNU Affero General Public License for more details. |
19
|
|
|
* |
20
|
|
|
* You should have received a copy of the GNU Affero General Public License |
21
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
* |
23
|
|
|
*/ |
24
|
|
|
namespace OCA\FaceRecognition\BackgroundJob\Tasks; |
25
|
|
|
|
26
|
|
|
use OCP\IUser; |
27
|
|
|
|
28
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; |
29
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; |
30
|
|
|
|
31
|
|
|
use OCA\FaceRecognition\Db\FaceMapper; |
32
|
|
|
use OCA\FaceRecognition\Db\ImageMapper; |
33
|
|
|
use OCA\FaceRecognition\Db\PersonMapper; |
34
|
|
|
|
35
|
|
|
use OCA\FaceRecognition\Helper\Euclidean; |
36
|
|
|
use OCA\FaceRecognition\Helper\Requirements; |
37
|
|
|
|
38
|
|
|
use OCA\FaceRecognition\Clusterer\ChineseWhispers; |
39
|
|
|
|
40
|
|
|
use OCA\FaceRecognition\Service\SettingsService; |
41
|
|
|
/** |
42
|
|
|
* Task that creates person clusters for each eligible user. |
43
|
|
|
*/ |
44
|
|
|
class CreateClustersTask extends FaceRecognitionBackgroundTask { |
45
|
|
|
/** @var PersonMapper Person mapper*/ |
46
|
|
|
private $personMapper; |
47
|
|
|
|
48
|
|
|
/** @var ImageMapper Image mapper*/ |
49
|
|
|
private $imageMapper; |
50
|
|
|
|
51
|
|
|
/** @var FaceMapper Face mapper*/ |
52
|
|
|
private $faceMapper; |
53
|
|
|
|
54
|
|
|
/** @var SettingsService Settings service*/ |
55
|
|
|
private $settingsService; |
56
|
|
|
|
57
|
|
|
/**
 * Stores the mappers and the settings service used by this task.
 *
 * @param PersonMapper $personMapper Person mapper
 * @param ImageMapper $imageMapper Image mapper
 * @param FaceMapper $faceMapper Face mapper
 * @param SettingsService $settingsService Settings service
 */
public function __construct(
    PersonMapper $personMapper,
    ImageMapper $imageMapper,
    FaceMapper $faceMapper,
    SettingsService $settingsService
) {
    parent::__construct();

    $this->settingsService = $settingsService;
    $this->faceMapper = $faceMapper;
    $this->imageMapper = $imageMapper;
    $this->personMapper = $personMapper;
}
75
|
|
|
|
76
|
|
|
/**
 * @inheritdoc
 */
public function description() {
    // Short human-readable summary of what this task does.
    $summary = 'Create new persons or update existing persons';

    return $summary;
}
82
|
|
|
|
83
|
|
|
/**
 * @inheritdoc
 *
 * Runs the cluster creation check for every eligible user reported by the
 * context, yielding after each user.
 */
public function execute(FaceRecognitionContext $context) {
    $this->setContext($context);

    foreach ($this->context->getEligibleUsers() as $userId) {
        $this->createClusterIfNeeded($userId);
        // Hand control back to the caller between users.
        yield;
    }

    return true;
}
96
|
|
|
|
97
|
|
|
/**
 * Creates or recreates the person clusters of one user, when needed.
 *
 * If the user already has persons for the current model, clusters are rebuilt
 * when the settings request it, or when there are new faces or stale persons.
 * If the user has no persons yet, clusters are created once enough data was
 * collected, or when creation is forced (used in tests).
 * In every other case the method only logs the reason and returns.
 *
 * @param string $userId ID of the user whose faces are clustered
 * @return void
 */
private function createClusterIfNeeded(string $userId) {
    $modelId = $this->settingsService->getCurrentFaceModel();

    // Depending on whether we already have clusters, decide if we should create/recreate them.
    if ($this->personMapper->countPersons($userId, $modelId) > 0) {
        $requestedBySettings = $this->needRecreateBySettings($userId);
        $enoughNewFaces = $this->hasNewFacesToRecreate($userId, $modelId);
        $stalePersons = $this->hasStalePersonsToRecreate($userId, $modelId);

        if (!$requestedBySettings && !$enoughNewFaces && !$stalePersons) {
            // No forced recreation, no stale persons and no relevant new faces: nothing to do.
            $this->logInfo('Clusters already exist, estimated there is no need to recreate them');
            return;
        }

        $this->logInfo($requestedBySettings
            ? 'Clusters already exist, but there was some change that requires recreating the clusters'
            : 'Face clustering will be recreated with new information or changes');
    } else {
        // User should not be able to use this directly, used in tests
        $forcedForTests = $this->settingsService->_getForceCreateClusters($userId);
        $readyForFirstRun = $this->needCreateFirstTime($userId, $modelId);

        if (!$forcedForTests && !$readyForFirstRun) {
            $this->logInfo(
                'Skipping cluster creation, not enough data (yet) collected. ' .
                'For cluster creation, you need either one of the following:');
            $this->logInfo('* have 1000 faces already processed');
            $this->logInfo('* or you need to have 95% of you images processed');
            $this->logInfo('Use stats command to track progress');
            return;
        }

        $this->logInfo($forcedForTests
            ? 'Force the creation of clusters for testing'
            : 'Face clustering will be created for the first time.');
    }

    // Ok. If we are here, the clusters must be (re)created.
    $minFaceSize = $this->settingsService->getMinimumFaceSize();
    $minConfidence = $this->settingsService->getMinimumConfidence();

    $groupableFaces = $this->faceMapper->getGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence);
    $nonGroupableFaces = $this->faceMapper->getNonGroupableFaces($userId, $modelId, $minFaceSize, $minConfidence);
    $faces = array_merge($groupableFaces, $nonGroupableFaces);

    $this->logInfo(count($faces) . ' faces found for clustering');

    // A cluster set is an associative array: the key is a person ID, the value a list of face IDs.
    // For current clusters the keys are existing person IDs; for new clusters they are
    // whatever labels Chinese Whispers assigned.
    $currentClusters = $this->getCurrentClusters($faces);

    $newClusters = $this->getNewClusters($faces);
    $this->logInfo(count($newClusters) . ' clusters found after clustering');

    // Map the new labels back to existing person IDs where possible, then persist.
    $mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
    $this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

    // Remove all orphaned persons (those without any faces)
    // NOTE: we will do this for all models, not just for current one, but this is not problem.
    $orphansDeleted = $this->personMapper->deleteOrphaned($userId);
    if ($orphansDeleted > 0) {
        $this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
    }

    // Reset both flags so the clusters are not created/recreated again unnecessarily.
    $this->settingsService->setNeedRecreateClusters(false, $userId);
    $this->settingsService->_setForceCreateClusters(false, $userId);
}
185
|
|
|
|
186
|
|
|
/**
 * Tells whether faces without a person justify recreating the clusters.
 *
 * Recreation is wanted when either:
 *  - there are at least 25 faces without an associated person, or
 *  - there are fewer, but the oldest of them was created more than 2 hours ago.
 *
 * (Basically, we want to avoid recreating clusters for each new face being uploaded,
 * however, we don't want to wait too much as clusters could be changed a lot.)
 *
 * @param string $userId ID of the user
 * @param int $modelId ID of the face model
 * @return bool true when the clusters should be recreated because of new faces
 */
private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
    $unassignedFaces = $this->faceMapper->countFaces($userId, $modelId, true);
    $this->logDebug(sprintf(
        'Found %d faces without associated persons for user %s and model %d',
        $unassignedFaces,
        $userId,
        $modelId
    ));

    // todo: get rid of magic numbers (move to config)
    if ($unassignedFaces === 0) {
        return false;
    }
    if ($unassignedFaces >= 25) {
        return true;
    }

    // We have some faces, but not that many, let's see when the oldest one was generated.
    $oldest = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
    $oldestTimestamp = $oldest->creationTime->getTimestamp();
    $nowTimestamp = (new \DateTime())->getTimestamp();
    $this->logDebug(sprintf(
        'Oldest face without persons for user %s and model %d is from %s',
        $userId,
        $modelId,
        $oldest->creationTime->format('Y-m-d H:i:s')
    ));

    // todo: get rid of magic numbers (move to config)
    $ageInSeconds = $nowTimestamp - $oldestTimestamp;

    return $ageInSeconds > 2 * 60 * 60;
}
223
|
|
|
|
224
|
|
|
/**
 * Tells whether PersonMapper::countClusters(), called with its third argument
 * set to true, reports at least one cluster for this user and model.
 * NOTE(review): the flag presumably restricts the count to invalid (stale)
 * clusters - confirm against PersonMapper.
 */
private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
    $staleClusters = $this->personMapper->countClusters($userId, $modelId, true);

    return $staleClusters > 0;
}
227
|
|
|
|
228
|
|
|
/**
 * Returns the "need recreate clusters" setting stored for the given user.
 */
private function needRecreateBySettings(string $userId): bool {
    $recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);

    return $recreateRequested;
}
231
|
|
|
|
232
|
1 |
|
/**
 * Tells whether enough data was collected to create clusters for the first time.
 *
 * Returns true when creation is forced (used in tests), when more than 1000 faces
 * exist, or when more than 95% of the user images are processed. Returns false
 * when the user has no images or none of them is processed yet.
 *
 * @param string $userId ID of the user
 * @param int $modelId ID of the face model
 * @return bool
 */
private function needCreateFirstTime(string $userId, int $modelId): bool {
    // User should not be able to use this directly, used in tests
    if ($this->settingsService->_getForceCreateClusters($userId)) {
        return true;
    }

    $totalImages = $this->imageMapper->countUserImages($userId, $modelId);
    if ($totalImages === 0) {
        return false;
    }

    $processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
    if ($processedImages === 0) {
        return false;
    }

    // These are basic criteria without which we should not even consider creating clusters.
    // Such clusters would be small and not "stable" enough; better to wait for more images.
    // todo: get rid of magic numbers (move to config)
    if ($this->faceMapper->countFaces($userId, $modelId) > 1000) {
        return true;
    }

    $processedRatio = $processedImages / (float) $totalImages;

    return $processedRatio > 0.95;
}
258
|
|
|
|
259
|
1 |
|
/**
 * Groups the IDs of the faces that already belong to a person.
 *
 * @param array $faces Faces exposing `id` and `person`
 * @return array Map of person ID => list of face IDs; faces whose person is null are left out
 */
private function getCurrentClusters(array $faces): array {
    $clustersByPerson = [];

    foreach ($faces as $face) {
        if ($face->person === null) {
            continue;
        }
        // Appending to a missing key creates the list on first use.
        $clustersByPerson[$face->person][] = $face->id;
    }

    return $clustersByPerson;
}
271
|
|
|
|
272
|
1 |
|
/**
 * Clusters the given faces with Chinese Whispers and groups the face IDs by cluster label.
 *
 * Two faces get an edge when the distance between their descriptors is below the
 * configured sensitivity. When pdlib is loaded its native functions are used for the
 * distance and the clustering, otherwise the PHP implementations (Euclidean, ChineseWhispers).
 *
 * @param array $faces Faces exposing `id` and `descriptor`
 * @return array Map of cluster label => list of face IDs
 */
private function getNewClusters(array $faces): array {
    // Edges and labels refer to faces by position, so positions must be 0..n-1.
    $faces = array_values($faces);

    // Clustering parameters
    $sensitivity = $this->settingsService->getSensitivity();
    $usePdlib = Requirements::pdlibLoaded();

    // Create edges (neighbors) for Chinese Whispers
    $edges = $this->buildFaceEdges($faces, $sensitivity, $usePdlib);

    // Given the edges get the list of labels (found clusters) for each face.
    if ($usePdlib) {
        $labelsByIndex = dlib_chinese_whispers($edges);
    } else {
        // The PHP clustering algorithm expects ordered edge lists.
        $orderedEdges = [];
        ChineseWhispers::convert_unordered_to_ordered($edges, $orderedEdges);
        usort($orderedEdges, function ($a, $b) {
            return ($a[0] === $b[0]) ? ($a[1] <=> $b[1]) : ($a[0] <=> $b[0]);
        });

        $labelsByIndex = [];
        ChineseWhispers::predict($orderedEdges, $labelsByIndex);
    }

    $newClusters = [];
    for ($i = 0, $labelCount = count($labelsByIndex); $i < $labelCount; $i++) {
        // Appending to a missing key creates the list on first use.
        $newClusters[$labelsByIndex[$i]][] = $faces[$i]->id;
    }

    return $newClusters;
}

/**
 * Builds the edge list (pairs of face positions) used as input for Chinese Whispers.
 *
 * A face without descriptor only gets an edge to itself. Every other face is compared
 * with itself and with each later face that has a descriptor; a pair becomes an edge
 * when its distance is below $sensitivity.
 *
 * @param array $faces 0-indexed list of faces exposing `descriptor`
 * @param mixed $sensitivity Distance threshold as returned by the settings service
 * @param bool $usePdlib true to use dlib_vector_length(), false to use Euclidean::distance()
 * @return array List of [$i, $j] position pairs
 */
private function buildFaceEdges(array $faces, $sensitivity, bool $usePdlib): array {
    $edges = [];
    $facesCount = count($faces);

    for ($i = 0; $i < $facesCount; $i++) {
        $face1 = $faces[$i];
        if (!isset($face1->descriptor)) {
            $edges[] = [$i, $i];
            continue;
        }

        for ($j = $i; $j < $facesCount; $j++) {
            $face2 = $faces[$j];
            if (!isset($face2->descriptor)) {
                continue;
            }

            $distance = $usePdlib
                ? dlib_vector_length($face1->descriptor, $face2->descriptor)
                : Euclidean::distance($face1->descriptor, $face2->descriptor);

            if ($distance < $sensitivity) {
                $edges[] = [$i, $j];
            }
        }
    }

    return $edges;
}
345
|
|
|
|
346
|
|
|
/**
 * Merges freshly computed clusters with the existing ones, reusing old person IDs.
 *
 * Every face of the new clusters is seen as a transition "old person -> new cluster"
 * (old person is 0 when the face had none). Transitions are ranked by the number of
 * faces they carry. Each new cluster takes the old person of its best ranked
 * transition, unless that old person was already taken by another new cluster; in
 * that case, or when there is no old person, the cluster gets a fresh ID starting
 * at max(old person IDs) + 1 (or 1 when there are no old clusters).
 *
 * todo: only reason this is public is because of tests. Go figure it out better.
 *
 * @param array $oldCluster Map of existing person ID => list of face IDs
 * @param array $newCluster Map of new cluster label => list of face IDs
 * @return array Map of person ID => list of face IDs
 */
public function mergeClusters(array $oldCluster, array $newCluster): array {
    // Index each old face by the first old person that lists it, so the lookup
    // below is O(1) instead of scanning every old cluster per face.
    $oldPersonByFace = [];
    foreach ($oldCluster as $oldPerson => $oldFaces) {
        foreach ($oldFaces as $oldFace) {
            if (!isset($oldPersonByFace[$oldFace])) {
                $oldPersonByFace[$oldFace] = $oldPerson;
            }
        }
    }

    // Create map of face transitions: face ID => [old person or null, new cluster]
    $transitions = [];
    foreach ($newCluster as $newPerson => $newFaces) {
        foreach ($newFaces as $newFace) {
            $transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
        }
    }

    // Count transitions. A null old person concatenates as '', which intval() reads as 0 below.
    $transitionCount = [];
    foreach ($transitions as $transition) {
        $key = $transition[0] . ':' . $transition[1];
        $transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
    }

    // Create map of new person -> old person, most frequent transitions first
    $newOldPersonMapping = [];
    $oldPersonProcessed = []; // old persons already taken, keyed by ID for fast lookup
    arsort($transitionCount);
    foreach (array_keys($transitionCount) as $transitionKey) {
        $transition = explode(':', $transitionKey);
        $oldPerson = intval($transition[0]);
        $newPerson = intval($transition[1]);

        if (array_key_exists($newPerson, $newOldPersonMapping)) {
            // A better ranked transition already decided this new cluster.
            continue;
        }

        if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
            $newOldPersonMapping[$newPerson] = $oldPerson;
            $oldPersonProcessed[$oldPerson] = 0;
        } else {
            // Old person already taken by another new cluster: treat as a new person.
            $newOldPersonMapping[$newPerson] = 0;
        }
    }

    // Starting with new cluster, convert all new person IDs with old person IDs
    $nextPersonId = 1;
    if (count($oldCluster) > 0) {
        $nextPersonId = (int) max(array_keys($oldCluster)) + 1;
    }

    $result = [];
    foreach ($newCluster as $newPerson => $newFaces) {
        // A cluster without faces produced no transition; handle it as a new person
        // instead of reading an undefined index.
        $oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
        if ($oldPerson === 0) {
            $result[$nextPersonId] = $newFaces;
            $nextPersonId++;
        } else {
            $result[$oldPerson] = $newFaces;
        }
    }

    return $result;
}
409
|
|
|
} |
410
|
|
|
|