1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]> |
4
|
|
|
* @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]> |
5
|
|
|
* |
6
|
|
|
* @author Branko Kokanovic <[email protected]> |
7
|
|
|
* |
8
|
|
|
* @license GNU AGPL version 3 or any later version |
9
|
|
|
* |
10
|
|
|
* This program is free software: you can redistribute it and/or modify |
11
|
|
|
* it under the terms of the GNU Affero General Public License as |
12
|
|
|
* published by the Free Software Foundation, either version 3 of the |
13
|
|
|
* License, or (at your option) any later version. |
14
|
|
|
* |
15
|
|
|
* This program is distributed in the hope that it will be useful, |
16
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
17
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18
|
|
|
* GNU Affero General Public License for more details. |
19
|
|
|
* |
20
|
|
|
* You should have received a copy of the GNU Affero General Public License |
21
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
* |
23
|
|
|
*/ |
24
|
|
|
namespace OCA\FaceRecognition\BackgroundJob\Tasks; |
25
|
|
|
|
26
|
|
|
use OCP\IUser; |
27
|
|
|
|
28
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; |
29
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; |
30
|
|
|
|
31
|
|
|
use OCA\FaceRecognition\Db\FaceMapper; |
32
|
|
|
use OCA\FaceRecognition\Db\ImageMapper; |
33
|
|
|
use OCA\FaceRecognition\Db\PersonMapper; |
34
|
|
|
|
35
|
|
|
use OCA\FaceRecognition\Helper\Euclidean; |
36
|
|
|
use OCA\FaceRecognition\Helper\Requirements; |
37
|
|
|
|
38
|
|
|
use OCA\FaceRecognition\Clusterer\ChineseWhispers; |
39
|
|
|
|
40
|
|
|
use OCA\FaceRecognition\Service\SettingsService; |
41
|
|
|
/** |
42
|
|
|
* Task that, for each eligible user, creates that user's person clusters. |
43
|
|
|
*/ |
44
|
|
|
class CreateClustersTask extends FaceRecognitionBackgroundTask { |
45
|
|
|
/** @var PersonMapper Person mapper*/ |
46
|
|
|
private $personMapper; |
47
|
|
|
|
48
|
|
|
/** @var ImageMapper Image mapper*/ |
49
|
|
|
private $imageMapper; |
50
|
|
|
|
51
|
|
|
/** @var FaceMapper Face mapper*/ |
52
|
|
|
private $faceMapper; |
53
|
|
|
|
54
|
|
|
/** @var SettingsService Settings service*/ |
55
|
|
|
private $settingsService; |
56
|
|
|
|
57
|
|
|
/**
 * Wires the task with the mappers and the settings service it works with.
 *
 * @param PersonMapper $personMapper Mapper used to count, merge and clean up persons
 * @param ImageMapper $imageMapper Mapper used to count the images of a user
 * @param FaceMapper $faceMapper Mapper used to count and fetch the faces of a user
 * @param SettingsService $settingsService Source of the clustering settings and flags
 */
public function __construct(
	PersonMapper $personMapper,
	ImageMapper $imageMapper,
	FaceMapper $faceMapper,
	SettingsService $settingsService
) {
	// The parent constructor runs first, exactly as before.
	parent::__construct();

	$this->settingsService = $settingsService;
	$this->faceMapper = $faceMapper;
	$this->imageMapper = $imageMapper;
	$this->personMapper = $personMapper;
}
75
|
|
|
|
76
|
|
|
/**
 * @inheritdoc
 *
 * Short, human readable summary of what this task does.
 */
public function description() {
	$summary = "Create new persons or update existing persons";

	return $summary;
}
82
|
|
|
|
83
|
|
|
/**
 * @inheritdoc
 *
 * Stores the context, then runs the clustering check for every eligible user it
 * reports. The generator yields once after each user and finally returns true.
 */
public function execute(FaceRecognitionContext $context) {
	$this->setContext($context);

	foreach ($this->context->getEligibleUsers() as $userId) {
		$this->createClusterIfNeeded($userId);
		yield;
	}

	return true;
}
96
|
|
|
|
97
|
|
|
/**
 * Creates the person clusters of a user, or recreates them when something changed.
 *
 * The method returns early, without touching anything, when:
 *  - clusters already exist and neither the settings, new faces nor stale persons
 *    ask for a recreation, or
 *  - no clusters exist yet and there is not enough data to create them.
 *
 * Otherwise the faces are clustered (optionally in batches), the result is merged
 * with the current clusters, stored, orphaned persons are deleted and the
 * "recreate"/"force create" flags of the user are reset.
 *
 * @param string $userId ID of the user whose faces are clustered
 * @return void
 */
private function createClusterIfNeeded(string $userId) {
	$modelId = $this->settingsService->getCurrentFaceModel();

	// Depending on whether we already have clusters, decide if we should create/recreate them.
	//
	$hasPersons = $this->personMapper->countPersons($userId, $modelId) > 0;
	if ($hasPersons) {
		$forceRecreate = $this->needRecreateBySettings($userId);
		$haveEnoughFaces = $this->hasNewFacesToRecreate($userId, $modelId);
		$haveStaled = $this->hasStalePersonsToRecreate($userId, $modelId);

		if ($forceRecreate) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
		} elseif ($haveEnoughFaces || $haveStaled) {
			$this->logInfo('Face clustering will be recreated with new information or changes');
		} else {
			// If there are no invalid persons, and there are no recent new faces, no need to recreate the clusters
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return;
		}
	} else {
		// User should not be able to use this directly, used in tests
		$forceTestCreation = $this->settingsService->_getForceCreateClusters($userId);
		$needCreate = $this->needCreateFirstTime($userId, $modelId);

		if ($forceTestCreation) {
			$this->logInfo('Force the creation of clusters for testing');
		} elseif ($needCreate) {
			$this->logInfo('Face clustering will be created for the first time.');
		} else {
			$this->logInfo(
				'Skipping cluster creation, not enough data (yet) collected. ' .
				'For cluster creation, you need either one of the following:');
			$this->logInfo('* have 1000 faces already processed');
			$this->logInfo('* or you need to have 95% of you images processed');
			$this->logInfo('Use stats command to track progress');
			return;
		}
	}

	// Ok. If we are here, the clusters must be (re)created.
	//
	$min_face_size = $this->settingsService->getMinimumFaceSize();
	$min_confidence = $this->settingsService->getMinimumConfidence();

	$faces = array_merge(
		$this->faceMapper->getGroupableFaces($userId, $modelId, $min_face_size, $min_confidence),
		$this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence)
	);

	$facesCount = count($faces);
	$this->logInfo('There are ' . $facesCount . ' faces for clustering');

	// By default everything is clustered in a single batch.
	$noSlices = 1;
	$sliceSize = $facesCount;

	$defaultSlice = $this->settingsService->getClusterigBatchSize();
	// Batching only applies when there is something to split. The guard on $facesCount
	// also keeps the divisions below away from a zero divisor.
	if ($defaultSlice > 0 && $facesCount > 0) {
		// The minimum batch size is 2000 faces
		$defaultSlice = max($defaultSlice, 2000);
		// The maximum batch size is the faces count.
		$defaultSlice = min($defaultSlice, $facesCount);
		// Smallest number of batches that keeps every batch within the batch size
		// (a single batch when all the faces fit), with the faces spread evenly.
		$noSlices = (int) ceil($facesCount / $defaultSlice);
		$sliceSize = (int) ceil($facesCount / $noSlices);
	}

	$this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces');

	$newClusters = [];
	for ($i = 0; $i < $noSlices; $i++) {
		$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
		// array_merge() renumbers the integer cluster labels, so the labels of
		// different batches never overwrite each other.
		$newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced));
	}

	// Cluster is associative array where key is person ID.
	// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
	// and for new clusters is whatever chinese whispers decides to identify them.
	//
	$currentClusters = $this->getCurrentClusters($faces);

	$this->logInfo(count($newClusters) . ' clusters found after clustering');

	// New merge
	$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);

	$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

	// Remove all orphaned persons (those without any faces)
	// NOTE: we will do this for all models, not just for current one, but this is not problem.
	$orphansDeleted = $this->personMapper->deleteOrphaned($userId);
	if ($orphansDeleted > 0) {
		$this->logInfo('Deleted ' . $orphansDeleted . ' persons without faces');
	}

	// Reset the flags so the clusters are not created/recreated again unnecessarily.
	$this->settingsService->setNeedRecreateClusters(false, $userId);
	$this->settingsService->_setForceCreateClusters(false, $userId);
}
205
|
|
|
|
206
|
|
|
/**
 * Tells whether the faces that have no person yet justify recreating the clusters.
 *
 * We don't want to jump the gun here: recreating the clusters for each new face
 * being uploaded is wasteful, but we don't want to wait too much either, as the
 * clusters could change a lot. So the answer is yes when there are:
 *  - 25 or more faces without person, or
 *  - fewer than 25 of them, but the oldest one was created more than 2 hours ago.
 *
 * @param string $userId ID of the user
 * @param int $modelId ID of the face model
 * @return bool
 */
private function hasNewFacesToRecreate(string $userId, int $modelId): bool {
	$unassigned = $this->faceMapper->countFaces($userId, $modelId, true);
	$this->logDebug(sprintf(
		'Found %d faces without associated persons for user %s and model %d',
		$unassigned,
		$userId,
		$modelId
	));

	// todo: get rid of magic numbers (move to config)
	if ($unassigned === 0) {
		return false;
	}
	if ($unassigned >= 25) {
		return true;
	}

	// We have some faces, but not that many, let's see when the oldest one was generated.
	$oldest = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
	$createdAt = $oldest->creationTime->getTimestamp();
	$now = (new \DateTime())->getTimestamp();
	$this->logDebug(sprintf(
		'Oldest face without persons for user %s and model %d is from %s',
		$userId,
		$modelId,
		$oldest->creationTime->format('Y-m-d H:i:s')
	));

	// todo: get rid of magic numbers (move to config)
	return ($now - $createdAt) > 2 * 60 * 60;
}
243
|
|
|
|
244
|
|
|
/**
 * Tells whether countClusters(), called with its third argument set to true,
 * reports at least one cluster for the user and model.
 * NOTE(review): the flag presumably selects only invalid/stale clusters - confirm in PersonMapper.
 */
private function hasStalePersonsToRecreate(string $userId, int $modelId): bool {
	$staleCount = $this->personMapper->countClusters($userId, $modelId, true);

	return $staleCount > 0;
}
247
|
|
|
|
248
|
|
|
/**
 * Returns the "need recreate clusters" value that the settings service holds for the user.
 */
private function needRecreateBySettings(string $userId): bool {
	$recreateRequested = $this->settingsService->getNeedRecreateClusters($userId);

	return $recreateRequested;
}
251
|
|
|
|
252
|
1 |
|
/**
 * Tells whether enough data was collected to create the clusters for the first time.
 *
 * True when the creation is forced (testing only), when more than 1000 faces were
 * counted, or when more than 95% of the user images are already processed.
 * Always false while the user has no images or none of them is processed.
 */
private function needCreateFirstTime(string $userId, int $modelId): bool {
	// User should not be able to use this directly, used in tests
	if ($this->settingsService->_getForceCreateClusters($userId)) {
		return true;
	}

	$totalImages = $this->imageMapper->countUserImages($userId, $modelId);
	if ($totalImages === 0) {
		return false;
	}

	$processedImages = $this->imageMapper->countUserImages($userId, $modelId, true);
	if ($processedImages === 0) {
		return false;
	}

	// These are basic criteria without which we should not even consider creating clusters.
	// These clusters will be small and not "stable" enough and we should better wait for more images to come.
	// todo: get rid of magic numbers (move to config)
	$knownFaces = $this->faceMapper->countFaces($userId, $modelId);
	if ($knownFaces > 1000) {
		return true;
	}

	$processedRatio = $processedImages / floatval($totalImages);

	return $processedRatio > 0.95;
}
278
|
|
|
|
279
|
1 |
|
/**
 * Groups the given faces by the person they are currently assigned to.
 *
 * Faces whose person is null are left out.
 *
 * @param array $faces Faces exposing the `person` and `id` properties
 * @return array Person ID => list of face IDs, in the order the faces were given
 */
private function getCurrentClusters(array $faces): array {
	$clustersByPerson = [];

	foreach ($faces as $face) {
		if ($face->person === null) {
			continue;
		}
		// Appending to a key that does not exist yet creates its list on the fly.
		$clustersByPerson[$face->person][] = $face->id;
	}

	return $clustersByPerson;
}
291
|
|
|
|
292
|
1 |
|
/**
 * Clusters the given faces with Chinese Whispers.
 *
 * Two faces are linked by an edge when the distance between their descriptors is
 * below the configured sensitivity. Distances and clustering come from pdlib when
 * it is loaded, and from the bundled PHP helpers otherwise.
 *
 * @param array $faces List of faces exposing the `descriptor` and `id` properties
 * @return array Cluster label => list of face IDs
 */
private function getNewClusters(array $faces): array {
	// Clustering parameters
	$sensitivity = $this->settingsService->getSensitivity();
	$usePdlib = Requirements::pdlibLoaded();

	// Create edges (neighbors) for Chinese Whispers
	$edges = [];
	$total = count($faces);
	for ($a = 0; $a < $total; $a++) {
		$first = $faces[$a];
		if (!isset($first->descriptor)) {
			// A face without descriptor is only linked to itself.
			$edges[] = [$a, $a];
			continue;
		}
		// Starting at $a also compares the face with itself.
		for ($b = $a; $b < $total; $b++) {
			$second = $faces[$b];
			if (!isset($second->descriptor)) {
				continue;
			}
			$distance = $usePdlib
				? dlib_vector_length($first->descriptor, $second->descriptor)
				: Euclidean::distance($first->descriptor, $second->descriptor);
			if ($distance < $sensitivity) {
				$edges[] = [$a, $b];
			}
		}
	}

	// Given the edges get the list of labels (found clusters) for each face.
	if ($usePdlib) {
		$labels = dlib_chinese_whispers($edges);
	} else {
		// The clustering algorithm actually expects ordered lists.
		$ordered = [];
		ChineseWhispers::convert_unordered_to_ordered($edges, $ordered);
		usort($ordered, function ($left, $right) {
			return ($left[0] === $right[0])
				? $left[1] - $right[1]
				: $left[0] - $right[0];
		});

		$labels = [];
		ChineseWhispers::predict($ordered, $labels);
	}

	// The label at position N belongs to the face at position N.
	$clusters = [];
	for ($idx = 0, $labelCount = count($labels); $idx < $labelCount; $idx++) {
		$clusters[$labels[$idx]][] = $faces[$idx]->id;
	}

	return $clusters;
}
365
|
|
|
|
366
|
|
|
/**
 * Merges freshly computed clusters with the existing ones, reusing old person IDs.
 *
 * Every face of the new clusters is looked up in the old clusters, and the
 * (old person, new cluster) pairs are counted. Going from the most to the least
 * frequent pair, each new cluster takes the old person of the first pair seen for
 * it, unless that person was already taken by another new cluster. New clusters
 * left without an old person get fresh IDs, starting after the highest old person
 * ID (or at 1 when there are no old clusters).
 *
 * todo: only reason this is public is because of tests. Go figure it out better.
 *
 * @param array $oldCluster Existing clusters: person ID => list of face IDs
 * @param array $newCluster New clusters: cluster label => list of face IDs
 * @return array Merged clusters: person ID => list of face IDs
 */
public function mergeClusters(array $oldCluster, array $newCluster): array {
	// Reverse index (face ID => old person ID), built once so that each new face is
	// resolved with a single lookup instead of scanning every old cluster.
	// The first person listing a face wins.
	$oldPersonByFace = [];
	foreach ($oldCluster as $oldPerson => $oldFaces) {
		foreach ($oldFaces as $oldFace) {
			if (!isset($oldPersonByFace[$oldFace])) {
				$oldPersonByFace[$oldFace] = $oldPerson;
			}
		}
	}

	// Create map of face transitions: face ID => [old person or null, new person]
	$transitions = [];
	foreach ($newCluster as $newPerson => $newFaces) {
		foreach ($newFaces as $newFace) {
			$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
		}
	}

	// Count transitions. A missing old person gives a key like ":3", which is
	// read back below as old person 0, meaning "no old person".
	$transitionCount = [];
	foreach ($transitions as $transition) {
		$key = $transition[0] . ':' . $transition[1];
		if (array_key_exists($key, $transitionCount)) {
			$transitionCount[$key]++;
		} else {
			$transitionCount[$key] = 1;
		}
	}

	// Create map of new person -> old person transitions
	$newOldPersonMapping = [];
	$oldPersonProcessed = []; // store this, so we don't waste cycles for in_array()
	arsort($transitionCount);
	foreach ($transitionCount as $transitionKey => $count) {
		$transition = explode(":", $transitionKey);
		$oldPerson = intval($transition[0]);
		$newPerson = intval($transition[1]);
		if (!array_key_exists($newPerson, $newOldPersonMapping)) {
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already taken by a bigger cluster.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}
	}

	// Starting with new cluster, convert all new person IDs with old person IDs
	$maxOldPersonId = 1;
	if (count($oldCluster) > 0) {
		$maxOldPersonId = (int) max(array_keys($oldCluster)) + 1;
	}

	$result = [];
	foreach ($newCluster as $newPerson => $newFaces) {
		// A cluster without faces produced no transition and so has no mapping:
		// treat it as a new person instead of reading an undefined key.
		$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
		if ($oldPerson === 0) {
			$result[$maxOldPersonId] = $newFaces;
			$maxOldPersonId++;
		} else {
			$result[$oldPerson] = $newFaces;
		}
	}

	return $result;
}
429
|
|
|
} |
430
|
|
|
|