|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* @copyright Copyright (c) 2017, Matias De lellis <[email protected]> |
|
4
|
|
|
* @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]> |
|
5
|
|
|
* |
|
6
|
|
|
* @author Branko Kokanovic <[email protected]> |
|
7
|
|
|
* |
|
8
|
|
|
* @license GNU AGPL version 3 or any later version |
|
9
|
|
|
* |
|
10
|
|
|
* This program is free software: you can redistribute it and/or modify |
|
11
|
|
|
* it under the terms of the GNU Affero General Public License as |
|
12
|
|
|
* published by the Free Software Foundation, either version 3 of the |
|
13
|
|
|
* License, or (at your option) any later version. |
|
14
|
|
|
* |
|
15
|
|
|
* This program is distributed in the hope that it will be useful, |
|
16
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
17
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
18
|
|
|
* GNU Affero General Public License for more details. |
|
19
|
|
|
* |
|
20
|
|
|
* You should have received a copy of the GNU Affero General Public License |
|
21
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
22
|
|
|
* |
|
23
|
|
|
*/ |
|
24
|
|
|
namespace OCA\FaceRecognition\BackgroundJob\Tasks; |
|
25
|
|
|
|
|
26
|
|
|
use OCP\IConfig; |
|
27
|
|
|
use OCP\IUser; |
|
28
|
|
|
|
|
29
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; |
|
30
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; |
|
31
|
|
|
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask; |
|
32
|
|
|
|
|
33
|
|
|
use OCA\FaceRecognition\Db\FaceMapper; |
|
34
|
|
|
use OCA\FaceRecognition\Db\ImageMapper; |
|
35
|
|
|
use OCA\FaceRecognition\Db\PersonMapper; |
|
36
|
|
|
|
|
37
|
|
|
use OCA\FaceRecognition\Helper\Euclidean; |
|
38
|
|
|
|
|
39
|
|
|
use OCA\FaceRecognition\Migration\AddDefaultFaceModel; |
|
40
|
|
|
|
|
41
|
|
|
/** |
|
42
|
|
|
 * Task that creates person clusters for each user. |
|
43
|
|
|
*/ |
|
44
|
|
|
class CreateClustersTask extends FaceRecognitionBackgroundTask { |
|
45
|
|
|
/** @var IConfig Config */ |
|
46
|
|
|
private $config; |
|
47
|
|
|
|
|
48
|
|
|
/** @var PersonMapper Person mapper*/ |
|
49
|
|
|
private $personMapper; |
|
50
|
|
|
|
|
51
|
|
|
/** @var ImageMapper Image mapper*/ |
|
52
|
|
|
private $imageMapper; |
|
53
|
|
|
|
|
54
|
|
|
/** @var FaceMapper Face mapper*/ |
|
55
|
|
|
private $faceMapper; |
|
56
|
|
|
|
|
57
|
|
|
/**
 * Stores the configuration service and the mappers this task works with.
 *
 * @param IConfig $config Config
 * @param PersonMapper $personMapper Person mapper
 * @param ImageMapper $imageMapper Image mapper
 * @param FaceMapper $faceMapper Face mapper
 */
public function __construct(
    IConfig $config,
    PersonMapper $personMapper,
    ImageMapper $imageMapper,
    FaceMapper $faceMapper
) {
    parent::__construct();

    // The assignments are independent of each other; mappers first, config last.
    $this->faceMapper = $faceMapper;
    $this->imageMapper = $imageMapper;
    $this->personMapper = $personMapper;
    $this->config = $config;
}
|
71
|
|
|
|
|
72
|
|
|
/**
 * @inheritdoc
 */
public function description() {
    // Human-readable summary of what this task does.
    $summary = 'Create new persons or update existing persons';

    return $summary;
}
|
78
|
|
|
|
|
79
|
|
|
/**
 * @inheritdoc
 *
 * Runs cluster creation for the single user set on the context, or for every
 * seen user when the context carries no user. Yields after each processed user.
 */
public function execute(FaceRecognitionContext $context) {
    $this->setContext($context);

    // We cannot yield from inside a closure, so the user IDs are gathered first and
    // iterated afterwards. Only the UID is kept, to avoid holding on to IUser objects.
    $userIds = [];
    if ($this->context->user !== null) {
        $userIds[] = $this->context->user->getUID();
    } else {
        $collectUid = function (IUser $seenUser) use (&$userIds) {
            $userIds[] = $seenUser->getUID();
        };
        $this->context->userManager->callForSeenUsers($collectUid);
    }

    foreach ($userIds as $userId) {
        $this->createClusterIfNeeded($userId);
        yield;
    }

    return true;
}
|
104
|
|
|
|
|
105
|
1 |
|
/**
 * Decides whether the clusters (persons) of one user must be created or recreated and,
 * if so, clusters the user's faces and stores the merged result through the person mapper.
 *
 * When the user already has persons, clusters are recreated only if some person is stale,
 * enough new faces have piled up, or the 'recreate-clusters' user flag is set.
 * When the user has no persons yet, clusters are created only once enough data has been
 * collected, or when the 'force-create-clusters' user flag is set.
 *
 * After a successful run both flags are reset to 'false'.
 *
 * @param string $userId ID of the user whose faces should be clustered
 */
private function createClusterIfNeeded(string $userId) {
    $modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

    $hasPersons = $this->personMapper->countPersons($userId) > 0;

    // Depending on whether we already have clusters, decide if we should create/recreate them.
    if ($hasPersons) {
        // We already have some persons, so evaluate whether the clusters must be recreated.
        // We want to recreate clusters/persons if:
        // * Some cluster/person is invalidated (some image of this user was changed, deleted etc.)
        // * There are new faces. To avoid reclustering on every single upload we require either:
        // ** at least 10 new faces, or
        // ** fewer than 10 new faces, the oldest of which is more than 2h old
        //    (we don't want to wait too long either, as clusters could change a lot)
        $haveNewFaces = false;
        $facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
        $this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
            $facesWithoutPersons, $userId, $modelId));
        // todo: get rid of magic numbers (move to config)
        if ($facesWithoutPersons >= 10) {
            $haveNewFaces = true;
        } elseif ($facesWithoutPersons > 0) {
            // We have some faces, but not that many; check when the oldest one was created.
            $face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
            $oldestFaceTimestamp = $face->creationTime->getTimestamp();
            $currentTimestamp = (new \DateTime())->getTimestamp();
            $this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
                $userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
            // todo: get rid of magic numbers (move to config)
            if ($currentTimestamp - $oldestFaceTimestamp > 2 * 60 * 60) {
                $haveNewFaces = true;
            }
        }

        $stalePersonsCount = $this->personMapper->countPersons($userId, true);
        $haveStalePersons = $stalePersonsCount > 0;
        // True when nothing changed: no stale persons and no (sufficiently) new faces.
        $staleCluster = $haveStalePersons === false && $haveNewFaces === false;

        $recreateClusters = $this->config->getUserValue($userId, 'facerecognition', 'recreate-clusters', 'false');
        $forceRecreation = ($recreateClusters === 'true');

        $this->logDebug(sprintf('Found %d changed persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));

        if ($staleCluster && !$forceRecreation) {
            // No invalid persons and no recent new faces: no need to recreate the clusters.
            $this->logInfo('Clusters already exist, estimated there is no need to recreate them');
            return;
        }
        if ($forceRecreation) {
            $this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
        }
    } else {
        // User should not be able to use this directly, used in tests
        $forceCreateClusters = $this->config->getUserValue($userId, 'facerecognition', 'force-create-clusters', 'false');
        $forceCreation = ($forceCreateClusters === 'true');

        // These are basic criteria without which we should not even consider creating clusters.
        // Such clusters would be small and not "stable" enough, so it is better to wait for more images.
        // todo: 2 queries to get these 2 counts, can we do this smarter?
        $imageCount = $this->imageMapper->countUserImages($userId, $modelId);
        $imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
        // Fraction (0..1) of the user's images that are already processed.
        $processedRatio = 0;
        if ($imageCount > 0) {
            $processedRatio = $imageProcessed / floatval($imageCount);
        }
        $facesCount = $this->faceMapper->countFaces($userId, $modelId);
        // todo: get rid of magic numbers (move to config)
        if (!$forceCreation && ($facesCount < 1000) && ($imageCount < 100) && ($processedRatio < 0.95)) {
            $this->logInfo(
                'Skipping cluster creation, not enough data (yet) collected. ' .
                'For cluster creation, you need either one of the following:');
            $this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
            $this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
            // The ratio is a fraction, so scale it to a real percentage before printing it with '%'.
            $this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $processedRatio * 100));
            return;
        }
    }

    $faces = $this->faceMapper->getFaces($userId, $modelId);
    $this->logInfo(count($faces) . ' faces found for clustering');

    // A cluster is an associative array where the key is a person ID and the value is an
    // array of face IDs. For old clusters the person IDs are existing person IDs; for new
    // clusters they are whatever chinese whispers decides to identify them with.
    $currentClusters = $this->getCurrentClusters($faces);
    $newClusters = $this->getNewClusters($faces);
    $this->logInfo(count($newClusters) . ' persons found after clustering');

    // Map the new clusters onto the existing persons and store the result.
    $mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
    $this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

    // Reset the force flags so the clusters are not created/recreated again unnecessarily.
    $this->config->setUserValue($userId, 'facerecognition', 'recreate-clusters', 'false');
    $this->config->setUserValue($userId, 'facerecognition', 'force-create-clusters', 'false');
}
|
207
|
|
|
|
|
208
|
1 |
|
/**
 * Groups the faces that already belong to a person by that person.
 *
 * Faces whose person is null are left out.
 *
 * @param array $faces Faces to group; each one is read through its person and id properties
 * @return array Map of person ID => array of face IDs
 */
private function getCurrentClusters(array $faces): array {
    $clustersByPerson = [];
    foreach ($faces as $face) {
        if ($face->person === null) {
            continue;
        }
        // Appending to a missing key creates the inner array on first use.
        $clustersByPerson[$face->person][] = $face->id;
    }
    return $clustersByPerson;
}
|
220
|
|
|
|
|
221
|
1 |
|
/**
 * Clusters the given faces from scratch with chinese whispers.
 *
 * Two faces are connected by an edge when the Euclidean distance between their
 * descriptors is below the 'sensitivity' app value (default '0.5'). The edges are handed
 * to dlib_chinese_whispers(), and its result is read as one cluster label per face index.
 *
 * $faces is indexed by position (0 .. count - 1) throughout.
 *
 * @param array $faces Faces to cluster; each one is read through its descriptor and id properties
 * @return array Map of cluster label => array of face IDs
 */
private function getNewClusters(array $faces): array {
    // Create edges for chinese whispers
    $distanceMeter = new Euclidean();
    $threshold = floatval($this->config->getAppValue('facerecognition', 'sensitivity', '0.5'));

    $total = count($faces);
    $edges = [];
    for ($first = 0; $first < $total; $first++) {
        $faceA = $faces[$first];
        // The inner loop starts at $first itself, so every face is also compared with itself.
        // NOTE(review): presumably this guarantees each face index shows up in an edge - confirm.
        for ($second = $first; $second < $total; $second++) {
            $faceB = $faces[$second];
            // todo: can't this distance be a method in $face1->distance($face2)?
            if ($distanceMeter->distance($faceA->descriptor, $faceB->descriptor) < $threshold) {
                $edges[] = [$first, $second];
            }
        }
    }

    $labels = dlib_chinese_whispers($edges);

    // Turn the per-index labels into a map of label => face IDs.
    $clusters = [];
    $labelCount = count($labels);
    for ($index = 0; $index < $labelCount; $index++) {
        $clusters[$labels[$index]][] = $faces[$index]->id;
    }

    return $clusters;
}
|
251
|
|
|
|
|
252
|
|
|
/**
 * Merges freshly computed clusters with the previously known ones, so that new clusters
 * reuse old person IDs where their faces came from an old person.
 *
 * Every face of the new clusters is recorded as a transition "old person -> new person"
 * (old person is empty for faces that had none). Transitions are counted and processed
 * from the most to the least frequent: the first transition seen for a new person decides
 * which old person ID it takes over, and each old person ID is handed out only once.
 * New persons left without an old ID get fresh IDs above the highest old person ID.
 *
 * Old person ID 0 is treated as "no old person".
 *
 * todo: only reason this is public is because of tests. Go figure it out better.
 *
 * @param array $oldCluster Map of old person ID => array of face IDs
 * @param array $newCluster Map of new person ID => array of face IDs
 * @return array Map of final person ID => array of face IDs (the face lists of $newCluster)
 */
public function mergeClusters(array $oldCluster, array $newCluster): array {
    // Index the old faces by owning person once, instead of scanning every old cluster
    // for every new face. The first person that lists a face wins.
    $oldPersonByFace = [];
    foreach ($oldCluster as $oldPerson => $oldFaces) {
        foreach ($oldFaces as $oldFace) {
            if (!isset($oldPersonByFace[$oldFace])) {
                $oldPersonByFace[$oldFace] = $oldPerson;
            }
        }
    }

    // Create map of face transitions: face ID => [old person or null, new person]
    $transitions = [];
    foreach ($newCluster as $newPerson => $newFaces) {
        foreach ($newFaces as $newFace) {
            $transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
        }
    }

    // Count transitions, keyed as "<old person>:<new person>"
    $transitionCount = [];
    foreach ($transitions as $transition) {
        $key = $transition[0] . ':' . $transition[1];
        $transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
    }

    // Create map of new person -> old person, most frequent transitions first
    $newOldPersonMapping = [];
    $oldPersonProcessed = []; // old person IDs already handed out, kept as keys for O(1) lookup
    arsort($transitionCount);
    foreach (array_keys($transitionCount) as $transitionKey) {
        $transition = explode(':', $transitionKey);
        $oldPerson = intval($transition[0]); // an empty old person becomes 0
        $newPerson = intval($transition[1]);
        if (array_key_exists($newPerson, $newOldPersonMapping)) {
            continue;
        }
        if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
            $newOldPersonMapping[$newPerson] = $oldPerson;
            $oldPersonProcessed[$oldPerson] = 0;
        } else {
            // The old person was already taken by a more frequent transition.
            $newOldPersonMapping[$newPerson] = 0;
        }
    }

    // Starting with new cluster, convert all new person IDs with old person IDs
    $maxOldPersonId = 1;
    if (count($oldCluster) > 0) {
        $maxOldPersonId = max(array_keys($oldCluster)) + 1;
    }

    $result = [];
    foreach ($newCluster as $newPerson => $newFaces) {
        // A new cluster without faces produced no transition; treat it as a new person
        // instead of reading an undefined mapping entry.
        $oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
        if ($oldPerson === 0) {
            $result[$maxOldPersonId] = $newFaces;
            $maxOldPersonId++;
        } else {
            $result[$oldPerson] = $newFaces;
        }
    }
    return $result;
}
|
315
|
|
|
} |
|
316
|
|
|
|