1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @copyright Copyright (c) 2017, Matias De lellis <[email protected]> |
4
|
|
|
* @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]> |
5
|
|
|
* |
6
|
|
|
* @author Branko Kokanovic <[email protected]> |
7
|
|
|
* |
8
|
|
|
* @license GNU AGPL version 3 or any later version |
9
|
|
|
* |
10
|
|
|
* This program is free software: you can redistribute it and/or modify |
11
|
|
|
* it under the terms of the GNU Affero General Public License as |
12
|
|
|
* published by the Free Software Foundation, either version 3 of the |
13
|
|
|
* License, or (at your option) any later version. |
14
|
|
|
* |
15
|
|
|
* This program is distributed in the hope that it will be useful, |
16
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
17
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18
|
|
|
* GNU Affero General Public License for more details. |
19
|
|
|
* |
20
|
|
|
* You should have received a copy of the GNU Affero General Public License |
21
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
* |
23
|
|
|
*/ |
24
|
|
|
namespace OCA\FaceRecognition\BackgroundJob\Tasks; |
25
|
|
|
|
26
|
|
|
use OCP\IConfig; |
27
|
|
|
use OCP\IUser; |
28
|
|
|
|
29
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; |
30
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; |
31
|
|
|
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask; |
32
|
|
|
|
33
|
|
|
use OCA\FaceRecognition\Db\FaceMapper; |
34
|
|
|
use OCA\FaceRecognition\Db\ImageMapper; |
35
|
|
|
use OCA\FaceRecognition\Db\PersonMapper; |
36
|
|
|
|
37
|
|
|
use OCA\FaceRecognition\Helper\Euclidean; |
38
|
|
|
|
39
|
|
|
use OCA\FaceRecognition\Migration\AddDefaultFaceModel; |
40
|
|
|
|
41
|
|
|
/** |
42
|
|
|
* Task that creates person clusters for each user. |
43
|
|
|
*/ |
44
|
|
|
class CreateClustersTask extends FaceRecognitionBackgroundTask { |
45
|
|
|
/** @var IConfig Config */ |
46
|
|
|
private $config; |
47
|
|
|
|
48
|
|
|
/** @var PersonMapper Person mapper*/ |
49
|
|
|
private $personMapper; |
50
|
|
|
|
51
|
|
|
/** @var ImageMapper Image mapper*/ |
52
|
|
|
private $imageMapper; |
53
|
|
|
|
54
|
|
|
/** @var FaceMapper Face mapper*/ |
55
|
|
|
private $faceMapper; |
56
|
|
|
|
57
|
|
|
/**
 * Stores the configuration service and the database mappers this task works with.
 *
 * @param IConfig $config Config
 * @param PersonMapper $personMapper Person mapper
 * @param ImageMapper $imageMapper Image mapper
 * @param FaceMapper $faceMapper Face mapper
 */
public function __construct(
    IConfig $config,
    PersonMapper $personMapper,
    ImageMapper $imageMapper,
    FaceMapper $faceMapper
) {
    parent::__construct();

    // Database mappers
    $this->faceMapper = $faceMapper;
    $this->imageMapper = $imageMapper;
    $this->personMapper = $personMapper;

    // App/user configuration
    $this->config = $config;
}
71
|
|
|
|
72
|
|
|
/**
 * Short, human-readable summary of what this task does.
 *
 * @inheritdoc
 */
public function description() {
    $summary = 'Create new persons or update existing persons';
    return $summary;
}
78
|
|
|
|
79
|
|
|
/**
 * Runs cluster creation for the user set on the context or, when the context has no user,
 * for every user reported by the user manager. This is a generator: it yields once after
 * each processed user and finally returns true.
 *
 * @inheritdoc
 */
public function execute(FaceRecognitionContext $context) {
    $this->setContext($context);

    // A generator cannot yield from inside a closure, so the closure only collects user IDs
    // (plain strings, no IUser copies) and the actual work happens in the loop below.
    $userIds = [];
    $requestedUser = $this->context->user;
    if ($requestedUser !== null) {
        $userIds[] = $requestedUser->getUID();
    } else {
        $this->context->userManager->callForSeenUsers(function (IUser $seenUser) use (&$userIds) {
            $userIds[] = $seenUser->getUID();
        });
    }

    foreach ($userIds as $userId) {
        $this->createClusterIfNeeded($userId);
        yield;
    }

    return true;
}
104
|
|
|
|
105
|
1 |
|
/**
 * Decides whether person clusters of the given user should be (re)built now and, if so,
 * rebuilds them and stores the result through the person mapper.
 *
 * If the user already has persons, clusters are rebuilt only when at least one of these holds:
 *  - countPersons($userId, true) reports stale (invalidated) persons,
 *  - there are 10 or more faces without a person, or fewer than 10 but the oldest of them
 *    was created more than 2 hours ago,
 *  - the per-user 'recreate-clusters' value is 'true'.
 *
 * If the user has no persons yet, clusters are built when at least one of these holds:
 *  - at least 1000 faces, at least 100 images, or at least 95% of the images processed,
 *  - the per-user 'force-create-clusters' value is 'true' (meant for tests).
 *
 * After a (re)build both per-user flags are reset to 'false'.
 *
 * @param string $userId ID of the user to create clusters for
 */
private function createClusterIfNeeded(string $userId) {
    $modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

    $hasPersons = $this->personMapper->countPersons($userId) > 0;

    // Depending on whether we already have clusters, decide if we should create/recreate them.
    if ($hasPersons) {
        // We already have some persons, so evaluate whether recreating clusters is worth it.
        // We do not want to recreate clusters for every single uploaded face, but we also do not
        // want to wait too long, hence the "10 faces or 2 hours" rule below.
        $haveNewFaces = false;
        $facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
        $this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
            $facesWithoutPersons, $userId, $modelId));
        // todo: get rid of magic numbers (move to config)
        if ($facesWithoutPersons >= 10) {
            $haveNewFaces = true;
        } else if ($facesWithoutPersons > 0) {
            // We have some faces, but not that many, let's see when the oldest one was created.
            $face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
            $oldestFaceTimestamp = $face->creationTime->getTimestamp();
            $currentTimestamp = (new \DateTime())->getTimestamp();
            $this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
                $userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
            // todo: get rid of magic numbers (move to config)
            if ($currentTimestamp - $oldestFaceTimestamp > 2 * 60 * 60) {
                $haveNewFaces = true;
            }
        }

        $stalePersonsCount = $this->personMapper->countPersons($userId, true);
        $haveStalePersons = $stalePersonsCount > 0;
        // True when nothing changed: no stale persons and no (sufficiently many/old) new faces.
        $clustersUpToDate = !$haveStalePersons && !$haveNewFaces;

        $recreateClusters = $this->config->getUserValue($userId, 'facerecognition', 'recreate-clusters', 'false');
        $forceRecreation = ($recreateClusters === 'true');

        $this->logDebug(sprintf('Found %d changed persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));

        if ($clustersUpToDate && !$forceRecreation) {
            // No invalid persons and no recent new faces: no need to recreate clusters.
            $this->logInfo('Clusters already exist, estimated there is no need to recreate them');
            return;
        }
        else if ($forceRecreation) {
            $this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
        }
    } else {
        // User should not be able to use this directly, used in tests
        $forceCreateClusters = $this->config->getUserValue($userId, 'facerecognition', 'force-create-clusters', 'false');
        $forceCreation = ($forceCreateClusters === 'true');

        // These are basic criteria without which we should not even consider creating clusters.
        // Such clusters would be small and not "stable" enough; better wait for more images to come.
        // todo: 2 queries to get these 2 counts, can we do this smarter?
        $imageCount = $this->imageMapper->countUserImages($userId, $modelId);
        $imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
        // Fraction (0..1) of the user's images that are already processed.
        $percentImagesProcessed = 0;
        if ($imageCount > 0) {
            $percentImagesProcessed = $imageProcessed / floatval($imageCount);
        }
        $facesCount = $this->faceMapper->countFaces($userId, $modelId);
        // todo: get rid of magic numbers (move to config)
        if (!$forceCreation && ($facesCount < 1000) && ($imageCount < 100) && ($percentImagesProcessed < 0.95)) {
            $this->logInfo(
                'Skipping cluster creation, not enough data (yet) collected. ' .
                'For cluster creation, you need either one of the following:');
            $this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
            $this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
            // The value is a 0..1 fraction, so scale it to a real percentage for display.
            $this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
            return;
        }
    }

    $faces = $this->faceMapper->getFaces($userId, $modelId);
    $this->logInfo(count($faces) . ' faces found for clustering');

    // A cluster is an associative array where the key is a person ID and the value is an array
    // of face IDs. For old clusters the keys are existing person IDs, for new clusters they are
    // whatever chinese whispers decided to label them with.
    $currentClusters = $this->getCurrentClusters($faces);
    $newClusters = $this->getNewClusters($faces);
    $this->logInfo(count($newClusters) . ' persons found after clustering');

    // Carry old person IDs over to the new clusters where possible, then persist.
    $mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
    $this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

    // Reset the flags so that clusters are not created/recreated unnecessarily on the next run.
    $this->config->setUserValue($userId, 'facerecognition', 'recreate-clusters', 'false');
    $this->config->setUserValue($userId, 'facerecognition', 'force-create-clusters', 'false');
}
207
|
|
|
|
208
|
1 |
|
/**
 * Groups the faces that already belong to a person by that person's ID.
 *
 * @param array $faces Faces to group; each one is read through its `person` and `id` properties
 * @return array Map of person ID => list of face IDs; faces whose `person` is null are left out
 */
private function getCurrentClusters(array $faces): array {
    $facesByPerson = [];
    foreach ($faces as $face) {
        $personId = $face->person;
        if ($personId === null) {
            // Face is not assigned to any person yet.
            continue;
        }
        // Appending creates the per-person list on first use.
        $facesByPerson[$personId][] = $face->id;
    }
    return $facesByPerson;
}
220
|
|
|
|
221
|
1 |
|
/**
 * Clusters the given faces from scratch using chinese whispers.
 *
 * Two faces are connected by an edge when the Euclidean distance between their descriptors
 * is below the 'sensitivity' app value (default '0.5'). The inner loop starts at the outer
 * index, so every face is also paired with itself.
 *
 * @param array $faces Faces to cluster, addressed by 0-based consecutive index; each one is
 *                     read through its `descriptor` and `id` properties
 * @return array Map of cluster label => list of face IDs. Labels are the values returned by
 *               dlib_chinese_whispers(), which is assumed to return one label per face index.
 */
private function getNewClusters(array $faces): array {
    $threshold = floatval($this->config->getAppValue('facerecognition', 'sensitivity', '0.5'));
    $metric = new Euclidean();
    $faceCount = count($faces);

    // Build the edges for chinese whispers as pairs of face indexes.
    $edges = [];
    for ($a = 0; $a < $faceCount; $a++) {
        $first = $faces[$a];
        for ($b = $a; $b < $faceCount; $b++) {
            $second = $faces[$b];
            // todo: can't this distance be a method in $face1->distance($face2)?
            $distance = $metric->distance($first->descriptor, $second->descriptor);
            if ($distance < $threshold) {
                $edges[] = [$a, $b];
            }
        }
    }

    $labelByIndex = dlib_chinese_whispers($edges);

    // Translate "face index -> label" into "label -> face IDs".
    $clusters = [];
    $labelCount = count($labelByIndex);
    for ($index = 0; $index < $labelCount; $index++) {
        // Appending creates the per-label list on first use.
        $clusters[$labelByIndex[$index]][] = $faces[$index]->id;
    }

    return $clusters;
}
251
|
|
|
|
252
|
|
|
/**
 * Merges freshly computed clusters with the existing ones, so that a new cluster keeps the
 * ID of the old person it shares the most faces with.
 *
 * For every face of the new clusters the transition "old person -> new cluster" is recorded
 * and counted. Transitions are then processed from the most to the least frequent: a new
 * cluster takes over an old person ID if that ID was not already taken by another new
 * cluster. All remaining new clusters (including ones without any faces) get fresh IDs,
 * starting right after the biggest old person ID (or at 1 when there are no old clusters).
 *
 * Person ID 0 is used internally as "no old person", so an old person with ID 0 would be
 * treated as a new one.
 *
 * todo: only reason this is public is because of tests. Go figure it out better.
 *
 * @param array $oldCluster Map of existing person ID => list of face IDs
 * @param array $newCluster Map of new cluster ID => list of face IDs
 * @return array Map of person ID => list of face IDs, one entry per new cluster
 */
public function mergeClusters(array $oldCluster, array $newCluster): array {
    // Index old clusters by face ID, so each new face is resolved with a single lookup
    // instead of scanning every old cluster. The first old person owning a face wins.
    $oldPersonByFace = [];
    foreach ($oldCluster as $oldPerson => $oldFaces) {
        foreach ($oldFaces as $oldFace) {
            if (!isset($oldPersonByFace[$oldFace])) {
                $oldPersonByFace[$oldFace] = $oldPerson;
            }
        }
    }

    // Create map of face transitions: face ID => [old person or null, new person]
    $transitions = [];
    foreach ($newCluster as $newPerson => $newFaces) {
        foreach ($newFaces as $newFace) {
            $transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
        }
    }

    // Count transitions; a null old person results in a key like ":3"
    $transitionCount = [];
    foreach ($transitions as $transition) {
        $key = $transition[0] . ':' . $transition[1];
        $transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
    }

    // Create map of new person -> old person, most frequent transitions first
    $newOldPersonMapping = [];
    $oldPersonProcessed = []; // old person IDs already handed out, used as a set
    arsort($transitionCount);
    foreach ($transitionCount as $transitionKey => $count) {
        $transition = explode(':', $transitionKey);
        $oldPerson = intval($transition[0]); // empty string (no old person) becomes 0
        $newPerson = intval($transition[1]);
        if (array_key_exists($newPerson, $newOldPersonMapping)) {
            // This new cluster was already decided by a more frequent transition.
            continue;
        }
        if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
            $newOldPersonMapping[$newPerson] = $oldPerson;
            $oldPersonProcessed[$oldPerson] = 0;
        } else {
            // Old person ID is already taken by another new cluster.
            $newOldPersonMapping[$newPerson] = 0;
        }
    }

    // Starting with new clusters, replace new person IDs with old person IDs where possible
    $maxOldPersonId = 1;
    if (count($oldCluster) > 0) {
        $maxOldPersonId = max(array_keys($oldCluster)) + 1;
    }

    $result = [];
    foreach ($newCluster as $newPerson => $newFaces) {
        // A new cluster without faces produced no transition; treat it as a brand new person.
        $oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
        if ($oldPerson === 0) {
            $result[$maxOldPersonId] = $newFaces;
            $maxOldPersonId++;
        } else {
            $result[$oldPerson] = $newFaces;
        }
    }
    return $result;
}
315
|
|
|
} |
316
|
|
|
|