1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @copyright Copyright (c) 2017, Matias De lellis <[email protected]> |
4
|
|
|
* @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]> |
5
|
|
|
* |
6
|
|
|
* @author Branko Kokanovic <[email protected]> |
7
|
|
|
* |
8
|
|
|
* @license GNU AGPL version 3 or any later version |
9
|
|
|
* |
10
|
|
|
* This program is free software: you can redistribute it and/or modify |
11
|
|
|
* it under the terms of the GNU Affero General Public License as |
12
|
|
|
* published by the Free Software Foundation, either version 3 of the |
13
|
|
|
* License, or (at your option) any later version. |
14
|
|
|
* |
15
|
|
|
* This program is distributed in the hope that it will be useful, |
16
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
17
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18
|
|
|
* GNU Affero General Public License for more details. |
19
|
|
|
* |
20
|
|
|
* You should have received a copy of the GNU Affero General Public License |
21
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
* |
23
|
|
|
*/ |
24
|
|
|
namespace OCA\FaceRecognition\BackgroundJob\Tasks; |
25
|
|
|
|
26
|
|
|
use OCP\IConfig; |
27
|
|
|
use OCP\IUser; |
28
|
|
|
|
29
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask; |
30
|
|
|
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext; |
31
|
|
|
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask; |
32
|
|
|
|
33
|
|
|
use OCA\FaceRecognition\Db\FaceNewMapper; |
34
|
|
|
use OCA\FaceRecognition\Db\ImageMapper; |
35
|
|
|
use OCA\FaceRecognition\Db\PersonMapper; |
36
|
|
|
|
37
|
|
|
use OCA\FaceRecognition\Helper\Euclidean; |
38
|
|
|
|
39
|
|
|
use OCA\FaceRecognition\Migration\AddDefaultFaceModel; |
40
|
|
|
|
41
|
|
|
/**
 * Task that, for each eligible user, groups that user's faces into person clusters.
 *
 * Existing person assignments are read from the faces themselves, a fresh
 * clustering is computed with chinese whispers, and both are reconciled so that
 * persons which survive a re-clustering keep their old IDs.
 */
class CreateClustersTask extends FaceRecognitionBackgroundTask {
	/** @var IConfig Config */
	private $config;

	/** @var PersonMapper Person mapper */
	private $personMapper;

	/** @var ImageMapper Image mapper */
	private $imageMapper;

	/** @var FaceNewMapper Face mapper */
	private $faceMapper;

	/**
	 * @param IConfig $config Config
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceNewMapper $faceMapper Face mapper
	 */
	public function __construct(IConfig $config, PersonMapper $personMapper, ImageMapper $imageMapper, FaceNewMapper $faceMapper) {
		parent::__construct();
		$this->config = $config;
		$this->personMapper = $personMapper;
		$this->imageMapper = $imageMapper;
		$this->faceMapper = $faceMapper;
	}

	/**
	 * @inheritdoc
	 */
	public function description() {
		return "Create new persons or update existing persons";
	}

	/**
	 * Runs clustering for the user in the context, or for every seen user when
	 * the context has no user. Always returns true, including when clustering
	 * is skipped because the full image scan is not done yet.
	 *
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$fullImageScanDone = $this->config->getAppValue('facerecognition', AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY, 'false');
		if ($fullImageScanDone !== 'true') {
			// Until all images are inserted in the database, we cannot determine when we should start clustering.
			// Since this step runs at the beginning, just bail out.
			$this->logInfo('Skipping cluster creation, as not even existing images are found and inserted in database');
			return true;
		}

		// We cannot yield inside of a Closure, so we need to collect all users and iterate outside of it.
		// Since we don't want to do a deep copy of IUser, we keep only the UID in this array.
		$eligibleUsers = [];
		if (is_null($this->context->user)) {
			$this->context->userManager->callForSeenUsers(function (IUser $user) use (&$eligibleUsers) {
				$eligibleUsers[] = $user->getUID();
			});
		} else {
			$eligibleUsers[] = $this->context->user->getUID();
		}

		foreach ($eligibleUsers as $userId) {
			$this->createClusterIfNeeded($userId);
		}

		return true;
	}

	/**
	 * Clusters the faces of one user and hands the result to the person mapper,
	 * unless the user has no persons yet and too little data has been collected.
	 *
	 * @param string $userId UID of the user to cluster faces for
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		$hasPersons = $this->personMapper->countPersons($userId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		//
		// todo: when persons already exist, find all faces that are in DB, but are not in user's clusters.
		// If we detect more than 10 faces like this,
		// or if more than 2h since any of these is passed,
		// or if "is_valid" (UserCluster table) is false,
		// start new round of clustering for that user.
		// Until that is implemented, users with existing persons are always re-clustered.
		if (!$hasPersons) {
			// These are basic criteria without which we should not even consider creating clusters.
			// Such clusters would be small and not "stable" enough, so we better wait for more images to come.
			// todo: 2 queries to get these 2 counts, can we do this smarter?
			$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
			$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
			// A user without images has processed nothing; guard against division by zero.
			$percentImagesProcessed = ($imageCount > 0) ? $imageProcessed / floatval($imageCount) : 0.0;
			$facesCount = $this->faceMapper->countFaces($userId, $modelId);
			// todo: get rid of magic numbers (move to config)
			if (($facesCount < 1000) && ($imageCount < 100) && ($percentImagesProcessed < 0.95)) {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
				$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
				// The ratio is a 0..1 fraction, so scale it before printing it as a percentage.
				$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
				return;
			}
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		//
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found for clustering');

		// Re-key the new clusters so that persons surviving the re-clustering keep their old IDs,
		// and persist that merged view instead of the raw chinese whispers labels.
		// NOTE(review): mergeClusterToDatabase() is not visible from here - confirm that it
		// expects new clusters keyed by (old or freshly allocated) person IDs.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);
	}

	/**
	 * Groups faces by the person they are currently assigned to.
	 *
	 * @param array $faces Faces exposing "person" and "id" properties
	 * @return array Map of person ID => list of face IDs; faces without a person are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$clusters = [];
		foreach ($faces as $face) {
			// Loose comparison is deliberate: any empty person value counts as "not assigned".
			if ($face->person != null) {
				$clusters[$face->person][] = $face->id;
			}
		}
		return $clusters;
	}

	/**
	 * Clusters faces from scratch with chinese whispers, connecting every pair of
	 * faces whose descriptors are closer than the distance threshold.
	 *
	 * @param array $faces Zero-indexed list of faces exposing "descriptor" and "id" properties
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		$faceCount = count($faces);
		if ($faceCount === 0) {
			// Nothing to cluster; do not bother the extension with an empty graph.
			return [];
		}

		// Create edges for chinese whispers. The inner loop starts at $i on purpose:
		// the self-edge makes every face index appear in the edge list at least once.
		$euclidean = new Euclidean();
		$edges = [];
		for ($i = 0; $i < $faceCount; $i++) {
			$face1 = $faces[$i];
			for ($j = $i; $j < $faceCount; $j++) {
				$face2 = $faces[$j];
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $euclidean->distance($face1->descriptor, $face2->descriptor);
				// todo: extract this magic number to app param
				if ($distance < 0.5) {
					$edges[] = [$i, $j];
				}
			}
		}

		// NOTE(review): presumably returns one cluster label per face index - confirm against pdlib.
		$labelsByIndex = dlib_chinese_whispers($edges);
		$newClusters = [];
		foreach ($labelsByIndex as $index => $label) {
			$newClusters[$label][] = $faces[$index]->id;
		}

		return $newClusters;
	}

	/**
	 * Re-keys new clusters with old person IDs wherever a new cluster continues an old one.
	 *
	 * Every face votes for an "old person -> new person" transition. Transitions
	 * are visited from the most to the least voted one; a new cluster takes over
	 * an old person ID only if no other new cluster claimed that ID before.
	 * All remaining new clusters get fresh IDs, starting after the biggest old person ID.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of old person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of person ID => list of face IDs, with the face lists of $newCluster
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index old faces by the first old person that contains them, so that
		// each lookup below is constant time instead of a scan over all old clusters.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions (null old person means the face had no person before)
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions
		$transitionCount = [];
		foreach ($transitions as list($oldPerson, $newPerson)) {
			$key = $oldPerson . ':' . $newPerson;
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent transitions first
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // keyed by old person, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]); // empty string (no old person) becomes 0
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				continue;
			}
			if (($oldPerson == 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// Old person ID is already taken by a better matching new cluster
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces produced no transition; treat it as a brand new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson == 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
}
264
|
|
|
|
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.