Passed
Pull Request — master (#69)
by Branko
56s
created

CreateClustersTask::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 5
nc 1
nop 4
dl 0
loc 10
ccs 6
cts 6
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * @copyright Copyright (c) 2017, Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IConfig;
27
use OCP\IUser;
28
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
30
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
31
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask;
32
33
use OCA\FaceRecognition\Db\FaceMapper;
34
use OCA\FaceRecognition\Db\ImageMapper;
35
use OCA\FaceRecognition\Db\PersonMapper;
36
37
use OCA\FaceRecognition\Helper\Euclidean;
38
39
use OCA\FaceRecognition\Migration\AddDefaultFaceModel;
40
41
/**
 * Task that, for each user, creates (or re-creates) person clusters out of
 * the faces stored for that user.
 */
class CreateClustersTask extends FaceRecognitionBackgroundTask {
	// todo: all thresholds below should eventually move to app config.

	/** Number of faces without a person at which clusters are recreated right away. */
	const MIN_NEW_FACES_FOR_RECLUSTERING = 10;

	/** Age (in seconds) of the oldest face without a person after which clusters are recreated anyway. */
	const MAX_UNCLUSTERED_FACE_AGE = 7200;

	/** Number of faces that is enough to create the very first clusters. */
	const MIN_FACES_FOR_CLUSTERING = 1000;

	/** Number of images that is enough to create the very first clusters. */
	const MIN_IMAGES_FOR_CLUSTERING = 100;

	/** Percentage of processed images that is enough to create the very first clusters. */
	const MIN_PROCESSED_IMAGES_PERCENT = 95;

	/** Two faces whose descriptors are closer than this are connected with an edge. */
	const MAX_FACE_DISTANCE = 0.5;

	/** @var IConfig Config */
	private $config;

	/** @var PersonMapper Person mapper */
	private $personMapper;

	/** @var ImageMapper Image mapper */
	private $imageMapper;

	/** @var FaceMapper Face mapper */
	private $faceMapper;

	/**
	 * @param IConfig $config Config
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceMapper $faceMapper Face mapper
	 */
	public function __construct(IConfig      $config,
	                            PersonMapper $personMapper,
	                            ImageMapper  $imageMapper,
	                            FaceMapper   $faceMapper)
	{
		parent::__construct();
		$this->config = $config;
		$this->personMapper = $personMapper;
		$this->imageMapper = $imageMapper;
		$this->faceMapper = $faceMapper;
	}

	/**
	 * @inheritdoc
	 */
	public function description() {
		return "Create new persons or update existing persons";
	}

	/**
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$fullImageScanDone = $this->config->getAppValue('facerecognition', AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY, 'false');
		if ($fullImageScanDone !== 'true') {
			// If not all images are inserted in the database, we cannot determine when we should start clustering.
			// Since this is a step at the beginning, just bail out.
			$this->logInfo('Skipping cluster creation, as not even existing images are found and inserted in database');
			return true;
		}

		// We cannot yield inside of Closure, so we need to extract all users and iterate outside of closure.
		// However, since we don't want to do deep copy of IUser, we keep only UID in this array.
		//
		$eligibleUsers = [];
		if ($this->context->user === null) {
			$this->context->userManager->callForSeenUsers(function (IUser $user) use (&$eligibleUsers) {
				$eligibleUsers[] = $user->getUID();
			});
		} else {
			$eligibleUsers[] = $this->context->user->getUID();
		}

		foreach ($eligibleUsers as $userId) {
			$this->createClusterIfNeeded($userId);
		}

		return true;
	}

	/**
	 * Creates or recreates the person clusters of a single user, provided that
	 * the criteria for doing so are fulfilled.
	 *
	 * @param string $userId ID of the user to create clusters for
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		//
		if ($this->personMapper->countPersons($userId) > 0) {
			if (!$this->shouldRecreateClusters($userId, $modelId)) {
				// If there are no invalid persons, and there are no recent new faces, no need to recreate clusters
				$this->logInfo('Clusters already exist, calculated there is no need to recreate them');
				return;
			}
		} else if (!$this->hasEnoughDataForClustering($userId, $modelId)) {
			return;
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		//
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found for clustering');

		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);
	}

	/**
	 * Decides whether already existing clusters of a user should be recreated.
	 *
	 * We want to recreate clusters/persons if:
	 * * Some cluster/person is invalidated (stale). This means some image that
	 *   belonged to this user is changed, deleted etc.
	 * * There are some new faces. We don't want to jump the gun here, so we want either:
	 * ** at least MIN_NEW_FACES_FOR_RECLUSTERING new faces, or
	 * ** fewer new faces, but the oldest one is older than MAX_UNCLUSTERED_FACE_AGE
	 *   (basically, we want to avoid recreating clusters for each new face being uploaded,
	 *   however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model in use
	 * @return bool true if clusters should be recreated
	 */
	private function shouldRecreateClusters(string $userId, int $modelId): bool {
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));

		$haveNewFaces = false;
		if ($facesWithoutPersons >= self::MIN_NEW_FACES_FOR_RECLUSTERING) {
			$haveNewFaces = true;
		} else if ($facesWithoutPersons > 0) {
			// We have some faces, but not that many, let's see when the oldest one was generated.
			$face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
			$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
				$userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
			$faceAge = time() - $face->creationTime->getTimestamp();
			$haveNewFaces = $faceAge > self::MAX_UNCLUSTERED_FACE_AGE;
		}

		$stalePersonsCount = $this->personMapper->countPersons($userId, true);
		$this->logDebug(sprintf('Found %d stale persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));

		return $haveNewFaces || ($stalePersonsCount > 0);
	}

	/**
	 * Checks the basic criteria without which we should not even consider
	 * creating the first clusters of a user. Clusters built from too little data
	 * will be small and not "stable" enough, so we should rather wait for more
	 * images to come. Logs what is missing when the criteria are not fulfilled.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model in use
	 * @return bool true if at least one of the criteria is fulfilled
	 */
	private function hasEnoughDataForClustering(string $userId, int $modelId): bool {
		// todo: 2 queries to get these 2 counts, can we do this smarter?
		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
		// Kept as a real percentage (0..100) so that it is both compared and logged in the same unit.
		$percentImagesProcessed = 0.0;
		if ($imageCount > 0) {
			$percentImagesProcessed = 100.0 * $imageProcessed / $imageCount;
		}
		$facesCount = $this->faceMapper->countFaces($userId, $modelId);

		if (($facesCount >= self::MIN_FACES_FOR_CLUSTERING)
			|| ($imageCount >= self::MIN_IMAGES_FOR_CLUSTERING)
			|| ($percentImagesProcessed >= self::MIN_PROCESSED_IMAGES_PERCENT)) {
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo(sprintf('* have %d faces already processed (you have %d),',
			self::MIN_FACES_FOR_CLUSTERING, $facesCount));
		$this->logInfo(sprintf('* have %d images (you have %d),',
			self::MIN_IMAGES_FOR_CLUSTERING, $imageCount));
		$this->logInfo(sprintf('* or you need to have %d%% of you images processed (you have %.2f%%)',
			self::MIN_PROCESSED_IMAGES_PERCENT, $percentImagesProcessed));
		return false;
	}

	/**
	 * Groups faces that already have a person assigned by that person.
	 *
	 * @param array $faces Faces to group; each one is read through its "person" and "id" properties
	 * @return array Map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$currentClusters = [];
		foreach ($faces as $face) {
			// Loose comparison is kept on purpose: any empty person value means "no person".
			if ($face->person != null) {
				$currentClusters[$face->person][] = $face->id;
			}
		}
		return $currentClusters;
	}

	/**
	 * Clusters all given faces from scratch with the chinese whispers algorithm.
	 *
	 * @param array $faces Faces to cluster; each one is read through its "descriptor" and "id" properties
	 * @return array Map of cluster label (as returned by chinese whispers) => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Faces are addressed by position below, so make sure we really work on a list.
		$faces = array_values($faces);
		$faceCount = count($faces);
		if ($faceCount === 0) {
			return [];
		}

		// Create edges for chinese whispers. Inner loop starts at $i, so each face is also
		// compared with itself (presumably so that every face index shows up in at least one edge).
		$euclidean = new Euclidean();
		$edges = [];
		for ($i = 0; $i < $faceCount; $i++) {
			$face1 = $faces[$i];
			for ($j = $i; $j < $faceCount; $j++) {
				$face2 = $faces[$j];
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $euclidean->distance($face1->descriptor, $face2->descriptor);
				if ($distance < self::MAX_FACE_DISTANCE) {
					$edges[] = [$i, $j];
				}
			}
		}

		// Result is indexed by face position and holds the label of the cluster that face ended up in.
		// NOTE(review): dlib_chinese_whispers is presumably provided by the pdlib extension - confirm.
		$labelsByIndex = dlib_chinese_whispers($edges);
		$newClusters = [];
		foreach ($labelsByIndex as $index => $label) {
			$newClusters[$label][] = $faces[$index]->id;
		}

		return $newClusters;
	}

	/**
	 * Merges freshly calculated clusters with already existing ones, so that new
	 * clusters reuse IDs of existing persons wherever most of their faces came from.
	 *
	 * New clusters that cannot be matched to an existing person get brand new IDs,
	 * starting from one above the biggest existing person ID.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index old clusters by face, so we don't scan all old clusters for each new face.
		// First person that contains the face wins, same as a linear search would give.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions (face => [old person or null, new person])
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. Missing old person ends up as empty string in the key.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent transitions first.
		// Old person 0 stands for "no old person" and each real old person can be taken only once.
		$newOldPersonMapping = [];
		$oldPersonProcessed = [];
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (isset($newOldPersonMapping[$newPerson])) {
				continue;
			}
			if (($oldPerson == 0) || !isset($oldPersonProcessed[$oldPerson])) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = true;
			} else {
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$nextPersonId = 1;
		if (count($oldCluster) > 0) {
			$nextPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// Cluster without faces has no transitions at all; treat it as a new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson == 0) {
				$result[$nextPersonId] = $newFaces;
				$nextPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
}
305