Passed
Push — translations ( c3ff52...9a02a6 )
by Matias
05:15
created

CreateClustersTask::createClusterIfNeeded()   C

Complexity

Conditions 14
Paths 28

Size

Total Lines 101
Code Lines 54

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 22
CRAP Score 53.2193

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 14
eloc 54
c 2
b 0
f 0
nc 28
nop 1
dl 0
loc 101
ccs 22
cts 53
cp 0.4151
crap 53.2193
rs 6.2666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017, Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IConfig;
27
use OCP\IUser;
28
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
30
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
31
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask;
32
33
use OCA\FaceRecognition\Db\FaceMapper;
34
use OCA\FaceRecognition\Db\ImageMapper;
35
use OCA\FaceRecognition\Db\PersonMapper;
36
37
use OCA\FaceRecognition\Helper\Euclidean;
38
39
use OCA\FaceRecognition\Migration\AddDefaultFaceModel;
40
41
/**
42
 * Task that, for each user, creates person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var IConfig Config */
46
	private $config;
47
48
	/** @var PersonMapper Person mapper*/
49
	private $personMapper;
50
51
	/** @var ImageMapper Image mapper*/
52
	private $imageMapper;
53
54
	/** @var FaceMapper Face mapper*/
55
	private $faceMapper;
56
57
	/**
	 * Stores the configuration service and the database mappers used by this task.
	 *
	 * @param IConfig      $config       Config
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper  $imageMapper  Image mapper
	 * @param FaceMapper   $faceMapper   Face mapper
	 */
	public function __construct(
		IConfig $config,
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceMapper $faceMapper
	) {
		parent::__construct();

		$this->faceMapper   = $faceMapper;
		$this->imageMapper  = $imageMapper;
		$this->personMapper = $personMapper;
		$this->config       = $config;
	}
71
72
	/**
	 * @inheritdoc
	 *
	 * Returns the fixed, human-readable summary of this task.
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';

		return $summary;
	}
78
79
	/**
	 * @inheritdoc
	 *
	 * Runs cluster creation for the user set on the context or, when the context has
	 * no user, for every user reported by the user manager's callForSeenUsers().
	 * Yields once after each processed user and finally returns true.
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		// A closure cannot yield on behalf of this generator, so the user IDs are gathered
		// first and walked afterwards. Only UIDs are kept, to avoid holding on to IUser objects.
		$userIds = [];
		if ($this->context->user !== null) {
			$userIds[] = $this->context->user->getUID();
		} else {
			$collectUid = function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			};
			$this->context->userManager->callForSeenUsers($collectUid);
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
			yield;
		}

		return true;
	}
104
105 1
	/**
	 * Creates or recreates the person clusters of a single user, when that is worthwhile.
	 *
	 * When the user already has persons, shouldRecreateClusters() decides whether the
	 * clusters are rebuilt; otherwise hasEnoughDataForClustering() decides whether the
	 * first clusters may be created. If clustering goes ahead, all faces of the user are
	 * clustered, merged with the existing clusters, handed to the person mapper, and both
	 * per-user force flags are reset to 'false'.
	 *
	 * @param string $userId ID of the user whose clusters are evaluated
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		$hasPersons = $this->personMapper->countPersons($userId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		if ($hasPersons) {
			$shouldCluster = $this->shouldRecreateClusters($userId, $modelId);
		} else {
			$shouldCluster = $this->hasEnoughDataForClustering($userId, $modelId);
		}
		if (!$shouldCluster) {
			return;
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' persons found after clustering');

		// Map the new clusters onto existing person IDs where possible, then persist.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);

		// Reset the force flags so the clusters are not created/recreated unnecessarily next time.
		$this->config->setUserValue($userId, 'facerecognition', 'recreate-clusters', 'false');
		$this->config->setUserValue($userId, 'facerecognition', 'force-create-clusters', 'false');
	}

	/**
	 * Decides whether the existing clusters of a user have to be recreated.
	 *
	 * We want to recreate clusters/persons if:
	 * * Some cluster/person is invalidated (is_valid is false for someone).
	 *   This means some image that belonged to this user is changed, deleted etc.
	 * * There are some new faces. We don't want to jump the gun here, so we want either:
	 * ** 10 or more new faces, or
	 * ** fewer than 10 new faces, but the oldest of them is older than 2h
	 *    (we want to avoid recreating clusters for each new face being uploaded,
	 *    however, we don't want to wait too much as clusters could be changed a lot).
	 * * The per-user 'recreate-clusters' flag is set to 'true'.
	 *
	 * @param string $userId  ID of the user
	 * @param int    $modelId ID of the face model in use
	 * @return bool True when the clusters should be recreated
	 */
	private function shouldRecreateClusters(string $userId, int $modelId): bool {
		$haveNewFaces = false;
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));
		// todo: get rid of magic numbers (move to config)
		if ($facesWithoutPersons >= 10) {
			$haveNewFaces = true;
		} else if ($facesWithoutPersons > 0) {
			// We have some faces, but not that many, let's see when oldest one is generated.
			$face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
			$oldestFaceTimestamp = $face->creationTime->getTimestamp();
			$currentTimestamp = (new \DateTime())->getTimestamp();
			$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
				$userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
			// todo: get rid of magic numbers (move to config)
			if ($currentTimestamp - $oldestFaceTimestamp > 2 * 60 * 60) {
				$haveNewFaces = true;
			}
		}

		$stalePersonsCount = $this->personMapper->countPersons($userId, true);
		$haveStalePersons = $stalePersonsCount > 0;
		// Nothing was invalidated and nothing new arrived: the stored clusters are still current.
		$clustersUpToDate = !$haveStalePersons && !$haveNewFaces;

		$recreateClusters = $this->config->getUserValue($userId, 'facerecognition', 'recreate-clusters', 'false');
		$forceRecreation = ($recreateClusters === 'true');

		$this->logDebug(sprintf('Found %d changed persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));

		if ($clustersUpToDate && !$forceRecreation) {
			// If there is no invalid persons, and there is no recent new faces, no need to recreate cluster
			$this->logInfo('Clusters already exist, estimated there is no need to recreate them');
			return false;
		}
		if ($forceRecreation) {
			$this->logInfo('Clusters already exist, but there was some change that requires recreating the clusters');
		}
		return true;
	}

	/**
	 * Decides whether a user without any persons has enough data to create the first clusters.
	 *
	 * Creation goes ahead when the per-user 'force-create-clusters' flag is 'true', or when
	 * at least one of these holds: 1000 faces, 100 images, or 95% of the images processed.
	 * Otherwise the current numbers are logged and false is returned.
	 *
	 * @param string $userId  ID of the user
	 * @param int    $modelId ID of the face model in use
	 * @return bool True when clusters should be created
	 */
	private function hasEnoughDataForClustering(string $userId, int $modelId): bool {
		// User should not be able to use this directly, used in tests
		$forceCreateClusters = $this->config->getUserValue($userId, 'facerecognition', 'force-create-clusters', 'false');
		$forceCreation = ($forceCreateClusters === 'true');

		// These are basic criteria without which we should not even consider creating clusters.
		// These clusters will be small and not "stable" enough and we should better wait for more images to come.
		// todo: 2 queries to get these 2 counts, can we do this smarter?
		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
		// Fraction in the range 0..1; stays 0 when the user has no images at all.
		$percentImagesProcessed = 0;
		if ($imageCount > 0) {
			$percentImagesProcessed = $imageProcessed / floatval($imageCount);
		}
		$facesCount = $this->faceMapper->countFaces($userId, $modelId);
		// todo: get rid of magic numbers (move to config)
		if (!$forceCreation && ($facesCount < 1000) && ($imageCount < 100) && ($percentImagesProcessed < 0.95)) {
			$this->logInfo(
				'Skipping cluster creation, not enough data (yet) collected. ' .
				'For cluster creation, you need either one of the following:');
			$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
			$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
			// The value is a fraction, so scale it to match the percent sign in the message.
			$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
			return false;
		}
		return true;
	}
207
208 1
	/**
	 * Groups the IDs of faces that already have a person by that person's ID.
	 * Faces whose `person` is null are left out.
	 *
	 * @param array $faces Face objects exposing `person` and `id` properties
	 * @return array Map of person ID => list of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			if ($face->person === null) {
				continue;
			}
			// Appending to a missing key creates the inner list on first use.
			$clustersByPerson[$face->person][] = $face->id;
		}
		return $clustersByPerson;
	}
220
221 1
	/**
	 * Clusters the given faces from scratch with chinese whispers.
	 *
	 * Two faces are linked when the Euclidean distance between their descriptors is below
	 * the app value 'sensitivity' (default '0.5'). Faces whose confidence is below the app
	 * value 'min-confidence' (default '0.9') only get an edge to themselves, so they are
	 * never linked to another face, whichever side of the comparison they are on.
	 *
	 * @param array $faces List of face objects, indexed from 0, exposing `confidence`,
	 *                     `descriptor` and `id`
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Create edges for chinese whispers
		$euclidean = new Euclidean();
		$sensitivity = floatval($this->config->getAppValue('facerecognition', 'sensitivity', '0.5'));
		$min_confidence = floatval($this->config->getAppValue('facerecognition', 'min-confidence', '0.9'));

		$faceCount = count($faces);
		$edges = array();
		for ($i = 0; $i < $faceCount; $i++) {
			$face1 = $faces[$i];
			if ($face1->confidence < $min_confidence) {
				$edges[] = array($i, $i); // fixme: Should we create an single group? o just ignore.
				continue;
			}
			// Starting at $j = $i also compares the face with itself, which yields its self-edge.
			for ($j = $i; $j < $faceCount; $j++) {
				$face2 = $faces[$j];
				if ($face2->confidence < $min_confidence) {
					// Low-confidence faces must stay isolated; their self-edge is added
					// when the outer loop reaches them.
					continue;
				}
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $euclidean->distance($face1->descriptor, $face2->descriptor);

				if ($distance < $sensitivity) {
					$edges[] = array($i, $j);
				}
			}
		}

		// NOTE(review): dlib_chinese_whispers() is not declared in this project; presumably it is
		// provided by a PHP extension and returns one cluster label per face index - confirm.
		$newChineseClustersByIndex = dlib_chinese_whispers($edges);
		$newClusters = array();
		for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
			if (!isset($newClusters[$newChineseClustersByIndex[$i]])) {
				$newClusters[$newChineseClustersByIndex[$i]] = array();
			}
			$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id;
		}

		return $newClusters;
	}
255
256
	/**
	 * Maps freshly computed clusters onto the existing persons.
	 *
	 * Each new cluster inherits the ID of the old person it shares the most faces with,
	 * as long as that old person was not already claimed by a cluster with a stronger
	 * overlap. Every other new cluster - including one with no faces at all - gets a
	 * fresh ID, counting up from the highest old person ID plus one (from 1 when there
	 * are no old clusters). Old person ID 0 is used internally as "no old person".
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Existing clusters: person ID => list of face IDs
	 * @param array $newCluster New clusters: cluster label => list of face IDs
	 * @return array Merged clusters: person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index every old face by its person once, instead of scanning all old clusters per face.
		// The first person listing a face wins, matching a first-match scan over $oldCluster.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => (old person or null, new person)
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions. A null old person turns into an empty string in the key.
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, strongest overlap first
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // store this, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]); // '' (no old person) becomes 0
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// This new person was already settled by a stronger transition.
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person is already taken, so this cluster becomes a new person.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A cluster without faces produced no transition and so has no mapping entry;
			// treat it as a brand new person instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
319
}
320