Passed
Push — recreate-cluster-logic ( 25ad8b )
by Branko
01:54
created

CreateClustersTask::mergeClusters()   F

Complexity

Conditions 14
Paths 360

Size

Total Lines 59
Code Lines 41

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 39
CRAP Score 14

Importance

Changes 0
Metric Value
cc 14
eloc 41
nc 360
nop 2
dl 0
loc 59
ccs 39
cts 39
cp 1
crap 14
rs 3.4333
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017, Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IConfig;
27
use OCP\IUser;
28
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
30
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
31
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask;
32
33
use OCA\FaceRecognition\Db\FaceNewMapper;
34
use OCA\FaceRecognition\Db\ImageMapper;
35
use OCA\FaceRecognition\Db\PersonMapper;
36
37
use OCA\FaceRecognition\Helper\Euclidean;
38
39
use OCA\FaceRecognition\Migration\AddDefaultFaceModel;
40
41
/**
42
 * Task that, for each user, creates person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var IConfig Config */
46
	private $config;
47
48
	/** @var PersonMapper Person mapper*/
49
	private $personMapper;
50
51
	/** @var ImageMapper Image mapper*/
52
	private $imageMapper;
53
54
	/** @var FaceNewMapper Face mapper*/
55
	private $faceMapper;
56
57
	/**
	 * @param IConfig $config Config
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceNewMapper $faceMapper Face mapper
	 */
	public function __construct(
		IConfig $config,
		PersonMapper $personMapper,
		ImageMapper $imageMapper,
		FaceNewMapper $faceMapper
	) {
		parent::__construct();

		// Mappers used to read and write persons, images and faces
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;

		// Application configuration
		$this->config = $config;
	}
67
68
	/**
	 * @inheritdoc
	 *
	 * @return string Short human-readable description of what this task does
	 */
	public function description() {
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
74
75
	/**
	 * @inheritdoc
	 *
	 * Bails out (still returning true) while the full image scan is not flagged as done in the app config.
	 * Otherwise runs cluster creation for the user given in the context or, when the context
	 * has no user, for every user reported by the user manager's callForSeenUsers().
	 *
	 * @param FaceRecognitionContext $context Context this task is executed in
	 * @return bool Always true
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$scanDoneFlag = $this->config->getAppValue('facerecognition', AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY, 'false');
		if ($scanDoneFlag != 'true') {
			// If not all images are inserted in the database, we cannot determine when we should start clustering.
			// Since this step is at the beginning, just bail out.
			$this->logInfo('Skipping cluster creation, as not even existing images are found and inserted in database');
			return true;
		}

		// We cannot yield inside of a Closure, so we need to collect all users and iterate outside of the closure.
		// However, since we don't want to do a deep copy of IUser, we keep only the UID in this array.
		$userIds = array();
		$requestedUser = $this->context->user;
		if (!is_null($requestedUser)) {
			$userIds[] = $requestedUser->getUID();
		} else {
			$collectUid = function (IUser $user) use (&$userIds) {
				$userIds[] = $user->getUID();
			};
			$this->context->userManager->callForSeenUsers($collectUid);
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
		}

		return true;
	}
107
108
	/**
	 * Creates or recreates person clusters for a single user, but only when it is worth doing.
	 *
	 * When the user already has persons, clusters are rebuilt only if shouldRecreateClusters() agrees.
	 * When the user has no persons yet, clusters are built only if hasEnoughDataForClustering() agrees.
	 * In both other cases the method returns without touching anything. Otherwise all faces of the
	 * user are clustered, merged with the current clusters and handed to the person mapper.
	 *
	 * @param string $userId ID of the user to create clusters for
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		$hasPersons = $this->personMapper->countPersons($userId) > 0;
		if ($hasPersons) {
			if (!$this->shouldRecreateClusters($userId, $modelId)) {
				// If there are no invalid persons, and there are no recent new faces, no need to recreate clusters
				$this->logInfo('Clusters already exist, calculated there is no need to recreate them');
				return;
			}
		} else if (!$this->hasEnoughDataForClustering($userId, $modelId)) {
			return;
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is an associative array where key is person ID and value is array of face IDs.
		// For old clusters, person IDs are existing person IDs,
		// and for new clusters they are whatever chinese whispers decides to identify them with.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found for clustering');

		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);
	}

	/**
	 * Decides whether already existing clusters of a user should be recreated.
	 *
	 * We want to recreate clusters/persons if:
	 * * Some cluster/person is stale (counted by the person mapper), or
	 * * There are some new faces. We don't want to jump the gun here, so we want to either have:
	 * ** 10 or more faces without a person, or
	 * ** fewer than that, but the oldest of them was created more than 2h ago
	 *  (basically, we want to avoid recreating clusters for each new face being uploaded,
	 *  however, we don't want to wait too much as clusters could be changed a lot)
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model in use
	 * @return bool True if clusters should be recreated
	 */
	private function shouldRecreateClusters(string $userId, int $modelId): bool {
		$haveNewFaces = false;
		$facesWithoutPersons = $this->faceMapper->countFaces($userId, $modelId, true);
		$this->logDebug(sprintf('Found %d faces without associated persons for user %s and model %d',
			$facesWithoutPersons, $userId, $modelId));
		// todo: get rid of magic numbers (move to config)
		if ($facesWithoutPersons >= 10) {
			$haveNewFaces = true;
		} else if ($facesWithoutPersons > 0) {
			// We have some faces, but not that many, let's see when the oldest one was created.
			$face = $this->faceMapper->getOldestCreatedFaceWithoutPerson($userId, $modelId);
			$oldestFaceTimestamp = $face->creationTime->getTimestamp();
			$currentTimestamp = (new \DateTime())->getTimestamp();
			$this->logDebug(sprintf('Oldest face without persons for user %s and model %d is from %s',
				$userId, $modelId, $face->creationTime->format('Y-m-d H:i:s')));
			// todo: get rid of magic numbers (move to config)
			$haveNewFaces = ($currentTimestamp - $oldestFaceTimestamp) > 2 * 60 * 60;
		}

		$stalePersonsCount = $this->personMapper->countPersons($userId, true);
		$this->logDebug(sprintf('Found %d stale persons for user %s and model %d', $stalePersonsCount, $userId, $modelId));
		$haveStalePersons = $stalePersonsCount > 0;

		return $haveStalePersons || $haveNewFaces;
	}

	/**
	 * Decides whether a user without any persons has enough data to create the first clusters.
	 *
	 * These are basic criteria without which we should not even consider creating clusters.
	 * Such clusters would be small and not "stable" enough, so we better wait for more images to come.
	 * Any one of the criteria is sufficient; when none is met, the reasons are logged.
	 *
	 * @param string $userId ID of the user
	 * @param int $modelId ID of the face model in use
	 * @return bool True if at least one criterion is met
	 */
	private function hasEnoughDataForClustering(string $userId, int $modelId): bool {
		// todo: 2 queries to get these 2 counts, can we do this smarter?
		$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
		$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
		// A user without any images has nothing processed; avoid dividing by zero.
		$percentImagesProcessed = ($imageCount > 0) ? $imageProcessed / floatval($imageCount) : 0.0;
		$facesCount = $this->faceMapper->countFaces($userId, $modelId);

		// todo: get rid of magic numbers (move to config)
		if (($facesCount >= 1000) || ($imageCount >= 100) || ($percentImagesProcessed >= 0.95)) {
			return true;
		}

		$this->logInfo(
			'Skipping cluster creation, not enough data (yet) collected. ' .
			'For cluster creation, you need either one of the following:');
		$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
		$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
		// $percentImagesProcessed is a 0..1 ratio, so scale it before printing it as a percentage
		$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
		return false;
	}
191
192
	/**
	 * Groups faces that already have a person assigned by that person.
	 *
	 * Faces whose person is loosely equal to null are skipped.
	 *
	 * @param array $faces Faces to group; each is read through its `person` and `id` properties
	 * @return array Person ID => array of face IDs
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = array();
		foreach ($faces as $face) {
			$personId = $face->person;
			if ($personId == null) {
				continue;
			}
			// Appending to a missing key creates the per-person list on first use
			$clustersByPerson[$personId][] = $face->id;
		}
		return $clustersByPerson;
	}
204
205
	/**
	 * Clusters the given faces from scratch.
	 *
	 * Two faces are connected by an edge when the distance between their descriptors is below 0.5.
	 * The edges are passed to dlib_chinese_whispers(), and the label it returns for each face
	 * index is used as the cluster key.
	 *
	 * @param array $faces Faces to cluster, indexed 0..n-1; each is read through `descriptor` and `id`
	 * @return array Cluster label => array of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Create edges for chinese whispers
		$euclidean = new Euclidean();
		$faceCount = count($faces);
		$edges = array();
		for ($first = 0; $first < $faceCount; $first++) {
			$firstFace = $faces[$first];
			// The inner loop starts at $first, so every face is also compared with itself
			// (presumably so that each face index shows up in at least one edge - TODO confirm).
			for ($second = $first; $second < $faceCount; $second++) {
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $euclidean->distance($firstFace->descriptor, $faces[$second]->descriptor);
				// todo: extract this magic number to app param
				if ($distance < 0.5) {
					$edges[] = array($first, $second);
				}
			}
		}

		// Not declared in this code base, so static analysis cannot resolve the call
		$labelsByIndex = /** @scrutinizer ignore-call */ dlib_chinese_whispers($edges);

		$newClusters = array();
		$labelCount = count($labelsByIndex);
		for ($index = 0; $index < $labelCount; $index++) {
			// Appending to a missing key creates the per-cluster list on first use
			$newClusters[$labelsByIndex[$index]][] = $faces[$index]->id;
		}

		return $newClusters;
	}
233
234
	/**
	 * Merges freshly computed clusters with the existing ones, reusing old person IDs where possible.
	 *
	 * Every face in $newCluster contributes one transition "old person that owned it (if any) ->
	 * new cluster it is in now". Transitions are then processed from most to least frequent, and
	 * the first transition seen for a new cluster decides its fate:
	 * * if it comes from an old person nobody else has claimed yet, the new cluster keeps that person ID,
	 * * otherwise (no old person, or old person already claimed) the new cluster becomes a new person.
	 * New persons get consecutive IDs starting one above the highest old person ID
	 * (or from 1 when there are no old clusters).
	 *
	 * Old person ID 0 is used internally as "no old person", so real person IDs are expected to be
	 * non-zero integers.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Existing clusters: old person ID => array of face IDs
	 * @param array $newCluster New clusters: new cluster ID => array of face IDs
	 * @return array Merged clusters: person ID => array of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index old faces by the person owning them once, so that each new face is resolved
		// with a single lookup instead of scanning every old cluster. If a face is listed
		// under several old persons, the first one wins.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => array(old person or null, new person)
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions, keyed as "<old person>:<new person>" (old person is empty when there is none)
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person, most frequent transitions first
		arsort($transitionCount);
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // old persons already claimed by some new cluster
		foreach (array_keys($transitionCount) as $transitionKey) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]); // empty string (no old person) becomes 0
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// Already decided by a more frequent transition
				continue;
			}
			if (($oldPerson === 0) || !isset($oldPersonProcessed[$oldPerson])) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = true;
			} else {
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, replace all new person IDs with old person IDs
		$nextPersonId = 1;
		if (count($oldCluster) > 0) {
			$nextPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces produced no transition; treat it as a new person
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$nextPersonId] = $newFaces;
				$nextPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
297
}
298