Completed
Pull Request — master (#51)
by Branko
03:40 queued 01:36
created

CreateClustersTask::mergeClusters()   F

Complexity

Conditions 14
Paths 360

Size

Total Lines 60

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 39
CRAP Score 14

Importance

Changes 0
Metric Value
dl 0
loc 60
ccs 39
cts 39
cp 1
rs 3.4333
c 0
b 0
f 0
cc 14
nc 360
nop 2
crap 14

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * @copyright Copyright (c) 2017, Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IConfig;
27
use OCP\IUser;
28
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
30
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
31
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask;
32
33
use OCA\FaceRecognition\Db\FaceNewMapper;
34
use OCA\FaceRecognition\Db\ImageMapper;
35
use OCA\FaceRecognition\Db\PersonMapper;
36
37
use OCA\FaceRecognition\Helper\Euclidean;
38
39
use OCA\FaceRecognition\Migration\AddDefaultFaceModel;
40
41
/**
42
 * Task that, for each user, creates person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var IConfig Config */
46
	private $config;
47
48
	/** @var PersonMapper Person mapper*/
49
	private $personMapper;
50
51
	/** @var ImageMapper Image mapper*/
52
	private $imageMapper;
53
54
	/** @var FaceNewMapper Face mapper*/
55
	private $faceMapper;
56
57
	/**
	 * Stores the collaborators this task works with.
	 *
	 * @param IConfig $config Config
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceNewMapper $faceMapper Face mapper
	 */
	public function __construct(IConfig $config, PersonMapper $personMapper, ImageMapper $imageMapper, FaceNewMapper $faceMapper) {
		parent::__construct();

		// Mappers first, configuration last; the assignments are independent of each other.
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
		$this->config = $config;
	}
67
68
	/**
69
	 * @inheritdoc
70
	 */
71
	public function description() {
		// Human-readable summary of what this task does.
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
74
75
	/**
76
	 * @inheritdoc
77
	 */
78
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$scanDoneFlag = $this->config->getAppValue('facerecognition', AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY, 'false');
		if ($scanDoneFlag != 'true') {
			// Until all existing images have been inserted in the database, we cannot determine
			// when clustering should start. Since this is an early step, just bail out.
			$this->logInfo('Skipping cluster creation, as not even existing images are found and inserted in database');
			return true;
		}

		// We cannot yield inside of a closure, so the users are collected first and iterated
		// outside of it. To avoid deep-copying IUser objects, only the UIDs are kept.
		$userIds = [];
		$singleUser = $this->context->user;
		if ($singleUser !== null) {
			// The context is restricted to one user.
			$userIds[] = $singleUser->getUID();
		} else {
			$this->context->userManager->callForSeenUsers(function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			});
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
		}

		return true;
	}
107
108
	/**
	 * Runs one round of clustering for the given user, unless too little data has been collected.
	 *
	 * When the user has no persons yet, clustering is skipped if none of the basic criteria
	 * (face count, image count, fraction of processed images) is met. Otherwise all faces of
	 * the user are fetched, clustered, merged with the existing clusters and handed to the
	 * person mapper.
	 *
	 * @param string $userId ID of the user to create clusters for
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		$hasPersons = $this->personMapper->countPersons($userId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		//
		// todo: when persons already exist, find all faces that are in DB, but are not in user's clusters.
		// If we detect more than 10 faces like this,
		// or if more than 2h since any of these is passed,
		// or if "is_valid" (UserCluster table) is false,
		// start new round of clustering for that user.
		// Until that is implemented, users with existing persons are always re-clustered.
		if (!$hasPersons) {
			// These are basic criteria without which we should not even consider creating clusters.
			// These clusters will be small and not "stable" enough and we should better wait for more images to come.
			// todo: 2 queries to get these 2 counts, can we do this smarter?
			$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
			$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
			// A user without any images would otherwise trigger a division by zero.
			$fractionImagesProcessed = ($imageCount > 0) ? $imageProcessed / floatval($imageCount) : 0.0;
			$facesCount = $this->faceMapper->countFaces($userId, $modelId);
			// todo: get rid of magic numbers (move to config)
			if (($facesCount < 1000) && ($imageCount < 100) && ($fractionImagesProcessed < 0.95)) {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
				$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
				// The value is a 0..1 fraction; scale it so it reads as a percentage like the 95% threshold.
				$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $fractionImagesProcessed * 100));
				return;
			}
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		//
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found for clustering');

		// Map new clusters onto existing person IDs where possible, then persist the result.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);
	}
157
158
	/**
	 * Groups the faces that are already assigned to a person.
	 *
	 * @param array $faces Face objects exposing "person" and "id" properties
	 * @return array Map of person ID => array of face IDs, in order of first appearance
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			// Faces without a person do not belong to any existing cluster.
			if ($face->person == null) {
				continue;
			}
			// Appending creates the per-person list on first use.
			$clustersByPerson[$face->person][] = $face->id;
		}
		return $clustersByPerson;
	}
170
171
	/**
	 * Clusters faces with chinese whispers, based on the distance between their descriptors.
	 *
	 * @param array $faces Face objects exposing "descriptor" and "id" properties, indexed from 0
	 * @return array Map of cluster label (as returned by dlib_chinese_whispers) => array of face IDs
	 */
	private function getNewClusters(array $faces): array {
		$faceCount = count($faces);
		$metric = new Euclidean();

		// Build the edge list for chinese whispers: one edge per pair of faces that are close enough.
		// The inner loop starts at the outer index, so every face is also compared with itself.
		$edges = [];
		for ($first = 0; $first < $faceCount; $first++) {
			for ($second = $first; $second < $faceCount; $second++) {
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $metric->distance($faces[$first]->descriptor, $faces[$second]->descriptor);
				// todo: extract this magic number to app param
				if ($distance < 0.5) {
					$edges[] = [$first, $second];
				}
			}
		}

		// The result holds one cluster label per face index; regroup it as label => face IDs.
		$labels = dlib_chinese_whispers($edges);
		$labelCount = count($labels);
		$clusters = [];
		for ($index = 0; $index < $labelCount; $index++) {
			$clusters[$labels[$index]][] = $faces[$index]->id;
		}

		return $clusters;
	}
199
200
	/**
	 * Merges freshly computed clusters with the existing ones, reusing old person IDs where possible.
	 *
	 * Every face in a new cluster forms a "transition" from the old person that owned it (or from
	 * no person) to its new cluster. Transitions are counted and visited from most to least frequent.
	 * The first transition seen for a new cluster decides its fate: it inherits the old person ID if
	 * that ID has not been claimed by another new cluster yet, otherwise (or when the faces had no
	 * old person) it receives a fresh ID greater than every existing person ID.
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Existing clusters: old person ID => array of face IDs
	 * @param array $newCluster New clusters: new cluster ID => array of face IDs
	 * @return array Merged clusters: person ID => array of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index every old face by its person once, instead of scanning all old clusters per new face.
		// The first person that lists a face wins, which matches the order of the former linear search.
		$oldPersonByFace = [];
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!array_key_exists($oldFace, $oldPersonByFace)) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => [old person or null, new person]
		$transitions = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = [$oldPersonByFace[$newFace] ?? null, $newPerson];
			}
		}

		// Count transitions. A missing old person becomes an empty string in the key,
		// which intval() turns into 0 ("no old person") further below.
		$transitionCount = [];
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person transitions, most frequent transitions first
		$newOldPersonMapping = [];
		$oldPersonProcessed = []; // keyed by old person ID, so lookups do not need in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// A more frequent transition already decided this new person.
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// The old person was already claimed by another new cluster.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, replace all new person IDs with old person IDs.
		// Fresh IDs start right after the highest existing person ID.
		$nextPersonId = 1;
		if (count($oldCluster) > 0) {
			$nextPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = [];
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces produced no transition; treat it as a brand new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$nextPersonId] = $newFaces;
				$nextPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
263
}
264