Passed
Push — test-scrutinizer-new-analyser ( 748b29 )
by Branko
05:41
created

CreateClustersTask::description()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 2
ccs 0
cts 2
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * @copyright Copyright (c) 2017, Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IConfig;
27
use OCP\IUser;
28
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
30
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
31
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask;
32
33
use OCA\FaceRecognition\Db\FaceNewMapper;
34
use OCA\FaceRecognition\Db\ImageMapper;
35
use OCA\FaceRecognition\Db\PersonMapper;
36
37
use OCA\FaceRecognition\Helper\Euclidean;
38
39
use OCA\FaceRecognition\Migration\AddDefaultFaceModel;
40
41
/**
42
 * Task that, for each user, creates person clusters.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
45
	/** @var IConfig Config */
46
	private $config;
47
48
	/** @var PersonMapper Person mapper*/
49
	private $personMapper;
50
51
	/** @var ImageMapper Image mapper*/
52
	private $imageMapper;
53
54
	/** @var FaceNewMapper Face mapper*/
55
	private $faceMapper;
56
57
	/**
	 * Stores the collaborators this task works with.
	 *
	 * @param IConfig $config Config, used to read app values of the 'facerecognition' app
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceNewMapper $faceMapper Face mapper
	 */
	public function __construct(IConfig $config, PersonMapper $personMapper, ImageMapper $imageMapper, FaceNewMapper $faceMapper) {
		parent::__construct();

		// Mappers first, configuration last; the assignments are independent of each other.
		$this->faceMapper = $faceMapper;
		$this->imageMapper = $imageMapper;
		$this->personMapper = $personMapper;
		$this->config = $config;
	}
67
68
	/**
	 * @inheritdoc
	 */
	public function description() {
		// Human-readable summary of what this task does.
		$summary = 'Create new persons or update existing persons';
		return $summary;
	}
74
75
	/**
	 * @inheritdoc
	 */
	public function do(FaceRecognitionContext $context) {
		$this->setContext($context);

		// Clustering is postponed until the app value written under
		// AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY reads 'true'. Until then not all
		// existing images are known to the database, so we simply bail out.
		$scanDone = $this->config->getAppValue('facerecognition', AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY, 'false');
		if ($scanDone != 'true') {
			$this->logInfo('Skipping cluster creation, as not even existing images are found and inserted in database');
			return;
		}

		// Gather plain user IDs first and iterate over them outside of the closure.
		// Only the UID is kept, so no IUser objects need to be copied around.
		$userIds = [];
		$singleUser = $this->context->user;
		if ($singleUser !== null) {
			// The context is restricted to one user.
			$userIds[] = $singleUser->getUID();
		} else {
			// No specific user in the context: take every user the user manager reports as seen.
			$this->context->userManager->callForSeenUsers(function (IUser $seenUser) use (&$userIds) {
				$userIds[] = $seenUser->getUID();
			});
		}

		foreach ($userIds as $userId) {
			$this->createClusterIfNeeded($userId);
		}
	}
105
106
	/**
	 * Runs clustering for one user, unless the user has no persons yet and too little
	 * data has been collected so far.
	 *
	 * For a user without persons, clustering is skipped while all three hold: fewer than
	 * 1000 faces, fewer than 100 images and fewer than 95% of images processed. A user
	 * without any images counts as 0% processed.
	 *
	 * @param string $userId ID of the user for which clusters are (re)created
	 */
	private function createClusterIfNeeded(string $userId) {
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		$hasPersons = $this->personMapper->countPersons($userId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		if ($hasPersons) {
			// todo: find all faces that are in DB, but are not in user's clusters.
			// If we detect more than 10 faces like this,
			// or if more than 2h since any of these is passed,
			// or if "is_valid" (UserCluster table) is false,
			// start new round of clustering for that user.
			// Until that is implemented, users with existing persons are always re-clustered.
		} else {
			// These are basic criteria without which we should not even consider creating clusters.
			// Such clusters would be small and not "stable" enough, so it is better to wait for more images.
			// todo: 2 queries to get these 2 counts, can we do this smarter?
			$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
			$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
			// A user without images would otherwise trigger a division by zero
			// (warning + NAN on PHP 7, DivisionByZeroError on PHP 8).
			$percentImagesProcessed = ($imageCount > 0) ? ($imageProcessed / floatval($imageCount)) : 0.0;
			$facesCount = $this->faceMapper->countFaces($userId, $modelId);
			// todo: get rid of magic numbers (move to config)
			if (($facesCount < 1000) && ($imageCount < 100) && ($percentImagesProcessed < 0.95)) {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
				$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
				// The ratio is kept in the 0..1 range; scale it so the logged value is a real percentage.
				$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
				return;
			}
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// A cluster is an associative array where the key is a person ID and the value is an
		// array of face IDs. For old clusters the keys are existing person IDs; for new
		// clusters they are whatever chinese whispers decides to identify them with.
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found for clustering');

		// Carry old person IDs over to the new clusters where possible, then persist.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);
	}
155
156
	/**
	 * Groups the IDs of faces that already have a person assigned by that person.
	 *
	 * Faces whose person compares loosely equal to null are left out.
	 *
	 * @param array $faces Face objects exposing "person" and "id" properties
	 * @return array Map of person => list of face IDs, in order of first appearance
	 */
	private function getCurrentClusters(array $faces): array {
		$clustersByPerson = [];
		foreach ($faces as $face) {
			$personId = $face->person;
			if ($personId == null) {
				continue;
			}
			// Appending with [] creates the inner list on first use.
			$clustersByPerson[$personId][] = $face->id;
		}
		return $clustersByPerson;
	}
168
169
	/**
	 * Clusters the given faces from scratch using chinese whispers.
	 *
	 * Two faces are connected by an edge when the Euclidean distance between their
	 * descriptors is below 0.5. The edges are handed to dlib_chinese_whispers() and its
	 * result is read as one cluster label per face position.
	 *
	 * @param array $faces Zero-indexed list of face objects exposing "descriptor" and "id"
	 * @return array Map of cluster label => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		$metric = new Euclidean();
		$faceCount = count($faces);

		// Build the edge list for chinese whispers.
		// NOTE(review): the inner loop starts at $first, so every face is also compared with
		// itself; presumably this makes sure each face index shows up in an edge - confirm.
		$edges = [];
		for ($first = 0; $first < $faceCount; $first++) {
			$firstDescriptor = $faces[$first]->descriptor;
			for ($second = $first; $second < $faceCount; $second++) {
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $metric->distance($firstDescriptor, $faces[$second]->descriptor);
				// todo: extract this magic number to app param
				if ($distance < 0.5) {
					$edges[] = [$first, $second];
				}
			}
		}

		$labelsByIndex = dlib_chinese_whispers($edges);

		// Turn "face position => label" into "label => face IDs".
		$newClusters = [];
		$labelCount = count($labelsByIndex);
		for ($index = 0; $index < $labelCount; $index++) {
			$label = $labelsByIndex[$index];
			$newClusters[$label][] = $faces[$index]->id;
		}

		return $newClusters;
	}
197
198
	/**
	 * Re-labels freshly computed clusters with existing person IDs where possible.
	 *
	 * Each new cluster inherits the ID of the old person it shares the most faces with.
	 * An old person ID is handed out at most once; the (old, new) pair with the highest
	 * face count is served first. New clusters left without an old person get fresh IDs,
	 * counting up from the highest old person ID plus one (or from 1 when there are no
	 * old clusters). Old person ID 0 is treated as "no old person".
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of old person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of resulting person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index faces by the first old person that owns them, so the lookup below is a
		// single array access instead of scanning every old cluster for every new face.
		// Assumes face IDs are integers (or integer-like strings) - TODO confirm.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions: face ID => (old person or null, new person)
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? null, $newPerson);
			}
		}

		// Count transitions; a missing old person yields an empty left side of the key
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			$transitionCount[$key] = ($transitionCount[$key] ?? 0) + 1;
		}

		// Create map of new person -> old person, most frequent transitions first
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // keyed by old person, so membership checks are cheap
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(":", $transitionKey);
			$oldPerson = intval($transition[0]); // empty string (no old person) becomes 0
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// This new person was already decided by a more frequent transition.
				continue;
			}
			if (($oldPerson === 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// Old person is already taken by another new cluster; this one becomes new.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces never produced a transition; treat it as new
			// instead of reading an undefined index.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson === 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
261
}
262