Completed
Push — exception_in_tasks ( 7463cd...3899e4 )
by Branko
01:58
created

CreateClustersTask::execute()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 29

Duplication

Lines 7
Ratio 24.14 %

Code Coverage

Tests 0
CRAP Score 20

Importance

Changes 0
Metric Value
dl 7
loc 29
ccs 0
cts 15
cp 0
rs 9.456
c 0
b 0
f 0
cc 4
nc 5
nop 1
crap 20
1
<?php
2
/**
3
 * @copyright Copyright (c) 2017, Matias De lellis <[email protected]>
4
 * @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
5
 *
6
 * @author Branko Kokanovic <[email protected]>
7
 *
8
 * @license GNU AGPL version 3 or any later version
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU Affero General Public License as
12
 * published by the Free Software Foundation, either version 3 of the
13
 * License, or (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 * GNU Affero General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Affero General Public License
21
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
23
 */
24
namespace OCA\FaceRecognition\BackgroundJob\Tasks;
25
26
use OCP\IConfig;
27
use OCP\IUser;
28
29
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionBackgroundTask;
30
use OCA\FaceRecognition\BackgroundJob\FaceRecognitionContext;
31
use OCA\FaceRecognition\BackgroundJob\Tasks\AddMissingImagesTask;
32
33
use OCA\FaceRecognition\Db\FaceNewMapper;
34
use OCA\FaceRecognition\Db\ImageMapper;
35
use OCA\FaceRecognition\Db\PersonMapper;
36
37
use OCA\FaceRecognition\Helper\Euclidean;
38
39
use OCA\FaceRecognition\Migration\AddDefaultFaceModel;
40
41
/**
42
 * Task that creates person clusters for each user.
43
 */
44
class CreateClustersTask extends FaceRecognitionBackgroundTask {
	/** @var IConfig Config */
	private $config;

	/** @var PersonMapper Person mapper */
	private $personMapper;

	/** @var ImageMapper Image mapper */
	private $imageMapper;

	/** @var FaceNewMapper Face mapper */
	private $faceMapper;

	/**
	 * @param IConfig $config Config
	 * @param PersonMapper $personMapper Person mapper
	 * @param ImageMapper $imageMapper Image mapper
	 * @param FaceNewMapper $faceMapper Face mapper
	 */
	public function __construct(IConfig $config, PersonMapper $personMapper, ImageMapper $imageMapper, FaceNewMapper $faceMapper) {
		parent::__construct();
		$this->config = $config;
		$this->personMapper = $personMapper;
		$this->imageMapper = $imageMapper;
		$this->faceMapper = $faceMapper;
	}

	/**
	 * @inheritdoc
	 */
	public function description() {
		return "Create new persons or update existing persons";
	}

	/**
	 * @inheritdoc
	 */
	public function execute(FaceRecognitionContext $context) {
		$this->setContext($context);

		$fullImageScanDone = $this->config->getAppValue('facerecognition', AddMissingImagesTask::FULL_IMAGE_SCAN_DONE_KEY, 'false');
		if ($fullImageScanDone !== 'true') {
			// If not all images are inserted in the database yet, we cannot determine when we should start clustering.
			// Since this is a step in the beginning, just bail out.
			$this->logInfo('Skipping cluster creation, as not even existing images are found and inserted in database');
			return true;
		}

		// We cannot yield inside of Closure, so we need to extract all users and iterate outside of closure.
		// However, since we don't want to do deep copy of IUser, we keep only UID in this array.
		//
		$eligibleUsers = array();
		if (is_null($this->context->user)) {
			$this->context->userManager->callForSeenUsers(function (IUser $user) use (&$eligibleUsers) {
				$eligibleUsers[] = $user->getUID();
			});
		} else {
			$eligibleUsers[] = $this->context->user->getUID();
		}

		foreach ($eligibleUsers as $userId) {
			$this->createClusterIfNeeded($userId);
		}

		return true;
	}

	/**
	 * Decides whether the given user has enough data for clustering and, if so,
	 * clusters that user's faces and hands the result to the person mapper.
	 *
	 * @param string $userId ID of the user whose faces should be clustered
	 */
	private function createClusterIfNeeded(string $userId) {
		// Check that we processed enough images to start creating clusters
		//
		$modelId = intval($this->config->getAppValue('facerecognition', 'model', AddDefaultFaceModel::DEFAULT_FACE_MODEL_ID));

		$hasPersons = $this->personMapper->countPersons($userId) > 0;

		// Depending on whether we already have clusters, decide if we should create/recreate them.
		//
		if ($hasPersons) {
			// todo: find all faces that are in DB, but are not in user's clusters.
			// If we detect more than 10 faces like this,
			// or if more than 2h since any of these is passed,
			// or if "is_valid" (UserCluster table) is false,
			// start new round of clustering for that user.
		} else {
			// These are basic criteria without which we should not even consider creating clusters.
			// These clusters will be small and not "stable" enough and we should better wait for more images to come.
			// todo: 2 queries to get these 2 counts, can we do this smarter?
			$imageCount = $this->imageMapper->countUserImages($userId, $modelId);
			$imageProcessed = $this->imageMapper->countUserProcessedImages($userId, $modelId);
			// A user without any image must not trigger a division by zero; treat it as "nothing processed".
			$percentImagesProcessed = ($imageCount > 0) ? ($imageProcessed / floatval($imageCount)) : 0.0;
			$facesCount = $this->faceMapper->countFaces($userId, $modelId);
			// todo: get rid of magic numbers (move to config)
			if (($facesCount < 1000) && ($imageCount < 100) && ($percentImagesProcessed < 0.95)) {
				$this->logInfo(
					'Skipping cluster creation, not enough data (yet) collected. ' .
					'For cluster creation, you need either one of the following:');
				$this->logInfo(sprintf('* have 1000 faces already processed (you have %d),', $facesCount));
				$this->logInfo(sprintf('* have 100 images (you have %d),', $imageCount));
				// $percentImagesProcessed is a 0..1 fraction, so scale it before printing it as a percentage.
				$this->logInfo(sprintf('* or you need to have 95%% of you images processed (you have %.2f%%)', $percentImagesProcessed * 100));
				return;
			}
		}

		$faces = $this->faceMapper->getFaces($userId, $modelId);
		$this->logInfo(count($faces) . ' faces found for clustering');

		// Cluster is associative array where key is person ID.
		// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
		// and for new clusters is whatever chinese whispers decides to identify them.
		//
		$currentClusters = $this->getCurrentClusters($faces);
		$newClusters = $this->getNewClusters($faces);
		$this->logInfo(count($newClusters) . ' clusters found for clustering');

		// Map the arbitrary labels of the new clustering back to existing person IDs where possible,
		// and persist that merged result instead of the raw clustering output.
		// NOTE(review): assumes mergeClusterToDatabase() expects the merged clusters as its last argument - confirm against PersonMapper.
		$mergedClusters = $this->mergeClusters($currentClusters, $newClusters);
		$this->personMapper->mergeClusterToDatabase($userId, $currentClusters, $mergedClusters);
	}

	/**
	 * Groups faces that already belong to a person.
	 *
	 * @param array $faces Faces, each exposing "id" and "person" properties
	 * @return array Map of person ID => list of face IDs; faces without a person are left out
	 */
	private function getCurrentClusters(array $faces): array {
		$clusters = array();
		foreach ($faces as $face) {
			// Loose comparison is deliberate: person ID 0 is treated as "no person", same as in mergeClusters().
			if ($face->person != null) {
				if (!isset($clusters[$face->person])) {
					$clusters[$face->person] = array();
				}
				$clusters[$face->person][] = $face->id;
			}
		}
		return $clusters;
	}

	/**
	 * Clusters faces from scratch with chinese whispers, based on descriptor distance.
	 *
	 * @param array $faces Faces, each exposing "id" and "descriptor" properties
	 * @return array Map of cluster label (as returned by dlib_chinese_whispers) => list of face IDs
	 */
	private function getNewClusters(array $faces): array {
		// Faces are addressed by position below, so make sure the array is a plain 0..n-1 list.
		$faces = array_values($faces);
		$faceCount = count($faces);
		if ($faceCount === 0) {
			// Nothing to cluster.
			return array();
		}

		// Create edges for chinese whispers
		$euclidean = new Euclidean();
		$edges = array();
		for ($i = 0; $i < $faceCount; $i++) {
			$face1 = $faces[$i];
			// $j starts at $i, so every face is also paired with itself.
			for ($j = $i; $j < $faceCount; $j++) {
				$face2 = $faces[$j];
				// todo: can't this distance be a method in $face1->distance($face2)?
				$distance = $euclidean->distance($face1->descriptor, $face2->descriptor);
				// todo: extract this magic number to app param
				if ($distance < 0.5) {
					$edges[] = array($i, $j);
				}
			}
		}

		// Result is indexed by face position; each value is the label of the cluster that face was put in.
		$clusterLabelByIndex = dlib_chinese_whispers($edges);
		$labelCount = count($clusterLabelByIndex);
		$newClusters = array();
		for ($i = 0; $i < $labelCount; $i++) {
			$label = $clusterLabelByIndex[$i];
			if (!isset($newClusters[$label])) {
				$newClusters[$label] = array();
			}
			$newClusters[$label][] = $faces[$i]->id;
		}

		return $newClusters;
	}

	/**
	 * Re-labels freshly computed clusters with existing person IDs where possible.
	 *
	 * For every face the transition "old person -> new cluster" is recorded and counted.
	 * Going from the most to the least frequent transition, each new cluster takes over the
	 * first old person ID it is seen with, unless that old person was already claimed by
	 * another new cluster. New clusters left without an old person get fresh IDs, starting
	 * one above the highest old person ID (or at 1 when there are no old clusters).
	 *
	 * todo: only reason this is public is because of tests. Go figure it out better.
	 *
	 * @param array $oldCluster Map of existing person ID => list of face IDs
	 * @param array $newCluster Map of new cluster label => list of face IDs
	 * @return array Map of person ID => list of face IDs
	 */
	public function mergeClusters(array $oldCluster, array $newCluster): array {
		// Index old clusters by face, so the old person of a face is a single lookup
		// instead of a scan over all old clusters. First cluster containing a face wins.
		$oldPersonByFace = array();
		foreach ($oldCluster as $oldPerson => $oldFaces) {
			foreach ($oldFaces as $oldFace) {
				if (!isset($oldPersonByFace[$oldFace])) {
					$oldPersonByFace[$oldFace] = $oldPerson;
				}
			}
		}

		// Create map of face transitions. Old person 0 stands for "face had no person before".
		$transitions = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			foreach ($newFaces as $newFace) {
				$transitions[$newFace] = array($oldPersonByFace[$newFace] ?? 0, $newPerson);
			}
		}

		// Count transitions
		$transitionCount = array();
		foreach ($transitions as $transition) {
			$key = $transition[0] . ':' . $transition[1];
			if (isset($transitionCount[$key])) {
				$transitionCount[$key]++;
			} else {
				$transitionCount[$key] = 1;
			}
		}

		// Create map of new person -> old person transitions, most frequent transitions first
		$newOldPersonMapping = array();
		$oldPersonProcessed = array(); // keyed by old person ID, so we don't waste cycles for in_array()
		arsort($transitionCount);
		foreach ($transitionCount as $transitionKey => $count) {
			$transition = explode(':', $transitionKey);
			$oldPerson = intval($transition[0]);
			$newPerson = intval($transition[1]);
			if (array_key_exists($newPerson, $newOldPersonMapping)) {
				// This new cluster was already decided by a more frequent transition.
				continue;
			}
			if (($oldPerson == 0) || (!array_key_exists($oldPerson, $oldPersonProcessed))) {
				$newOldPersonMapping[$newPerson] = $oldPerson;
				$oldPersonProcessed[$oldPerson] = 0;
			} else {
				// Old person is already taken by another new cluster, so this one becomes a new person.
				$newOldPersonMapping[$newPerson] = 0;
			}
		}

		// Starting with new cluster, convert all new person IDs with old person IDs
		$maxOldPersonId = 1;
		if (count($oldCluster) > 0) {
			$maxOldPersonId = max(array_keys($oldCluster)) + 1;
		}

		$result = array();
		foreach ($newCluster as $newPerson => $newFaces) {
			// A new cluster without faces has no recorded transition; treat it as a new person.
			$oldPerson = $newOldPersonMapping[$newPerson] ?? 0;
			if ($oldPerson == 0) {
				$result[$maxOldPersonId] = $newFaces;
				$maxOldPersonId++;
			} else {
				$result[$oldPerson] = $newFaces;
			}
		}
		return $result;
	}
}
264