ChangeRunCoalescer   A
last analyzed

Complexity

Total Complexity 37

Size/Duplication

Total Lines 306
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 8

Importance

Changes 0
Metric Value
wmc 37
lcom 1
cbo 8
dl 0
loc 306
rs 9.44
c 0
b 0
f 0

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 11 1
A transformChangeList() 0 22 2
A groupChangesByEntity() 0 15 3
C mergeChanges() 0 80 9
A isBadgesOnlyChange() 0 3 2
F coalesceRuns() 0 66 15
A compareChangesByTimestamp() 0 17 5
1
<?php
2
3
namespace Wikibase\Client\Changes;
4
5
use Diff\DiffOp\Diff\Diff;
6
use Diff\DiffOp\DiffOp;
7
use Exception;
8
use MWException;
9
use Psr\Log\LoggerInterface;
10
use Wikibase\DataModel\Entity\EntityId;
11
use Wikibase\Lib\Changes\Change;
12
use Wikibase\Lib\Changes\EntityChange;
13
use Wikibase\Lib\Changes\EntityChangeFactory;
14
use Wikibase\Lib\Changes\ItemChange;
15
use Wikibase\Lib\Store\EntityRevisionLookup;
16
use Wikibase\Lib\Store\LookupConstants;
17
18
/**
19
 * A transformer for lists of EntityChanges that combines runs of changes into a single change.
20
 * A "run" of changes is a sequence of consecutive changes performed by the same
21
 * user, and not interrupted by a "disruptive" change. Changes altering the association
22
 * between pages on the local wiki and items on the repo are considered disruptive.
23
 *
24
 * @license GPL-2.0-or-later
25
 * @author Daniel Kinzler
26
 */
27
class ChangeRunCoalescer {
28
29
	/**
30
	 * @var EntityRevisionLookup
31
	 */
32
	private $entityRevisionLookup;
33
34
	/**
35
	 * @var EntityChangeFactory
36
	 */
37
	private $changeFactory;
38
39
	/**
40
	 * @var LoggerInterface
41
	 */
42
	private $logger;
43
44
	/**
45
	 * @var string
46
	 */
47
	private $localSiteId;
48
49
	/**
	 * Stores the collaborators this coalescer works with.
	 *
	 * @param EntityRevisionLookup $entityRevisionLookup Lookup used to load the entity
	 *        revisions from which a combined change is computed.
	 * @param EntityChangeFactory $changeFactory Factory used to build the combined change.
	 * @param LoggerInterface $logger Receives debug output about the coalescing result.
	 * @param string $localSiteId Key looked up in an item change's site link diff when
	 *        deciding whether a change interrupts a run.
	 */
	public function __construct(
		EntityRevisionLookup $entityRevisionLookup,
		EntityChangeFactory $changeFactory,
		LoggerInterface $logger,
		$localSiteId
	) {
		$this->localSiteId = $localSiteId;
		$this->logger = $logger;
		$this->changeFactory = $changeFactory;
		$this->entityRevisionLookup = $entityRevisionLookup;
	}
66
67
	/**
	 * Collapses every run of changes in the given list into a single change.
	 * See the class level documentation for what constitutes a run.
	 *
	 * The changes are bucketed per entity, each bucket is coalesced on its own, and
	 * the combined result is sorted using compareChangesByTimestamp().
	 *
	 * @param EntityChange[] $changes
	 *
	 * @return EntityChange[]
	 */
	public function transformChangeList( array $changes ) {
		$result = [];

		/** @var EntityChange[] $changesOfEntity */
		foreach ( $this->groupChangesByEntity( $changes ) as $changesOfEntity ) {
			// Every bucket holds at least one change, all for the same entity.
			$entityId = $changesOfEntity[0]->getEntityId();
			$runs = $this->coalesceRuns( $entityId, $changesOfEntity );
			$result = array_merge( $result, $runs );
		}

		usort( $result, [ $this, 'compareChangesByTimestamp' ] );

		$logContext = [
			'method' => __METHOD__,
			'changeCount' => count( $changes ),
			'changeCoalescedCount' => count( $result ),
		];
		$this->logger->debug(
			'{method}: coalesced {changeCount} into {changeCoalescedCount} changes',
			$logContext
		);

		return $result;
	}
97
98
	/**
	 * Buckets the given changes by the entity they apply to.
	 *
	 * @param EntityChange[] $changes
	 *
	 * @return array[] Map from serialized entity ID to the list of changes made to that
	 *         entity, each list keeping the order in which the changes were given.
	 */
	private function groupChangesByEntity( array $changes ) {
		$byEntity = [];

		foreach ( $changes as $entityChange ) {
			$key = $entityChange->getEntityId()->getSerialization();

			// Appending to a key that does not exist yet creates the list on first use.
			$byEntity[$key][] = $entityChange;
		}

		return $byEntity;
	}
121
122
	/**
	 * Combines a set of changes into one change. All changes are assumed to have been performed
	 * by the same user on the same entity. They are further assumed to be UPDATE actions
	 * and sorted in causal (chronological) order.
	 *
	 * If $changes contains exactly one change, that change is returned as is. Otherwise, a
	 * combined change is computed from the parent revision of the first change and the
	 * revision of the last change.
	 *
	 * @param EntityId $entityId
	 * @param EntityChange[] $changes The changes to combine. Must not be empty.
	 *
	 * @throws MWException If $changes is empty, or if the revision of the last change
	 *         cannot be loaded.
	 * @return Change a combined change representing the activity from all the original changes.
	 */
	private function mergeChanges( EntityId $entityId, array $changes ) {
		if ( !$changes ) {
			// end()/reset() would return false below and the method calls on it would be
			// a fatal error; fail with the exception type callers already handle instead.
			throw new MWException( "No changes to merge for $entityId" );
		}

		if ( count( $changes ) === 1 ) {
			return reset( $changes );
		}

		// we now assume that we have a list of EntityChanges,
		// all done by the same user on the same entity.

		/**
		 * @var EntityChange $last
		 * @var EntityChange $first
		 */
		$last = end( $changes );
		$first = reset( $changes );

		// The combined change only counts as minor / bot if every single change does.
		// NOTE: &= is a bitwise AND, so both flags end up as int 0/1 rather than bool.
		$minor = true;
		$bot = true;

		$ids = [];

		foreach ( $changes as $change ) {
			$ids[] = $change->getId();
			$meta = $change->getMetadata();

			$minor &= isset( $meta['minor'] ) && (bool)$meta['minor'];
			$bot &= isset( $meta['bot'] ) && (bool)$meta['bot'];
		}

		$lastmeta = $last->getMetadata();
		$firstmeta = $first->getMetadata();

		// The combined change spans from the state before the first change
		// to the state after the last one.
		$parentRevId = $firstmeta['parent_id'];
		$latestRevId = $lastmeta['rev_id'];

		$entityRev = $this->entityRevisionLookup->getEntityRevision(
			$entityId,
			$latestRevId,
			LookupConstants::LATEST_FROM_REPLICA_WITH_FALLBACK
		);

		if ( !$entityRev ) {
			throw new MWException( "Failed to load revision $latestRevId of $entityId" );
		}

		// A falsy parent revision ID means there is no earlier state to diff against.
		// This lookup passes no mode argument, so the lookup's default mode applies.
		$parentRev = $parentRevId ? $this->entityRevisionLookup->getEntityRevision( $entityId, $parentRevId ) : null;

		//XXX: we could avoid loading the entity data by merging the diffs programmatically
		//     instead of re-calculating.
		$change = $this->changeFactory->newFromUpdate(
			$parentRev ? EntityChange::UPDATE : EntityChange::ADD,
			$parentRev ? $parentRev->getEntity() : null,
			$entityRev->getEntity()
		);

		// Revision, user and time of the combined change are those of the last change.
		$change->setFields(
			[
				'revision_id' => $last->getField( 'revision_id' ),
				'user_id' => $last->getUserId(),
				'time' => $last->getTime(),
			]
		);

		// Start from the last change's metadata, then override the values that
		// describe the run as a whole.
		$change->setMetadata( array_merge(
			$lastmeta,
			[
				'parent_id' => $parentRevId,
				'minor' => $minor,
				'bot' => $bot,
			]
		//FIXME: size before & size after
		) );

		// Keep track of the original changes this combined change was built from.
		$info = $change->getInfo();
		$info['change-ids'] = $ids;
		$info['changes'] = $changes;
		$change->setField( 'info', $info );

		return $change;
	}
216
217
	/**
	 * Tells whether a site link diff operation leaves the link target alone.
	 *
	 * That is the case when the operation is a Diff that has no 'name' offset.
	 *
	 * @param DiffOp $siteLinkDiffOp
	 *
	 * @return bool
	 */
	private function isBadgesOnlyChange( DiffOp $siteLinkDiffOp ) {
		if ( !( $siteLinkDiffOp instanceof Diff ) ) {
			return false;
		}

		return !$siteLinkDiffOp->offsetExists( 'name' );
	}
225
226
	/**
	 * Coalesce consecutive changes by the same user to the same entity into one.
	 *
	 * A run of changes may be broken if the action performed changes (e.g. deletion
	 * instead of update) or if a sitelink pointing to the local wiki was modified.
	 * An ADD action is tracked as UPDATE, so updates directly following the
	 * creation of an entity by the same user join the same run.
	 *
	 * A change that throws while being inspected is logged and left out of the result.
	 *
	 * @param EntityId $entityId
	 * @param EntityChange[] $changes
	 *
	 * @return Change[] grouped changes
	 */
	private function coalesceRuns( EntityId $entityId, array $changes ) {
		$coalesced = [];

		$currentRun = [];
		$currentUser = null;
		$currentAction = null;
		$breakNext = false;

		foreach ( $changes as $change ) {
			try {
				$action = $change->getAction();
				$meta = $change->getMetadata();
				$user = $meta['user_text'];

				// The run in progress ends if the previous change asked for it, or if
				// this change differs in action or user.
				$break = $breakNext
					|| $currentAction !== $action
					|| $currentUser !== $user;

				$breakNext = false;

				// Only checked for changes that would otherwise continue the run: a change
				// to the local wiki's site link other than a badges-only one gets a run of
				// its own, so break both before and after it.
				if ( !$break && ( $change instanceof ItemChange ) ) {
					$siteLinkDiff = $change->getSiteLinkDiff();
					if ( isset( $siteLinkDiff[$this->localSiteId] )
						&& !$this->isBadgesOnlyChange( $siteLinkDiff[$this->localSiteId] ) ) {
						$break = true;
						$breakNext = true;
					}
				}

				if ( $break ) {
					$coalesced = $this->appendMergedRun( $entityId, $currentRun, $coalesced );

					$currentRun = [];
					$currentUser = $user;
					$currentAction = $action === EntityChange::ADD ? EntityChange::UPDATE : $action;
				}

				$currentRun[] = $change;
				// skip any change that failed to process in some way (bug T51417)
			} catch ( Exception $ex ) {
				wfLogWarning( __METHOD__ . ':' . $ex->getMessage() );
			}
		}

		// Flush whatever run is still open after the last change.
		return $this->appendMergedRun( $entityId, $currentRun, $coalesced );
	}

	/**
	 * Merges a run into a single change and appends it to the given result list.
	 *
	 * If merging fails with an MWException, a warning is emitted and the changes of
	 * the run are appended individually instead. Any other exception propagates to
	 * the caller. An empty run leaves the list untouched.
	 *
	 * @param EntityId $entityId
	 * @param EntityChange[] $run The changes making up the run; may be empty.
	 * @param Change[] $coalesced The result list built so far.
	 *
	 * @return Change[] $coalesced with the merged run (or its original changes) appended.
	 */
	private function appendMergedRun( EntityId $entityId, array $run, array $coalesced ) {
		if ( !$run ) {
			return $coalesced;
		}

		try {
			$coalesced[] = $this->mergeChanges( $entityId, $run );
		} catch ( MWException $ex ) {
			// Something went wrong while trying to merge the changes.
			// Just keep the original run.
			// Caller offset 2 skips this helper, so the warning is attributed to the
			// same call site as before the helper existed.
			wfWarn( $ex->getMessage(), 2 );
			$coalesced = array_merge( $coalesced, $run );
		}

		return $coalesced;
	}
305
306
	/**
	 * Orders two changes by their time, falling back to their ID when the times
	 * compare as equal.
	 *
	 * @param Change $a
	 * @param Change $b
	 *
	 * @return int 1 if $a sorts after $b, -1 if it sorts before, 0 if neither
	 *         time nor ID tells them apart.
	 */
	public function compareChangesByTimestamp( Change $a, Change $b ) {
		//NOTE: beware https://bugs.php.net/bug.php?id=50688 !

		$timeA = $a->getTime();
		$timeB = $b->getTime();

		if ( $timeA > $timeB ) {
			return 1;
		}
		if ( $timeA < $timeB ) {
			return -1;
		}

		// Same time: use the change IDs as the tie breaker.
		$idA = $a->getId();
		$idB = $b->getId();

		if ( $idA > $idB ) {
			return 1;
		}
		if ( $idA < $idB ) {
			return -1;
		}

		return 0;
	}
331
332
}
333