AffectedPagesFinder::getPagesReferencedInDiff()   A
last analyzed

Complexity

Conditions 3
Paths 4

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 14
rs 9.7998
c 0
b 0
f 0
cc 3
nc 4
nop 1
1
<?php
2
3
declare( strict_types = 1 );
4
5
namespace Wikibase\Client\Changes;
6
7
use ArrayIterator;
8
use InvalidArgumentException;
9
use MediaWiki\Cache\LinkBatchFactory;
10
use Psr\Log\LoggerInterface;
11
use Psr\Log\NullLogger;
12
use Title;
13
use TitleFactory;
14
use Traversable;
15
use UnexpectedValueException;
16
use Wikibase\Client\Usage\EntityUsage;
17
use Wikibase\Client\Usage\PageEntityUsages;
18
use Wikibase\Client\Usage\UsageAspectTransformer;
19
use Wikibase\Client\Usage\UsageLookup;
20
use Wikibase\DataModel\Entity\EntityId;
21
use Wikibase\Lib\Changes\Change;
22
use Wikibase\Lib\Changes\EntityChange;
23
use Wikibase\Lib\Changes\ItemChange;
24
25
/**
26
 * @license GPL-2.0-or-later
27
 * @author Daniel Kinzler
28
 * @author Katie Filbert < [email protected] >
29
 */
30
class AffectedPagesFinder {
31
32
	/**
	 * Queried for the pages that use a changed entity.
	 *
	 * @var UsageLookup
	 */
35
	private $usageLookup;
36
37
	/**
	 * Creates Title objects from page IDs and from page names.
	 *
	 * @var TitleFactory
	 */
40
	private $titleFactory;
41
42
	/**
	 * Creates the link batch that is executed before page IDs of titles are read.
	 *
	 * @var LinkBatchFactory
	 */
43
	private $linkBatchFactory;
44
45
	/**
	 * Key under which this wiki's entry is looked up in a sitelink diff.
	 *
	 * @var string
	 */
48
	private $siteId;
49
50
	/**
	 * Receives debug messages; a NullLogger when no logger was injected.
	 *
	 * @var LoggerInterface
	 */
53
	private $logger;
54
55
	/**
	 * Whether titles of non-existing pages are dropped when filtering updates.
	 *
	 * @var bool
	 */
58
	private $checkPageExistence;
59
60
	/**
	 * Stores the collaborators needed to resolve the pages affected by a change.
	 *
	 * @param UsageLookup $usageLookup Queried for the pages using a changed entity
	 * @param TitleFactory $titleFactory Builds Title objects from page IDs and page names
	 * @param LinkBatchFactory $linkBatchFactory Builds the link batch run before reading page IDs
	 * @param string $siteId Key of this wiki's entry in a sitelink diff
	 * @param LoggerInterface|null $logger Replaced by a NullLogger when omitted
	 * @param bool $checkPageExistence To disable slow filtering that is not relevant in test
	 *  scenarios. Not meant to be used in production!
	 */
	public function __construct(
		UsageLookup $usageLookup,
		TitleFactory $titleFactory,
		LinkBatchFactory $linkBatchFactory,
		string $siteId,
		?LoggerInterface $logger = null,
		bool $checkPageExistence = true
	) {
		// Plain configuration values first, then the injected services
		$this->siteId = $siteId;
		$this->checkPageExistence = $checkPageExistence;

		$this->usageLookup = $usageLookup;
		$this->titleFactory = $titleFactory;
		$this->linkBatchFactory = $linkBatchFactory;

		// Always hold a usable logger so call sites never need a null check
		$this->logger = $logger ?? new NullLogger();
	}
86
87
	/**
	 * Determines the usages affected by a change, keyed by page ID.
	 *
	 * Only entity changes are evaluated; any other kind of change yields an empty array.
	 *
	 * @param Change $change
	 *
	 * @return PageEntityUsages[]
	 */
	public function getAffectedUsagesByPage( Change $change ) {
		if ( !( $change instanceof EntityChange ) ) {
			return [];
		}

		return $this->filterUpdates( $this->getAffectedPages( $change ) );
	}
100
101
	/**
	 * Derives the usage aspects touched by a change from its compact diff.
	 *
	 * Aspects are reported in this order: the sitelink aspect (followed by the title aspect
	 * when this wiki's own sitelink changed in more than its badges), the label aspects, the
	 * description aspects, the statement aspects, and finally the "other" aspect. The "other"
	 * aspect is also reported when nothing else was detected, so the result is never empty.
	 *
	 * @param EntityChange $change
	 *
	 * @return string[]
	 */
	public function getChangedAspects( EntityChange $change ) {
		$compactDiff = $change->getCompactDiff();
		$changedAspects = [];

		$siteLinkChanges = $compactDiff->getSiteLinkChanges();
		if ( $siteLinkChanges !== [] ) {
			$changedAspects[] = EntityUsage::SITELINK_USAGE;

			// Only a change to this wiki's own sitelink can affect the title aspect
			$localSiteLinkChange = $siteLinkChanges[$this->siteId] ?? null;
			if ( $localSiteLinkChange !== null && !$this->isBadgesOnlyChange( $localSiteLinkChange ) ) {
				$changedAspects[] = EntityUsage::TITLE_USAGE;
			}
		}

		$labelChanges = $compactDiff->getLabelChanges();
		if ( $labelChanges !== [] ) {
			$changedAspects = array_merge(
				$changedAspects,
				$this->getChangedTermAspects( EntityUsage::LABEL_USAGE, $labelChanges )
			);
		}

		$descriptionChanges = $compactDiff->getDescriptionChanges();
		if ( $descriptionChanges !== [] ) {
			$changedAspects = array_merge(
				$changedAspects,
				$this->getChangedTermAspects( EntityUsage::DESCRIPTION_USAGE, $descriptionChanges )
			);
		}

		$statementChanges = $compactDiff->getStatementChanges();
		if ( $statementChanges !== [] ) {
			$changedAspects = array_merge(
				$changedAspects,
				$this->getChangedStatementAspects( $statementChanges )
			);
		}

		// The empty-list fallback is needed when diff is suppressed for performance reasons
		if ( $compactDiff->hasOtherChanges() !== false || $changedAspects === [] ) {
			$changedAspects[] = EntityUsage::OTHER_USAGE;
		}

		return $changedAspects;
	}
155
156
	/**
	 * Builds the statement aspect keys for a list of changed properties.
	 *
	 * One modified key is produced per entry of $diff, followed by the unmodified statement
	 * aspect key as the last element.
	 *
	 * @param string[] $diff Values passed as the modifier of the statement aspect
	 *
	 * @return string[]
	 */
	private function getChangedStatementAspects( array $diff ) {
		// array_values() keeps the result a plain list regardless of the keys of $diff
		$statementAspects = array_map(
			static function ( $propertyId ) {
				return EntityUsage::makeAspectKey( EntityUsage::STATEMENT_USAGE, $propertyId );
			},
			array_values( $diff )
		);

		$statementAspects[] = EntityUsage::makeAspectKey( EntityUsage::STATEMENT_USAGE );

		return $statementAspects;
	}
172
173
	/**
	 * Builds the aspect keys of one term aspect for a list of changed languages.
	 *
	 * One modified key is produced per entry of $diff, followed by the unmodified key of
	 * the given aspect as the last element.
	 *
	 * @param string $aspect Aspect to build keys for
	 * @param string[] $diff Values passed as the modifier of the aspect
	 *
	 * @return string[]
	 */
	private function getChangedTermAspects( $aspect, array $diff ) {
		// array_values() keeps the result a plain list regardless of the keys of $diff
		$termAspects = array_map(
			static function ( $lang ) use ( $aspect ) {
				return EntityUsage::makeAspectKey( $aspect, $lang );
			},
			array_values( $diff )
		);

		$termAspects[] = EntityUsage::makeAspectKey( $aspect );

		return $termAspects;
	}
190
191
	/**
	 * Returns the page updates implied by the given change.
	 *
	 * Looks up the pages recorded as using the changed entity under one of the changed
	 * aspects or under the "all" aspect, then passes each page's usages through a
	 * UsageAspectTransformer configured with the changed aspects, dropping pages that are
	 * left without usages. When the title aspect of an item changed, virtual sitelink usages
	 * for the pages named in this wiki's sitelink diff are merged in.
	 *
	 * @param EntityChange $change
	 *
	 * @throws UnexpectedValueException If the change does not provide an entity ID
	 * @return Traversable of PageEntityUsages
	 *
	 * @see @ref md_docs_topics_usagetracking for details about virtual usages
	 */
	private function getAffectedPages( EntityChange $change ) {
		$entityId = $change->getEntityId();
		if ( !( $entityId instanceof EntityId ) ) {
			// Neither the usage lookup nor the usage transformation accept a missing ID,
			// so fail with a descriptive error before null is passed any further.
			throw new UnexpectedValueException(
				'Cannot determine affected pages for a change without an entity ID'
			);
		}

		$changedAspects = $this->getChangedAspects( $change );

		$usages = $this->usageLookup->getPagesUsing(
			// @todo: more than one entity at once!
			[ $entityId ],
			// Look up pages that are marked as either using one of the changed or all aspects
			array_merge( $changedAspects, [ EntityUsage::ALL_USAGE ] )
		);

		$usages = $this->transformAllPageEntityUsages( $usages, $entityId, $changedAspects );

		// if title changed, add virtual usages for both old and new title
		if ( $change instanceof ItemChange && in_array( EntityUsage::TITLE_USAGE, $changedAspects, true ) ) {
			$diffChangedAspects = $change->getCompactDiff();
			$namesFromDiff = $this->getPagesReferencedInDiff(
				$diffChangedAspects->getSiteLinkChanges()
			);
			$titlesFromDiff = $this->getTitlesFromTexts( $namesFromDiff );
			$usagesFromDiff = $this->makeVirtualUsages(
				$titlesFromDiff,
				$entityId,
				[ EntityUsage::SITELINK_USAGE ]
			);

			//FIXME: we can't really merge if $usages is an iterator, not an array.
			//TODO: Inject $usagesFromDiff "on the fly" while streaming other usages.
			//NOTE: $usages must pass through mergeUsagesInto for re-indexing
			$mergedUsages = [];
			$this->mergeUsagesInto( $usages, $mergedUsages );
			$this->mergeUsagesInto( $usagesFromDiff, $mergedUsages );
			$usages = new ArrayIterator( $mergedUsages );
		}

		return $usages;
	}
237
238
	/**
	 * Merges usages into an array indexed by page ID.
	 *
	 * A page not yet present in $into is added as is. For a page that is already present,
	 * the incoming usages are added to the object already stored in $into, which is
	 * modified in place.
	 *
	 * @param iterable<PageEntityUsages> $from
	 * @param PageEntityUsages[] &$into Array to merge into
	 */
	private function mergeUsagesInto( iterable $from, array &$into ) {
		foreach ( $from as $incoming ) {
			$pageId = $incoming->getPageId();

			if ( !isset( $into[$pageId] ) ) {
				$into[$pageId] = $incoming;
				continue;
			}

			$into[$pageId]->addUsages( $incoming->getUsages() );
		}
	}
253
254
	/**
	 * Collects the page names mentioned in this wiki's entry of a sitelink diff.
	 *
	 * Reads positions 0 and 1 of the entry stored under this wiki's site ID and returns
	 * those that are set, in that order. Presumably these are the old and the new page
	 * name of the sitelink; verify against the producer of the compact diff. A diff without
	 * an entry for this wiki references no local pages and yields an empty list.
	 *
	 * @param array[] $siteLinkDiff Sitelink changes keyed by site ID
	 *
	 * @return string[]
	 */
	private function getPagesReferencedInDiff( array $siteLinkDiff ) {
		// Tolerate a diff that does not touch this wiki instead of indexing into null
		$siteLinkDiffWiki = $siteLinkDiff[$this->siteId] ?? [];

		$pagesToUpdate = [];

		foreach ( [ 0, 1 ] as $position ) {
			// isset() is false for both a missing position and an explicit null
			if ( isset( $siteLinkDiffWiki[$position] ) ) {
				$pagesToUpdate[] = $siteLinkDiffWiki[$position];
			}
		}

		return $pagesToUpdate;
	}
274
275
	/**
	 * Tells whether a single site's sitelink diff changed nothing but its badges.
	 *
	 * That is the case when positions 0 and 1 of the diff are identical and position 2 is
	 * exactly true.
	 *
	 * @param array $siteLinkDiff
	 *
	 * @return bool
	 */
	private function isBadgesOnlyChange( array $siteLinkDiff ) {
		if ( $siteLinkDiff[0] !== $siteLinkDiff[1] ) {
			return false;
		}

		return $siteLinkDiff[2] === true;
	}
283
284
	/**
	 * Filters updates. This removes duplicates and non-existing pages.
	 *
	 * Usages are first indexed by page ID, so a later entry for the same page replaces an
	 * earlier one. Titles are then created for those page IDs, and only pages for which a
	 * title is returned are kept. Unless existence checks are disabled, titles reporting
	 * that their page does not exist are dropped as well.
	 *
	 * @param Traversable $usages A traversable of PageEntityUsages.
	 *
	 * @return PageEntityUsages[] Indexed by page ID
	 */
	private function filterUpdates( Traversable $usages ) {
		$byPageId = [];

		/** @var PageEntityUsages $pageEntityUsages */
		foreach ( $usages as $pageEntityUsages ) {
			$byPageId[$pageEntityUsages->getPageId()] = $pageEntityUsages;
		}

		$titles = $this->titleFactory->newFromIDs( array_keys( $byPageId ) );
		$filtered = [];

		foreach ( $titles as $title ) {
			// exists() is only consulted while existence checks are enabled
			if ( !$this->checkPageExistence || $title->exists() ) {
				$id = $title->getArticleID();
				$filtered[$id] = $byPageId[$id];
			}
		}

		return $filtered;
	}
312
313
	/**
	 * Turns page names into Title objects, skipping names for which the title factory
	 * returns no usable title.
	 *
	 * @param string[] $names
	 *
	 * @return Title[] List in the order of $names
	 */
	private function getTitlesFromTexts( array $names ) {
		$titleFactory = $this->titleFactory;

		$candidates = array_map(
			static function ( $name ) use ( $titleFactory ) {
				return $titleFactory->newFromText( $name );
			},
			$names
		);

		// array_filter() without a callback drops the falsy results; re-index to a list
		return array_values( array_filter( $candidates ) );
	}
330
331
	/**
	 * Creates usages of the given entity on each of the given titles.
	 *
	 * Every title with a non-zero article ID receives the same set of usages, one per
	 * aspect key. Titles whose article ID is 0 are skipped and reported to the debug log.
	 *
	 * @param Title[] $titles
	 * @param EntityId $entityId
	 * @param string[] $aspects Aspect keys, split into aspect and modifier before use
	 *
	 * @return PageEntityUsages[] Indexed by page ID
	 */
	private function makeVirtualUsages( array $titles, EntityId $entityId, array $aspects ) {
		$entityUsages = [];
		foreach ( $aspects as $aspectKey ) {
			[ $aspectName, $modifier ] = EntityUsage::splitAspectKey( $aspectKey );
			$entityUsages[] = new EntityUsage( $entityId, $aspectName, $modifier );
		}

		// bulk-load the page IDs into the LinkCache
		$linkBatch = $this->linkBatchFactory->newLinkBatch( $titles );
		$linkBatch->execute();

		$virtualUsages = [];
		foreach ( $titles as $title ) {
			$pageId = $title->getArticleID();

			if ( $pageId !== 0 ) {
				$virtualUsages[$pageId] = new PageEntityUsages( $pageId, $entityUsages );
				continue;
			}

			$this->logger->debug(
				'{method}: Article ID for {titleFullText} is 0',
				[ 'method' => __METHOD__, 'titleFullText' => $title->getFullText() ]
			);
		}

		return $virtualUsages;
	}
366
367
	/**
	 * Lazily passes each page's usages through a UsageAspectTransformer that is configured
	 * with the changed aspects of the given entity.
	 *
	 * This is a generator: nothing is transformed until the result is iterated. Pages whose
	 * transformed usages are empty are left out; the keys of $usages are kept.
	 *
	 * @param iterable<PageEntityUsages> $usages
	 * @param EntityId $entityId
	 * @param string[] $changedAspects
	 *
	 * @return iterable<PageEntityUsages>
	 */
	private function transformAllPageEntityUsages( iterable $usages, EntityId $entityId, array $changedAspects ): iterable {
		$transformer = new UsageAspectTransformer();
		$transformer->setRelevantAspects( $entityId, $changedAspects );

		foreach ( $usages as $key => $pageUsages ) {
			$transformed = $transformer->transformPageEntityUsages( $pageUsages );

			if ( $transformed->isEmpty() ) {
				continue;
			}

			yield $key => $transformed;
		}
	}
386
387
}
388