Completed
Push — master ( a4baab...0eea42 )
by
unknown
08:53 queued 11s
created

filterImplicitUsages()   B

Complexity

Conditions 7
Paths 11

Size

Total Lines 32

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 32
rs 8.4746
c 0
b 0
f 0
cc 7
nc 11
nop 3
1
<?php
2
3
declare( strict_types = 1 );
4
5
namespace Wikibase\Client\Usage;
6
7
use MediaWiki\Cache\LinkBatchFactory;
8
use TitleFactory;
9
use Traversable;
10
use Wikibase\DataModel\Entity\EntityId;
11
use Wikibase\DataModel\Entity\ItemId;
12
use Wikibase\Lib\Store\SiteLinkLookup;
13
14
/**
15
 * A {@link UsageLookup} which decorates an inner lookup
16
 * and adds an implicit usage on a linked item’s description.
17
 *
18
 * An implicit usage is different from an ordinary, explicit usage
19
 * in that it is never recorded by a {@link UsageTracker}:
20
 * it is not tracked when the page is parsed and actually uses a part of an entity,
21
 * but rather synthesized by this class based on hard-coded knowledge
22
 * about where else an entity’s data is used in relation to a page.
23
 * However, implicit usages otherwise look exactly like explicit usages:
24
 * for a user of the {@link UsageLookup} interface, it is not (yet?) possible
25
 * to determine whether a usage from the lookup is explicit or implicit.
26
 *
27
 * This class implements one kind of implicit usage:
28
 * if a client page is linked to an item, it has an implicit usage
29
 * on that item’s description in the client wiki’s content language.
30
 * This is because the description is used, for example,
31
 * as part of the search result for the page (typically on mobile),
32
 * even if it is never used in the page itself.
33
 *
34
 * @see @ref md_docs_topics_usagetracking for virtual usage,
35
 * a similar but separate concept.
36
 *
37
 * @license GPL-2.0-or-later
38
 */
39
class ImplicitDescriptionUsageLookup implements UsageLookup {
40
41
	/** @var UsageLookup */
42
	private $usageLookup;
43
44
	/** @var TitleFactory */
45
	private $titleFactory;
46
47
	/** @var LinkBatchFactory */
48
	private $linkBatchFactory;
49
50
	/** @var string */
51
	private $globalSiteId;
52
53
	/** @var SiteLinkLookup */
54
	private $siteLinkLookup;
55
56
	/**
57
	 * @param UsageLookup $usageLookup The underlying/inner lookup.
58
	 * @param TitleFactory $titleFactory
59
	 * @param LinkBatchFactory $linkBatchFactory
60
	 * @param string $globalSiteId The global site ID of the client wiki.
61
	 * @param SiteLinkLookup $siteLinkLookup
62
	 */
63
	public function __construct(
		UsageLookup $usageLookup,
		TitleFactory $titleFactory,
		LinkBatchFactory $linkBatchFactory,
		string $globalSiteId,
		SiteLinkLookup $siteLinkLookup
	) {
		// The decorated lookup that all explicit usages come from.
		$this->usageLookup = $usageLookup;

		// Used together to find which item a page of this wiki is linked to.
		$this->globalSiteId = $globalSiteId;
		$this->siteLinkLookup = $siteLinkLookup;

		// Used to build titles and to preload them in bulk.
		$this->titleFactory = $titleFactory;
		$this->linkBatchFactory = $linkBatchFactory;
	}
76
77
	/**
	 * Get the usages recorded for a page, plus the implicit description usage.
	 *
	 * The inner lookup's usages are returned unchanged if the page ID does not
	 * resolve to a title, if the page is not linked to an item, or if an
	 * explicit usage of that item's description in the page language is
	 * already present. Otherwise one description usage is appended.
	 *
	 * @param int $pageId Passed to the inner lookup and to TitleFactory::newFromID().
	 * @return array The usages of the page (treated as EntityUsage objects here).
	 */
	public function getUsagesForPage( $pageId ): array {
		$explicitUsages = $this->usageLookup->getUsagesForPage( $pageId );

		// Resolve page -> title -> linked item; either step may come up empty.
		$page = $this->titleFactory->newFromID( $pageId );
		$linkedItemId = $page
			? $this->siteLinkLookup->getItemIdForLink( $this->globalSiteId, $page->getPrefixedText() )
			: null;
		if ( !$linkedItemId ) {
			return $explicitUsages;
		}

		$languageCode = $page->getPageLanguage()->getCode();

		// Look for an explicit usage that is identical to the implicit one.
		foreach ( $explicitUsages as $existingUsage ) {
			if ( $existingUsage->getAspect() !== EntityUsage::DESCRIPTION_USAGE ) {
				continue;
			}
			if ( $existingUsage->getModifier() !== $languageCode ) {
				continue;
			}
			if ( $existingUsage->getEntityId()->equals( $linkedItemId ) ) {
				// already covered explicitly, so there is nothing to add
				return $explicitUsages;
			}
		}

		// not covered explicitly: append the implicit description usage
		$explicitUsages[] = new EntityUsage( $linkedItemId, EntityUsage::DESCRIPTION_USAGE, $languageCode );
		return $explicitUsages;
	}
111
112
	/**
	 * Get the pages using the given entities, including implicit description usages.
	 *
	 * This is a generator. If the requested aspects cannot match a description
	 * usage, it simply forwards everything from the inner lookup. Otherwise it
	 * yields the inner lookup's results, each extended with the implicit usage
	 * for its page where there is one, followed by one result for every page
	 * that only has an implicit usage.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $aspects Aspect keys to restrict to; an empty array means no restriction.
	 * @return Traversable Yields PageEntityUsages objects.
	 */
	public function getPagesUsing( array $entityIds, array $aspects = [] ): Traversable {
		if ( !$this->aspectsMatchImplicitUsage( $aspects ) ) {
			// The implicit usage cannot match these aspects: pure pass-through.
			return yield from $this->usageLookup->getPagesUsing( $entityIds, $aspects );
		}

		// Collect the candidate implicit usages (item ID and page language,
		// both keyed by page ID), then narrow them down to the requested aspects.
		$candidates = $this->findImplicitUsages( $entityIds );
		[ $pendingItemIds, $languageCodes ] = $this->filterImplicitUsages(
			$candidates[0],
			$candidates[1],
			$aspects
		);

		// Pass 1: pages the inner lookup knows about, decorated where applicable.
		foreach ( $this->usageLookup->getPagesUsing( $entityIds, $aspects ) as $pageEntityUsages ) {
			/** @var PageEntityUsages $pageEntityUsages */
			'@phan-var PageEntityUsages $pageEntityUsages';
			$knownPageId = $pageEntityUsages->getPageId();
			$linkedItemId = $pendingItemIds[$knownPageId] ?? null;
			if ( $linkedItemId !== null ) {
				$implicitUsage = new EntityUsage(
					$linkedItemId,
					EntityUsage::DESCRIPTION_USAGE,
					$languageCodes[$knownPageId]
				);
				// addUsages() is a no-op if an equivalent usage already exists
				$pageEntityUsages->addUsages( [ $implicitUsage ] );
				// handled here, so pass 2 must not yield this page again
				unset( $pendingItemIds[$knownPageId] );
			}
			yield $pageEntityUsages;
		}

		// Pass 2: pages that the inner lookup did not return at all.
		foreach ( $pendingItemIds as $remainingPageId => $linkedItemId ) {
			$implicitUsage = new EntityUsage(
				$linkedItemId,
				EntityUsage::DESCRIPTION_USAGE,
				$languageCodes[$remainingPageId]
			);
			yield new PageEntityUsages( $remainingPageId, [ $implicitUsage ] );
		}
	}
155
156
	/**
157
	 * Whether the given aspects potentially match an implicit usage.
158
	 *
159
	 * @param string[] $aspects
160
	 * @return bool
161
	 */
162
	private function aspectsMatchImplicitUsage( array $aspects ): bool {
		// No aspects given means no restriction, which matches trivially.
		$matches = $aspects === [];

		foreach ( $aspects as $key ) {
			// Only the aspect itself is compared here, not the modifier:
			// the implicit usage is on the *page* content language, which is
			// not known yet, so any description aspect is a potential match.
			// (filterImplicitUsages() applies the modifier later.)
			if ( EntityUsage::stripModifier( $key ) === EntityUsage::DESCRIPTION_USAGE ) {
				$matches = true;
				break;
			}
		}

		return $matches;
	}
178
179
	/**
180
	 * Find the implicit usages on the given entity IDs.
181
	 *
182
	 * Returns two arrays, both keyed by page ID:
183
	 * the item ID linked to that page and the content language of the page.
184
	 *
185
	 * @param EntityId[] $entityIds
186
	 * @return array [ ItemId[] $itemIdsByPageId, string[] $contentLanguagesByPageId ]
187
	 */
188
	private function findImplicitUsages( array $entityIds ): array {
		// Only ItemId instances are looked up; other entity IDs are ignored.
		$numericItemIds = [];
		foreach ( $entityIds as $entityId ) {
			if ( $entityId instanceof ItemId ) {
				$numericItemIds[] = $entityId->getNumericId();
			}
		}
		if ( $numericItemIds === [] ) {
			// No items means no implicit usages. Returning here also avoids
			// calling getLinks() with an empty ID list.
			// NOTE(review): depending on the SiteLinkLookup implementation, an
			// empty list may mean "no restriction" rather than "nothing" - confirm.
			return [ [], [] ];
		}

		// each link is an array [ string $siteId, string $pageName, int $itemId ]
		$links = $this->siteLinkLookup->getLinks( $numericItemIds, [ $this->globalSiteId ] );

		// Build one Title per link and remember which item it belongs to.
		// $titles and $numericIdsByIndex share the same indexes.
		$titles = [];
		$numericIdsByIndex = [];
		foreach ( $links as [ , $pageName, $numericItemId ] ) {
			$title = $this->titleFactory->newFromDBkey( $pageName );
			if ( $title === null ) {
				// newFromDBkey() returns null for an invalid page name; skip
				// such links instead of failing on a null method call below.
				continue;
			}
			$titles[] = $title;
			$numericIdsByIndex[] = $numericItemId;
		}
		if ( $titles === [] ) {
			return [ [], [] ];
		}

		// preload the titles in bulk (page ID and language)
		$this->linkBatchFactory->newLinkBatch( $titles )->execute();

		$itemIdsByPageId = [];
		$contentLanguagesByPageId = [];
		foreach ( $titles as $index => $title ) {
			$pageId = $title->getArticleID();
			if ( !$pageId ) {
				// a falsy page ID means there is no page to attach a usage to
				continue;
			}
			$itemIdsByPageId[$pageId] = ItemId::newFromNumber( $numericIdsByIndex[$index] );
			$contentLanguagesByPageId[$pageId] = $title->getPageLanguage()->getCode();
		}

		return [ $itemIdsByPageId, $contentLanguagesByPageId ];
	}
223
224
	/**
225
	 * Filter the implicit usages by the given aspects.
226
	 *
227
	 * Takes two arrays as returned by {@link findImplicitUsages}
228
	 * and returns similar arrays, but filtered if necessary.
229
	 *
230
	 * @param ItemId[] $itemIdsByPageId
231
	 * @param string[] $contentLanguagesByPageId
232
	 * @param string[] $aspects
233
	 * @return array [ ItemId[] $itemIdsByPageId, string[] $contentLanguagesByPageId ]
234
	 */
235
	private function filterImplicitUsages(
		array $itemIdsByPageId,
		array $contentLanguagesByPageId,
		array $aspects
	): array {
		$unfiltered = [ $itemIdsByPageId, $contentLanguagesByPageId ];

		if ( $aspects === [] ) {
			// no aspect restriction at all, so keep everything
			return $unfiltered;
		}

		// Collect the languages of all description aspects that carry a modifier.
		$wantedLanguages = [];
		foreach ( $aspects as $key ) {
			$parts = EntityUsage::splitAspectKey( $key );
			$aspect = $parts[0];
			$modifier = $parts[1];
			if ( $aspect === EntityUsage::DESCRIPTION_USAGE ) {
				if ( $modifier === null ) {
					// unmodified description aspect: every language is wanted
					return $unfiltered;
				}
				$wantedLanguages[] = $modifier;
			}
		}

		// Pages whose content language was not asked for are dropped from both arrays.
		$unwantedPages = array_filter(
			$contentLanguagesByPageId,
			static function ( $languageCode ) use ( $wantedLanguages ) {
				return !in_array( $languageCode, $wantedLanguages, /* strict */ true );
			}
		);

		return [
			array_diff_key( $itemIdsByPageId, $unwantedPages ),
			array_diff_key( $contentLanguagesByPageId, $unwantedPages ),
		];
	}
267
268
	/**
	 * Get the entities that are not used, as reported by the inner lookup.
	 *
	 * @param EntityId[] $entityIds
	 * @return array The inner lookup's result, unchanged.
	 */
	public function getUnusedEntities( array $entityIds ): array {
		// Implicit usages are deliberately not considered here: a page that is
		// linked to an item already has at least a sitelink usage on it, so the
		// implicit description usage can never turn an unused entity into a used one.
		$unusedEntityIds = $this->usageLookup->getUnusedEntities( $entityIds );
		return $unusedEntityIds;
	}
274
275
}
276