Completed
Push — master ( 1326c7...574a68 )
by
unknown
08:45 queued 11s
created

findImplicitUsages()   B

Complexity

Conditions 7
Paths 27

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 35
rs 8.4266
c 0
b 0
f 0
cc 7
nc 27
nop 1
1
<?php
2
3
declare( strict_types = 1 );
4
5
namespace Wikibase\Client\Usage;
6
7
use MediaWiki\Cache\LinkBatchFactory;
8
use TitleFactory;
9
use Traversable;
10
use Wikibase\DataModel\Entity\EntityId;
11
use Wikibase\DataModel\Entity\ItemId;
12
use Wikibase\Lib\Store\SiteLinkLookup;
13
14
/**
15
 * A {@link UsageLookup} which decorates an inner lookup
16
 * and adds an implicit usage on a linked item’s description.
17
 *
18
 * An implicit usage is different from an ordinary, explicit usage
19
 * in that it is never recorded by a {@link UsageTracker}:
20
 * it is not tracked when the page is parsed and actually uses a part of an entity,
21
 * but rather synthesized by this class based on hard-coded knowledge
22
 * about where else an entity’s data is used in relation to a page.
23
 * However, implicit usages otherwise look exactly like explicit usages:
24
 * for a user of the {@link UsageLookup} interface, it is not (yet?) possible
25
 * to determine whether a usage from the lookup is explicit or implicit.
26
 *
27
 * This class implements one kind of implicit usage:
28
 * if a client page is linked to an item, it has an implicit usage
29
 * on that item’s description in the client wiki’s content language.
30
 * This is because the description is used, for example,
31
 * as part of the search result for the page (typically on mobile),
32
 * even if it is never used in the page itself.
33
 *
34
 * @see @ref md_docs_topics_usagetracking for virtual usage,
35
 * a similar but separate concept.
36
 *
37
 * @license GPL-2.0-or-later
38
 */
39
class ImplicitDescriptionUsageLookup implements UsageLookup {
40
41
	/** @var UsageLookup */
42
	private $usageLookup;
43
44
	/** @var TitleFactory */
45
	private $titleFactory;
46
47
	/** @var LinkBatchFactory */
48
	private $linkBatchFactory;
49
50
	/** @var string */
51
	private $globalSiteId;
52
53
	/** @var SiteLinkLookup */
54
	private $siteLinkLookup;
55
56
	/**
	 * Store the decorated lookup and the services needed to synthesize implicit usages.
	 *
	 * @param UsageLookup $usageLookup The inner lookup whose results get decorated.
	 * @param TitleFactory $titleFactory Resolves page IDs and page names to titles.
	 * @param LinkBatchFactory $linkBatchFactory Creates link batches for preloading titles.
	 * @param string $globalSiteId The global site ID of the client wiki.
	 * @param SiteLinkLookup $siteLinkLookup Maps between client pages and linked items.
	 */
	public function __construct(
		UsageLookup $usageLookup,
		TitleFactory $titleFactory,
		LinkBatchFactory $linkBatchFactory,
		string $globalSiteId,
		SiteLinkLookup $siteLinkLookup
	) {
		// services used to find linked items and page data
		$this->siteLinkLookup = $siteLinkLookup;
		$this->linkBatchFactory = $linkBatchFactory;
		$this->titleFactory = $titleFactory;

		// identifies this client wiki in site link queries
		$this->globalSiteId = $globalSiteId;

		// the lookup being decorated
		$this->usageLookup = $usageLookup;
	}
76
77
	/**
	 * Get the inner lookup's usages for a page, plus the implicit description usage
	 * on the item linked to that page (if the page exists and is linked to an item).
	 *
	 * @param int $pageId Page ID, as accepted by TitleFactory::newFromID()
	 * @return EntityUsage[] The inner lookup's result; the implicit usage,
	 *  if any, is stored under its identity string.
	 */
	public function getUsagesForPage( $pageId ): array {
		$usages = $this->usageLookup->getUsagesForPage( $pageId );

		$title = $this->titleFactory->newFromID( $pageId );
		if ( $title ) {
			$linkedItemId = $this->siteLinkLookup->getItemIdForLink(
				$this->globalSiteId,
				$title->getPrefixedText()
			);

			if ( $linkedItemId ) {
				// the implicit usage is on the description in the page's own language
				$pageLanguageCode = $title->getPageLanguage()->getCode();
				$implicitUsage = new EntityUsage(
					$linkedItemId,
					EntityUsage::DESCRIPTION_USAGE,
					$pageLanguageCode
				);

				// an existing entry under the same identity string is simply overwritten
				$usages[$implicitUsage->getIdentityString()] = $implicitUsage;
			}
		}

		return $usages;
	}
102
103
	/**
	 * Get the pages using the given entities, decorating the inner lookup's
	 * results with implicit description usages where the aspects allow it.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $aspects Aspect keys to restrict the result to; empty for all aspects.
	 * @return Traversable Yields PageEntityUsages objects.
	 */
	public function getPagesUsing( array $entityIds, array $aspects = [] ): Traversable {
		if ( !$this->aspectsMatchImplicitUsage( $aspects ) ) {
			// None of the requested aspects can match an implicit usage,
			// so the inner lookup's results are passed through unchanged.
			return yield from $this->usageLookup->getPagesUsing( $entityIds, $aspects );
		}

		// Collect the candidate implicit usages (item ID and content language per page) …
		[ $pendingItemIds, $languagesByPageId ] = $this->findImplicitUsages( $entityIds );
		// … and narrow them down to the ones the requested aspects cover.
		[ $pendingItemIds, $languagesByPageId ] = $this->filterImplicitUsages(
			$pendingItemIds,
			$languagesByPageId,
			$aspects
		);

		// First pass: pages the inner lookup knows about, with the implicit usage merged in.
		foreach ( $this->usageLookup->getPagesUsing( $entityIds, $aspects ) as $pageEntityUsages ) {
			/** @var PageEntityUsages $pageEntityUsages */
			'@phan-var PageEntityUsages $pageEntityUsages';
			$pageId = $pageEntityUsages->getPageId();

			if ( isset( $pendingItemIds[$pageId] ) ) {
				$implicitUsage = new EntityUsage(
					$pendingItemIds[$pageId],
					EntityUsage::DESCRIPTION_USAGE,
					$languagesByPageId[$pageId]
				);
				// adding a usage that is already present does not change anything
				$pageEntityUsages->addUsages( [ $implicitUsage ] );
				// handled, so the second pass must not yield this page again
				unset( $pendingItemIds[$pageId] );
			}

			yield $pageEntityUsages;
		}

		// Second pass: pages that only have the implicit usage,
		// which the inner lookup therefore never returned.
		foreach ( $pendingItemIds as $pageId => $itemId ) {
			$implicitUsage = new EntityUsage(
				$itemId,
				EntityUsage::DESCRIPTION_USAGE,
				$languagesByPageId[$pageId]
			);
			yield new PageEntityUsages( $pageId, [ $implicitUsage ] );
		}
	}
146
147
	/**
	 * Check whether a set of requested aspects could include an implicit usage.
	 *
	 * An empty list (no restriction) always matches; otherwise at least one
	 * aspect key must be a description usage once its modifier is stripped.
	 *
	 * @param string[] $aspects
	 * @return bool
	 */
	private function aspectsMatchImplicitUsage( array $aspects ): bool {
		// no restriction at all means every usage is of interest
		$matches = ( $aspects === [] );

		foreach ( $aspects as $aspectKey ) {
			$aspect = EntityUsage::stripModifier( $aspectKey );
			if ( $aspect === EntityUsage::DESCRIPTION_USAGE ) {
				// The modifier is deliberately ignored here: the implicit usage is in the
				// *page* content language, which is not known yet at this point.
				// The per-language filtering happens later in filterImplicitUsages().
				$matches = true;
				break;
			}
		}

		return $matches;
	}
169
170
	/**
	 * Find the implicit usages on the given entity IDs.
	 *
	 * Only item IDs are considered; other entity IDs are skipped.
	 * Links whose page name cannot be turned into a title,
	 * or whose page has no page ID, are skipped as well.
	 *
	 * Returns two arrays, both keyed by page ID:
	 * the item ID linked to that page and the content language of the page.
	 *
	 * @param EntityId[] $entityIds
	 * @return array [ ItemId[] $itemIdsByPageId, string[] $contentLanguagesByPageId ]
	 */
	private function findImplicitUsages( array $entityIds ): array {
		$numericItemIds = [];
		foreach ( $entityIds as $entityId ) {
			if ( $entityId instanceof ItemId ) {
				$numericItemIds[] = $entityId->getNumericId();
			}
		}

		if ( $numericItemIds === [] ) {
			// No items were requested, so there cannot be any implicit usages.
			// Returning early also avoids handing an empty ID list to the lookup,
			// which may interpret it as "no restriction on item IDs"
			// (NOTE(review): confirm against the SiteLinkLookup implementation in use).
			return [ [], [] ];
		}

		// each link is an array [ string $siteId, string $pageName, int $itemId ]
		$links = $this->siteLinkLookup->getLinks( $numericItemIds, [ $this->globalSiteId ] );

		// preload the titles in bulk (page ID and language);
		// newFromDBkey() yields null for a page name that is not a valid title, so skip those
		$titles = [];
		foreach ( array_column( $links, 1 ) as $pageName ) {
			$title = $this->titleFactory->newFromDBkey( $pageName );
			if ( $title !== null ) {
				$titles[] = $title;
			}
		}
		$this->linkBatchFactory->newLinkBatch( $titles )->execute();

		$itemIdsByPageId = [];
		foreach ( $links as [ , $pageName, $itemId ] ) {
			// note: this creates a new Title and looks up its page ID in the link cache;
			// this is simpler than finding the right existing Title in the $titles we have
			// (the $pageName is probably not exactly in DB key form)
			$title = $this->titleFactory->newFromDBkey( $pageName );
			if ( $title === null ) {
				continue;
			}
			$pageId = $title->getArticleID();
			if ( $pageId ) {
				$itemIdsByPageId[$pageId] = ItemId::newFromNumber( $itemId );
			}
		}

		$contentLanguagesByPageId = [];
		foreach ( $titles as $title ) {
			$pageId = $title->getArticleID();
			if ( $pageId ) {
				$contentLanguagesByPageId[$pageId] = $title->getPageLanguage()->getCode();
			}
		}

		return [ $itemIdsByPageId, $contentLanguagesByPageId ];
	}
214
215
	/**
	 * Restrict the implicit usages to those covered by the given aspects.
	 *
	 * Accepts the two page-ID-keyed arrays produced by {@link findImplicitUsages}
	 * and returns arrays of the same shape. Nothing is removed if the aspects are
	 * empty or include an unmodified description aspect; otherwise only pages whose
	 * content language is one of the requested description modifiers are kept.
	 *
	 * @param ItemId[] $itemIdsByPageId
	 * @param string[] $contentLanguagesByPageId
	 * @param string[] $aspects
	 * @return array [ ItemId[] $itemIdsByPageId, string[] $contentLanguagesByPageId ]
	 */
	private function filterImplicitUsages(
		array $itemIdsByPageId,
		array $contentLanguagesByPageId,
		array $aspects
	): array {
		if ( $aspects === [] ) {
			// no restriction on aspects: keep everything
			return [ $itemIdsByPageId, $contentLanguagesByPageId ];
		}

		// collect the languages of all requested description aspects
		$wantedLanguages = [];
		foreach ( $aspects as $aspectKey ) {
			[ $aspect, $modifier ] = EntityUsage::splitAspectKey( $aspectKey );
			if ( $aspect === EntityUsage::DESCRIPTION_USAGE ) {
				if ( $modifier === null ) {
					// descriptions in any language were requested: keep everything
					return [ $itemIdsByPageId, $contentLanguagesByPageId ];
				}
				$wantedLanguages[] = $modifier;
			}
		}

		// pages whose content language was not requested
		$rejectedLanguagesByPageId = array_filter(
			$contentLanguagesByPageId,
			static function ( $contentLanguage ) use ( $wantedLanguages ) {
				return !in_array( $contentLanguage, $wantedLanguages, /* strict */ true );
			}
		);

		// drop those pages from both arrays, leaving all other entries as they were
		return [
			array_diff_key( $itemIdsByPageId, $rejectedLanguagesByPageId ),
			array_diff_key( $contentLanguagesByPageId, $rejectedLanguagesByPageId ),
		];
	}
258
259
	/**
	 * Determine which of the given entities are unused, by asking the inner lookup.
	 *
	 * @param EntityId[] $entityIds
	 * @return array Whatever the inner lookup returns for these IDs, unchanged.
	 */
	public function getUnusedEntities( array $entityIds ): array {
		// Implicit usages never change the answer: a page that is linked to an item
		// already has (at least) a sitelink usage on that item, so the item counts
		// as used regardless of the implicit description usage.
		$unusedEntityIds = $this->usageLookup->getUnusedEntities( $entityIds );

		return $unusedEntityIds;
	}
265
266
}
267