Completed
Push — master ( 951284...b5c57e )
by
unknown
06:36 queued 11s
created

includes/Usage/ImplicitDescriptionUsageLookup.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare( strict_types = 1 );
4
5
namespace Wikibase\Client\Usage;
6
7
use MediaWiki\Cache\LinkBatchFactory;
8
use TitleFactory;
9
use Traversable;
10
use Wikibase\Client\Store\DescriptionLookup;
11
use Wikibase\DataModel\Entity\EntityId;
12
use Wikibase\DataModel\Entity\ItemId;
13
use Wikibase\Lib\Store\SiteLinkLookup;
14
15
/**
16
 * A {@link UsageLookup} which decorates an inner lookup
17
 * and adds an implicit usage on a linked item’s description.
18
 *
19
 * An implicit usage is different from an ordinary, explicit usage
20
 * in that it is never recorded by a {@link UsageTracker}:
21
 * it is not tracked when the page is parsed and actually uses a part of an entity,
22
 * but rather synthesized by this class based on hard-coded knowledge
23
 * about where else an entity’s data is used in relation to a page.
24
 * However, implicit usages otherwise look exactly like explicit usages:
25
 * for a user of the {@link UsageLookup} interface, it is not (yet?) possible
26
 * to determine whether a usage from the lookup is explicit or implicit.
27
 *
28
 * This class implements one kind of implicit usage:
29
 * if a client page is linked to an item, it has an implicit usage
30
 * on that item’s description in the client wiki’s content language,
31
 * unless the client page also has a local description overriding the central one.
32
 * This is because the description is used, for example,
33
 * as part of the search result for the page (typically on mobile),
34
 * even if it is never used in the page itself.
35
 *
36
 * @see @ref md_docs_topics_usagetracking for virtual usage,
37
 * a similar but separate concept.
38
 *
39
 * @license GPL-2.0-or-later
40
 */
41
class ImplicitDescriptionUsageLookup implements UsageLookup {
42
43
	/** @var UsageLookup */
44
	private $usageLookup;
45
46
	/** @var TitleFactory */
47
	private $titleFactory;
48
49
	/** @var bool */
50
	private $allowLocalShortDesc;
51
52
	/** @var DescriptionLookup */
53
	private $descriptionLookup;
54
55
	/** @var LinkBatchFactory */
56
	private $linkBatchFactory;
57
58
	/** @var string */
59
	private $globalSiteId;
60
61
	/** @var SiteLinkLookup */
62
	private $siteLinkLookup;
63
64
	/**
	 * @param UsageLookup $usageLookup The underlying/inner lookup being decorated.
	 * @param TitleFactory $titleFactory
	 * @param bool $allowLocalShortDesc The 'allowLocalShortDesc' client setting.
	 * If true, a page that has a local description gets no implicit usage
	 * (the local description overrides the central one),
	 * so only pages without a local description get an implicit usage.
	 * If false, local descriptions are not consulted at all.
	 * @param DescriptionLookup $descriptionLookup Used to look up local descriptions.
	 * Unused if $allowLocalShortDesc is false.
	 * @param LinkBatchFactory $linkBatchFactory
	 * @param string $globalSiteId The global site ID of the client wiki.
	 * @param SiteLinkLookup $siteLinkLookup
	 */
	public function __construct(
		UsageLookup $usageLookup,
		TitleFactory $titleFactory,
		bool $allowLocalShortDesc,
		DescriptionLookup $descriptionLookup,
		LinkBatchFactory $linkBatchFactory,
		string $globalSiteId,
		SiteLinkLookup $siteLinkLookup
	) {
		$this->usageLookup = $usageLookup;
		$this->titleFactory = $titleFactory;
		$this->allowLocalShortDesc = $allowLocalShortDesc;
		$this->descriptionLookup = $descriptionLookup;
		$this->linkBatchFactory = $linkBatchFactory;
		$this->globalSiteId = $globalSiteId;
		$this->siteLinkLookup = $siteLinkLookup;
	}
92
93
	/**
	 * Get the usages of a page: whatever the inner lookup reports,
	 * plus an implicit description usage if the page is linked to an item
	 * and the central description is not overridden locally.
	 *
	 * @param int $pageId
	 * @return EntityUsage[] The inner lookup's usages; the implicit usage (if any)
	 * is stored under its identity string.
	 */
	public function getUsagesForPage( $pageId ): array {
		$usages = $this->usageLookup->getUsagesForPage( $pageId );
		$title = $this->titleFactory->newFromID( $pageId );
		if ( !$title ) {
			// no such page, nothing to add
			return $usages;
		}

		if ( $this->allowLocalShortDesc ) {
			$localDescription = $this->descriptionLookup->getDescription(
				$title,
				DescriptionLookup::SOURCE_LOCAL
			);
			// Compare against null explicitly: the lookup returns string|null,
			// and a plain truthiness check would treat a local description of
			// '' or '0' as if there were none. findImplicitUsages() likewise
			// uses isset(), so any non-null local description counts as an override.
			if ( $localDescription !== null ) {
				// central short description overridden locally, no implicit usage
				return $usages;
			}
		}

		$entityId = $this->siteLinkLookup->getItemIdForLink(
			$this->globalSiteId,
			$title->getPrefixedText()
		);
		if ( !$entityId ) {
			// page is not linked to any item
			return $usages;
		}

		$contentLanguage = $title->getPageLanguage()->getCode();

		$usage = new EntityUsage(
			$entityId,
			EntityUsage::DESCRIPTION_USAGE,
			$contentLanguage
		);
		// this might replace an existing usage but that’s okay
		$usages[$usage->getIdentityString()] = $usage;

		return $usages;
	}
128
129
	/**
	 * Get the pages using the given entities, decorated with implicit description usages.
	 *
	 * Pages returned by the inner lookup get the implicit usage added to them;
	 * pages that only have an implicit usage are yielded afterwards.
	 *
	 * @param EntityId[] $entityIds
	 * @param string[] $aspects Aspect keys to restrict the result to; empty for no restriction.
	 * @return Traversable Yields PageEntityUsages objects.
	 */
	public function getPagesUsing( array $entityIds, array $aspects = [] ): Traversable {
		if ( !$this->aspectsMatchImplicitUsage( $aspects ) ) {
			// None of the requested aspects can match an implicit usage,
			// so the inner lookup's result is passed through untouched.
			return yield from $this->usageLookup->getPagesUsing( $entityIds, $aspects );
		}

		// Collect the implicit usages (one per page: item ID and content language) …
		[ $foundItemIds, $foundLanguages ] = $this->findImplicitUsages( $entityIds );
		// … and narrow them down to the requested aspects.
		[ $pendingItemIds, $languagesByPageId ] = $this->filterImplicitUsages(
			$foundItemIds,
			$foundLanguages,
			$aspects
		);

		// First pass: pages the inner lookup knows about, with the implicit usage merged in.
		$innerResults = $this->usageLookup->getPagesUsing( $entityIds, $aspects );
		foreach ( $innerResults as $pageEntityUsages ) {
			/** @var PageEntityUsages $pageEntityUsages */
			'@phan-var PageEntityUsages $pageEntityUsages';
			$pageId = $pageEntityUsages->getPageId();
			if ( isset( $pendingItemIds[$pageId] ) ) {
				$implicitUsage = new EntityUsage(
					$pendingItemIds[$pageId],
					EntityUsage::DESCRIPTION_USAGE,
					$languagesByPageId[$pageId]
				);
				// addUsages() is a no-op if an equivalent usage already exists
				$pageEntityUsages->addUsages( [ $implicitUsage ] );
				// handled; must not be yielded again in the second pass
				unset( $pendingItemIds[$pageId] );
			}
			yield $pageEntityUsages;
		}

		// Second pass: pages that the inner lookup did not return at all.
		foreach ( $pendingItemIds as $pageId => $itemId ) {
			$implicitUsage = new EntityUsage(
				$itemId,
				EntityUsage::DESCRIPTION_USAGE,
				$languagesByPageId[$pageId]
			);
			yield new PageEntityUsages( $pageId, [ $implicitUsage ] );
		}
	}
172
173
	/**
	 * Decide whether a list of aspect keys could possibly match an implicit usage.
	 *
	 * An empty list means “all aspects” and therefore always matches.
	 *
	 * @param string[] $aspects
	 * @return bool
	 */
	private function aspectsMatchImplicitUsage( array $aspects ): bool {
		// no restriction at all matches everything
		$matches = ( $aspects === [] );

		foreach ( $aspects as $aspectKey ) {
			// The modifier is deliberately ignored here: the implicit usage is on the
			// description in the *page* content language, not the wiki content language,
			// so any description aspect is a potential match.
			// (Filtering by modifier and content language happens later,
			// see filterImplicitUsages().)
			if ( EntityUsage::stripModifier( $aspectKey ) === EntityUsage::DESCRIPTION_USAGE ) {
				$matches = true;
				break;
			}
		}

		return $matches;
	}
195
196
	/**
	 * Find the implicit usages on the given entity IDs.
	 *
	 * Returns two arrays, both keyed by page ID (and always with the same set of keys):
	 * the item ID linked to that page and the content language of the page.
	 * Entity IDs that are not item IDs are ignored, as are links whose page name
	 * does not yield a valid title or an existing page.
	 *
	 * @param EntityId[] $entityIds
	 * @return array [ ItemId[] $itemIdsByPageId, string[] $contentLanguagesByPageId ]
	 */
	private function findImplicitUsages( array $entityIds ): array {
		$numericItemIds = [];
		foreach ( $entityIds as $entityId ) {
			if ( $entityId instanceof ItemId ) {
				$numericItemIds[] = $entityId->getNumericId();
			}
		}
		if ( $numericItemIds === [] ) {
			// Without item IDs there can be no implicit usage.
			// Return early rather than calling getLinks() with an empty ID list
			// (NOTE(review): an empty list is presumably treated as “no restriction”
			// by the sitelink lookup, which would match every link of this site – confirm).
			return [ [], [] ];
		}

		// each link is an array [ string $siteId, string $pageName, int $itemId ]
		$links = $this->siteLinkLookup->getLinks( $numericItemIds, [ $this->globalSiteId ] );

		// Build one Title per link and remember the numeric item ID at the same index.
		// Page names that do not yield a Title are skipped, so that no null
		// ends up in the link batch or gets dereferenced below.
		$titles = [];
		$numericItemIdsByIndex = [];
		foreach ( $links as $link ) {
			$title = $this->titleFactory->newFromDBkey( $link[1] );
			if ( $title !== null ) {
				$titles[] = $title;
				$numericItemIdsByIndex[] = $link[2];
			}
		}

		// preload the titles in bulk (page ID and language)
		$this->linkBatchFactory->newLinkBatch( $titles )->execute();

		if ( $this->allowLocalShortDesc ) {
			// look up which of them have local descriptions
			// (any page ID that exists in $localShortDescriptions overrides the central description
			// locally and should therefore not have an implicit usage)
			$localShortDescriptions = $this->descriptionLookup->getDescriptions(
				$titles,
				DescriptionLookup::SOURCE_LOCAL
			);
		} else {
			$localShortDescriptions = [];
		}

		// Fill both result arrays in a single pass so their keys cannot diverge.
		$itemIdsByPageId = [];
		$contentLanguagesByPageId = [];
		foreach ( $titles as $index => $title ) {
			$pageId = $title->getArticleID();
			if ( !$pageId || isset( $localShortDescriptions[$pageId] ) ) {
				// page does not exist, or its description is overridden locally
				continue;
			}
			$itemIdsByPageId[$pageId] = ItemId::newFromNumber( $numericItemIdsByIndex[$index] );
			$contentLanguagesByPageId[$pageId] = $title->getPageLanguage()->getCode();
		}

		return [ $itemIdsByPageId, $contentLanguagesByPageId ];
	}
252
253
	/**
	 * Restrict the implicit usages to those matching the given aspects.
	 *
	 * Accepts the two arrays produced by {@link findImplicitUsages}
	 * and returns arrays of the same shape, with non-matching pages removed.
	 *
	 * @param ItemId[] $itemIdsByPageId
	 * @param string[] $contentLanguagesByPageId
	 * @param string[] $aspects
	 * @return array [ ItemId[] $itemIdsByPageId, string[] $contentLanguagesByPageId ]
	 */
	private function filterImplicitUsages(
		array $itemIdsByPageId,
		array $contentLanguagesByPageId,
		array $aspects
	): array {
		$unfiltered = [ $itemIdsByPageId, $contentLanguagesByPageId ];

		if ( $aspects === [] ) {
			// no aspect restriction: every usage is wanted
			return $unfiltered;
		}

		// Collect the languages of all description aspects that carry a modifier.
		$wantedLanguages = [];
		foreach ( $aspects as $aspectKey ) {
			[ $aspectName, $language ] = EntityUsage::splitAspectKey( $aspectKey );
			if ( $aspectName === EntityUsage::DESCRIPTION_USAGE ) {
				if ( $language === null ) {
					// unmodified description aspect: all description usages are wanted
					return $unfiltered;
				}
				$wantedLanguages[] = $language;
			}
		}

		// Only some description languages are wanted:
		// keep the pages whose content language is among them …
		$keptLanguages = array_filter(
			$contentLanguagesByPageId,
			static function ( $contentLanguage ) use ( $wantedLanguages ) {
				return in_array( $contentLanguage, $wantedLanguages, /* strict */ true );
			}
		);
		// … and remove every rejected page from the item IDs as well.
		$rejected = array_diff_key( $contentLanguagesByPageId, $keptLanguages );
		$keptItemIds = array_diff_key( $itemIdsByPageId, $rejected );

		return [ $keptItemIds, $keptLanguages ];
	}
296
297
	/**
	 * Delegates to the inner lookup without adding anything.
	 *
	 * @param EntityId[] $entityIds
	 * @return array The inner lookup's result, unchanged.
	 */
	public function getUnusedEntities( array $entityIds ): array {
		// A page that is linked to an item has at least a sitelink usage on that item,
		// so an implicit usage can never turn an unused entity into a used one.
		$unusedEntities = $this->usageLookup->getUnusedEntities( $entityIds );

		return $unusedEntities;
	}
303
304
}
305