Completed
Branch master (939199)
by
unknown
39:35
created

includes/parser/LinkHolderArray.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Holder of replacement pairs for wiki links
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Parser
22
 */
23
24
/**
 * Holder of replacement pairs for wiki links.
 *
 * makeHolder() hands out comment-style placeholders ("<!--LINK ns:id-->" for
 * internal links, "<!--IWLINK id-->" for interwiki links) and records the data
 * behind each of them. replace() later swaps the placeholders for rendered
 * links, and replaceText() swaps them for the plain link text.
 *
 * @ingroup Parser
 */
class LinkHolderArray {
	/**
	 * @var array Internal link entries, indexed by namespace and then by link ID.
	 *  Each entry has the keys 'title', 'text' and 'pdbk', plus optionally 'query'
	 *  and (set by doVariants()) 'selflink'.
	 */
	public $internals = [];

	/**
	 * @var array Interwiki link entries indexed by link ID, same entry layout
	 *  as $internals.
	 */
	public $interwikis = [];

	/**
	 * @var int Running holder count as maintained by makeHolder() and merge();
	 *  compared against $wgLinkHolderBatchSize in isBig().
	 */
	public $size = 0;

	/**
	 * @var Parser
	 */
	public $parent;

	/**
	 * @var int|null ID offset applied by mergeForeignCallback(); only set while
	 *  mergeForeign() is running.
	 */
	protected $tempIdOffset;

	/**
	 * @param Parser $parent Parser that supplies link IDs, the output object,
	 *  the link renderer and the current title
	 */
	public function __construct( $parent ) {
		$this->parent = $parent;
	}

	/**
	 * Reduce memory usage to reduce the impact of circular references
	 */
	public function __destruct() {
		foreach ( $this as $name => $value ) {
			unset( $this->$name );
		}
	}

	/**
	 * Don't serialize the parent object, it is big, and not needed when it is
	 * a parameter to mergeForeign(), which is the only application of
	 * serializing at present.
	 *
	 * Compact the titles, only serialize the text form. Note that the 'title'
	 * element is removed from the live entries of this object, not from a copy.
	 *
	 * @return array Names of the properties to serialize
	 */
	public function __sleep() {
		foreach ( $this->internals as &$nsLinks ) {
			foreach ( $nsLinks as &$entry ) {
				unset( $entry['title'] );
			}
		}
		unset( $nsLinks );
		unset( $entry );

		foreach ( $this->interwikis as &$entry ) {
			unset( $entry['title'] );
		}
		unset( $entry );

		return [ 'internals', 'interwikis', 'size' ];
	}

	/**
	 * Recreate the Title objects from the serialized prefixed DB keys.
	 *
	 * Title::newFromText() may fail to produce a title, in which case the
	 * entry's 'title' is left as whatever it returned; the replace methods
	 * skip such entries.
	 */
	public function __wakeup() {
		foreach ( $this->internals as &$nsLinks ) {
			foreach ( $nsLinks as &$entry ) {
				$entry['title'] = Title::newFromText( $entry['pdbk'] );
			}
		}
		unset( $nsLinks );
		unset( $entry );

		foreach ( $this->interwikis as &$entry ) {
			$entry['title'] = Title::newFromText( $entry['pdbk'] );
		}
		unset( $entry );
	}

	/**
	 * Merge another LinkHolderArray into this one.
	 *
	 * Keys are preserved; where both arrays hold the same key, the entry
	 * already present in this object wins (array union).
	 *
	 * @param LinkHolderArray $other
	 */
	public function merge( $other ) {
		foreach ( $other->internals as $ns => $entries ) {
			$this->size += count( $entries );
			if ( !isset( $this->internals[$ns] ) ) {
				$this->internals[$ns] = $entries;
			} else {
				$this->internals[$ns] += $entries;
			}
		}
		$this->interwikis += $other->interwikis;
	}

	/**
	 * Merge a LinkHolderArray from another parser instance into this one. The
	 * keys will not be preserved. Any text which went with the old
	 * LinkHolderArray and needs to work with the new one should be passed in
	 * the $texts array. The strings in this array will have their link holders
	 * converted for use in the destination link holder. The resulting array of
	 * strings will be returned.
	 *
	 * @param LinkHolderArray $other
	 * @param array $texts Array of strings
	 * @return array
	 */
	public function mergeForeign( $other, $texts ) {
		$this->tempIdOffset = $idOffset = $this->parent->nextLinkID();
		// Start at the offset rather than 0: if $other holds no links at all,
		// the setLinkID() call below must not move the parent's counter
		// backwards onto IDs that are already in use.
		$maxId = $idOffset;

		# Renumber internal links
		foreach ( $other->internals as $ns => $nsLinks ) {
			foreach ( $nsLinks as $key => $entry ) {
				$newKey = $idOffset + $key;
				$this->internals[$ns][$newKey] = $entry;
				$maxId = max( $maxId, $newKey );
			}
		}
		$texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
			[ $this, 'mergeForeignCallback' ], $texts );

		# Renumber interwiki links
		foreach ( $other->interwikis as $key => $entry ) {
			$newKey = $idOffset + $key;
			$this->interwikis[$newKey] = $entry;
			$maxId = max( $maxId, $newKey );
		}
		$texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
			[ $this, 'mergeForeignCallback' ], $texts );

		# Set the parent link ID to be beyond the highest used ID
		$this->parent->setLinkID( $maxId + 1 );
		$this->tempIdOffset = null;
		return $texts;
	}

	/**
	 * Callback for mergeForeign(): shift the numeric ID of one placeholder by
	 * the offset currently stored in $tempIdOffset.
	 *
	 * @param array $m Regex match: prefix, numeric ID, suffix in groups 1 to 3
	 * @return string
	 */
	protected function mergeForeignCallback( $m ) {
		return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
	}

	/**
	 * Get a subset of the current LinkHolderArray which is sufficient to
	 * interpret the given text.
	 *
	 * Placeholders in the text which have no entry in this object are ignored.
	 *
	 * @param string $text
	 * @return LinkHolderArray
	 */
	public function getSubArray( $text ) {
		$sub = new LinkHolderArray( $this->parent );

		# Internal links
		if ( preg_match_all( '/<!--LINK (\d+):(\d+)-->/', $text, $matches, PREG_SET_ORDER ) ) {
			foreach ( $matches as $m ) {
				list( , $ns, $key ) = $m;
				if ( isset( $this->internals[$ns][$key] ) ) {
					$sub->internals[$ns][$key] = $this->internals[$ns][$key];
				}
			}
		}

		# Interwiki links
		if ( preg_match_all( '/<!--IWLINK (\d+)-->/', $text, $matches, PREG_SET_ORDER ) ) {
			foreach ( $matches as $m ) {
				$key = $m[1];
				if ( isset( $this->interwikis[$key] ) ) {
					$sub->interwikis[$key] = $this->interwikis[$key];
				}
			}
		}

		return $sub;
	}

	/**
	 * Returns true if the memory requirements of this object are getting large
	 * @return bool
	 */
	public function isBig() {
		global $wgLinkHolderBatchSize;
		return $this->size > $wgLinkHolderBatchSize;
	}

	/**
	 * Clear all stored link holders.
	 * Make sure you don't have any text left using these link holders, before you call this
	 */
	public function clear() {
		$this->internals = [];
		$this->interwikis = [];
		$this->size = 0;
	}

	/**
	 * Make a link placeholder. The text returned can be later resolved to a real link with
	 * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
	 * parsing of interwiki links, and secondly to allow all existence checks and
	 * article length checks (for stub links) to be bundled into a single query.
	 *
	 * @param Title $nt
	 * @param string $text
	 * @param array $query [optional]
	 * @param string $trail [optional]
	 * @param string $prefix [optional]
	 * @return string The placeholder followed by the unused part of the trail,
	 *  or an "<!-- ERROR -->" marker plus the raw pieces if $nt is not an object
	 */
	public function makeHolder( $nt, $text = '', $query = [], $trail = '', $prefix = '' ) {
		if ( !is_object( $nt ) ) {
			# Fail gracefully
			return "<!-- ERROR -->{$prefix}{$text}{$trail}";
		}

		# Separate the link trail from the rest of the link
		list( $inside, $trail ) = Linker::splitTrail( $trail );

		$entry = [
			'title' => $nt,
			'text' => $prefix . $text . $inside,
			'pdbk' => $nt->getPrefixedDBkey(),
		];
		if ( $query !== [] ) {
			$entry['query'] = $query;
		}

		if ( $nt->isExternal() ) {
			// Use a globally unique ID to keep the objects mergable
			$key = $this->parent->nextLinkID();
			$this->interwikis[$key] = $entry;
			$retVal = "<!--IWLINK $key-->{$trail}";
		} else {
			$key = $this->parent->nextLinkID();
			$ns = $nt->getNamespace();
			$this->internals[$ns][$key] = $entry;
			$retVal = "<!--LINK $ns:$key-->{$trail}";
		}
		$this->size++;

		return $retVal;
	}

	/**
	 * Replace <!--LINK--> and <!--IWLINK--> link placeholders with actual
	 * links, in the buffer
	 *
	 * @param string &$text Buffer, modified in place
	 */
	public function replace( &$text ) {
		$this->replaceInternal( $text );
		$this->replaceInterwiki( $text );
	}

	/**
	 * Replace internal links
	 *
	 * Works in three passes: decide a colour (CSS class string, or 'new' for
	 * a missing page) for every stored link, using the link cache and one
	 * batched page query; let language variants adjust the result; then build
	 * the HTML for every placeholder and substitute it into the text.
	 *
	 * @param string &$text Buffer, modified in place
	 */
	protected function replaceInternal( &$text ) {
		if ( !$this->internals ) {
			return;
		}

		global $wgContLang;

		$colours = [];
		$linkCache = LinkCache::singleton();
		$output = $this->parent->getOutput();
		$linkRenderer = $this->parent->getLinkRenderer();

		$dbr = wfGetDB( DB_REPLICA );

		# Sort by namespace
		ksort( $this->internals );

		// page_id => prefixed DB key, handed to the GetLinkColours hook
		$linkcolour_ids = [];

		# Generate query
		$lb = new LinkBatch();
		$lb->setCaller( __METHOD__ );

		foreach ( $this->internals as $ns => $entries ) {
			foreach ( $entries as $entry ) {
				/** @var Title $title */
				$title = $entry['title'];
				$pdbk = $entry['pdbk'];

				# Skip invalid entries.
				# Result will be ugly, but prevents crash.
				if ( $title === null ) {
					continue;
				}

				# Check if it's a static known link, e.g. interwiki
				if ( $title->isAlwaysKnown() ) {
					$colours[$pdbk] = '';
				} elseif ( $ns == NS_SPECIAL ) {
					$colours[$pdbk] = 'new';
				} else {
					$id = $linkCache->getGoodLinkID( $pdbk );
					if ( $id != 0 ) {
						$colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
						$output->addLink( $title, $id );
						$linkcolour_ids[$id] = $pdbk;
					} elseif ( $linkCache->isBadLink( $pdbk ) ) {
						$colours[$pdbk] = 'new';
					} else {
						# Not in the link cache, add it to the query
						$lb->addObj( $title );
					}
				}
			}
		}

		if ( !$lb->isEmpty() ) {
			$fields = array_merge(
				LinkCache::getSelectFields(),
				[ 'page_namespace', 'page_title' ]
			);

			$res = $dbr->select(
				'page',
				$fields,
				$lb->constructSet( 'page', $dbr ),
				__METHOD__
			);

			# Fetch data and form into an associative array
			# non-existent = broken
			foreach ( $res as $s ) {
				$title = Title::makeTitle( $s->page_namespace, $s->page_title );
				$pdbk = $title->getPrefixedDBkey();
				$linkCache->addGoodLinkObjFromRow( $title, $s );
				$output->addLink( $title, $s->page_id );
				$colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
				// add id to the extension todolist
				$linkcolour_ids[$s->page_id] = $pdbk;
			}
			unset( $res );
		}

		if ( count( $linkcolour_ids ) ) {
			// pass an array of page_ids to an extension
			Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
		}

		# Do a second query for different language variants of links and categories
		if ( $wgContLang->hasVariants() ) {
			$this->doVariants( $colours );
		}

		# Construct search and replace arrays
		$replacePairs = [];
		foreach ( $this->internals as $ns => $entries ) {
			foreach ( $entries as $index => $entry ) {
				$title = $entry['title'];
				if ( $title === null ) {
					// Same invalid entries that were skipped above: leave the
					// placeholder alone instead of passing null to the link
					// cache and the link renderer.
					continue;
				}

				$pdbk = $entry['pdbk'];
				$query = isset( $entry['query'] ) ? $entry['query'] : [];
				$searchkey = "<!--LINK $ns:$index-->";
				$displayText = $entry['text'];

				if ( isset( $entry['selflink'] ) ) {
					$replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
					continue;
				}

				if ( $displayText === '' ) {
					$displayText = null;
				} else {
					$displayText = new HtmlArmor( $displayText );
				}

				// Anything that was never assigned a colour is treated as missing
				if ( !isset( $colours[$pdbk] ) ) {
					$colours[$pdbk] = 'new';
				}

				$attribs = [];
				if ( $colours[$pdbk] == 'new' ) {
					$linkCache->addBadLinkObj( $title );
					$output->addLink( $title, 0 );
					$link = $linkRenderer->makeBrokenLink(
						$title, $displayText, $attribs, $query
					);
				} else {
					$link = $linkRenderer->makePreloadedLink(
						$title, $displayText, $colours[$pdbk], $attribs, $query
					);
				}

				$replacePairs[$searchkey] = $link;
			}
		}

		// NOTE(review): the second argument is presumably the index of the
		// capture group used as lookup key (here: the whole placeholder).
		$replacer = new HashtableReplacer( $replacePairs, 1 );

		# Do the thing
		$text = preg_replace_callback(
			'/(<!--LINK .*?-->)/',
			$replacer->cb(),
			$text
		);
	}

	/**
	 * Replace interwiki links
	 *
	 * @param string &$text Buffer, modified in place
	 */
	protected function replaceInterwiki( &$text ) {
		if ( empty( $this->interwikis ) ) {
			return;
		}

		# Make interwiki link HTML
		$output = $this->parent->getOutput();
		$replacePairs = [];
		$linkRenderer = $this->parent->getLinkRenderer();
		foreach ( $this->interwikis as $key => $link ) {
			if ( $link['title'] === null ) {
				// Invalid entry (e.g. the title could not be recreated in
				// __wakeup()); leave its placeholder untouched.
				continue;
			}
			$replacePairs[$key] = $linkRenderer->makeLink(
				$link['title'],
				new HtmlArmor( $link['text'] )
			);
			$output->addInterwikiLink( $link['title'] );
		}

		// Lookup key is capture group 1 of the pattern below, i.e. the link ID
		$replacer = new HashtableReplacer( $replacePairs, 1 );

		$text = preg_replace_callback(
			'/<!--IWLINK (.*?)-->/',
			$replacer->cb(),
			$text );
	}

	/**
	 * Modify $this->internals and $colours according to language variant linking rules
	 *
	 * Links that are still missing after the first query are converted to all
	 * language variants; if a variant exists as a page, the holder is pointed
	 * at that page. The same lookup is done for the categories of the output,
	 * and missing categories are replaced by an existing variant.
	 *
	 * @param array &$colours Link colours indexed by prefixed DB key
	 */
	protected function doVariants( &$colours ) {
		global $wgContLang;
		$linkBatch = new LinkBatch();
		$variantMap = []; // maps $pdbkey_Variant => $keys (of link holders)
		$output = $this->parent->getOutput();
		$linkCache = LinkCache::singleton();
		$titlesToBeConverted = '';
		$titlesAttrs = [];

		// Concatenate titles to a single string, thus we only need auto convert the
		// single string to all variants. This would improve parser's performance
		// significantly.
		foreach ( $this->internals as $ns => $entries ) {
			if ( $ns == NS_SPECIAL ) {
				continue;
			}
			foreach ( $entries as $index => $entry ) {
				if ( $entry['title'] === null ) {
					// Invalid entry, nothing to convert
					continue;
				}
				$pdbk = $entry['pdbk'];
				// we only deal with new links (in its first query)
				if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
					$titlesAttrs[] = [ $index, $entry['title'] ];
					// separate titles with \0 because it would never appears
					// in a valid title
					$titlesToBeConverted .= $entry['title']->getText() . "\0";
				}
			}
		}

		// Now do the conversion and explode string to text of titles
		$titlesAllVariants = $wgContLang->autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
		$allVariantsName = array_keys( $titlesAllVariants );
		foreach ( $titlesAllVariants as &$titlesVariant ) {
			$titlesVariant = explode( "\0", $titlesVariant );
		}
		// Break the reference so later assignments cannot overwrite the last element
		unset( $titlesVariant );

		// Then add variants of links to link batch
		$parentTitle = $this->parent->getTitle();
		foreach ( $titlesAttrs as $i => $attrs ) {
			/** @var Title $title */
			list( $index, $title ) = $attrs;
			$ns = $title->getNamespace();
			$text = $title->getText();

			foreach ( $allVariantsName as $variantName ) {
				$textVariant = $titlesAllVariants[$variantName][$i];
				if ( $textVariant === $text ) {
					continue;
				}

				$variantTitle = Title::makeTitle( $ns, $textVariant );

				// Self-link checking for mixed/different variant titles. At this point, we
				// already know the exact title does not exist, so the link cannot be to a
				// variant of the current title that exists as a separate page.
				if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
					$this->internals[$ns][$index]['selflink'] = true;
					continue 2;
				}

				$linkBatch->addObj( $variantTitle );
				$variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
			}
		}

		// process categories, check if a category exists in some variant
		$categoryMap = []; // maps $category_variant => $category (dbkeys)
		$varCategories = []; // category replacements oldDBkey => newDBkey
		foreach ( $output->getCategoryLinks() as $category ) {
			$categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
			if ( $categoryTitle === null ) {
				// makeTitleSafe() gives null for an invalid title. Such a
				// category cannot be looked up, and it must not end up in
				// $categoryMap because exists() is called on the stored title
				// further down.
				continue;
			}
			$linkBatch->addObj( $categoryTitle );
			$variants = $wgContLang->autoConvertToAllVariants( $category );
			foreach ( $variants as $variant ) {
				if ( $variant === $category ) {
					continue;
				}
				$variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
				if ( $variantTitle === null ) {
					continue;
				}
				$linkBatch->addObj( $variantTitle );
				$categoryMap[$variant] = [ $category, $categoryTitle ];
			}
		}

		if ( $linkBatch->isEmpty() ) {
			return;
		}

		// construct query
		$dbr = wfGetDB( DB_REPLICA );
		$fields = array_merge(
			LinkCache::getSelectFields(),
			[ 'page_namespace', 'page_title' ]
		);

		$varRes = $dbr->select( 'page',
			$fields,
			$linkBatch->constructSet( 'page', $dbr ),
			__METHOD__
		);

		// page_id => prefixed DB key, handed to the GetLinkColours hook
		$linkcolour_ids = [];
		$linkRenderer = $this->parent->getLinkRenderer();

		// for each found variants, figure out link holders and replace
		foreach ( $varRes as $s ) {
			$variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
			$varPdbk = $variantTitle->getPrefixedDBkey();
			$vardbk = $variantTitle->getDBkey();

			$holderKeys = [];
			if ( isset( $variantMap[$varPdbk] ) ) {
				$holderKeys = $variantMap[$varPdbk];
				$linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
				$output->addLink( $variantTitle, $s->page_id );
			}

			// loop over link holders
			foreach ( $holderKeys as $key ) {
				list( $ns, $index ) = explode( ':', $key, 2 );
				$entry =& $this->internals[$ns][$index];
				$pdbk = $entry['pdbk'];

				if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
					// found link in some of the variants, replace the link holder data
					$entry['title'] = $variantTitle;
					$entry['pdbk'] = $varPdbk;

					// set pdbk and colour. The ID belongs to the variant page,
					// so it has to be mapped to the variant's key, which is
					// also the key the colour is stored under.
					$colours[$varPdbk] = $linkRenderer->getLinkClasses( $variantTitle );
					$linkcolour_ids[$s->page_id] = $varPdbk;
				}
			}
			// Break the reference into $this->internals
			unset( $entry );

			// check if the object is a variant of a category
			if ( isset( $categoryMap[$vardbk] ) ) {
				list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
				if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
					$varCategories[$oldkey] = $vardbk;
				}
			}
		}
		Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );

		// rebuild the categories in original order (if there are replacements)
		if ( count( $varCategories ) > 0 ) {
			$newCats = [];
			$originalCats = $output->getCategories();
			foreach ( $originalCats as $cat => $sortkey ) {
				// make the replacement
				if ( array_key_exists( $cat, $varCategories ) ) {
					$newCats[$varCategories[$cat]] = $sortkey;
				} else {
					$newCats[$cat] = $sortkey;
				}
			}
			$output->setCategoryLinks( $newCats );
		}
	}

	/**
	 * Replace <!--LINK--> link placeholders with plain text of links
	 * (not HTML-formatted).
	 *
	 * @param string $text
	 * @return string
	 */
	public function replaceText( $text ) {
		return preg_replace_callback(
			'/<!--(LINK|IWLINK) (.*?)-->/',
			[ $this, 'replaceTextCallback' ],
			$text );
	}

	/**
	 * Callback for replaceText()
	 *
	 * @param array $matches Regex match: placeholder type in group 1, key in group 2
	 * @return string The stored link text, or the unchanged placeholder if
	 *  there is no entry for it
	 * @private
	 */
	public function replaceTextCallback( $matches ) {
		$type = $matches[1];
		$key = $matches[2];
		if ( $type == 'LINK' ) {
			// Pad so that a malformed key without ':' does not raise a notice
			list( $ns, $index ) = array_pad( explode( ':', $key, 2 ), 2, '' );
			if ( isset( $this->internals[$ns][$index]['text'] ) ) {
				return $this->internals[$ns][$index]['text'];
			}
		} elseif ( $type == 'IWLINK' ) {
			if ( isset( $this->interwikis[$key]['text'] ) ) {
				return $this->interwikis[$key]['text'];
			}
		}
		return $matches[0];
	}
}
649