Completed
Branch master (715cbe)
by
unknown
51:55
created

includes/parser/Parser.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * PHP parser that converts wiki markup to HTML.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Parser
22
 */
23
use MediaWiki\Linker\LinkRenderer;
24
use MediaWiki\MediaWikiServices;
25
use Wikimedia\ScopedCallback;
26
27
/**
28
 * @defgroup Parser Parser
29
 */
30
31
/**
32
 * PHP Parser - Processes wiki markup (which uses a more user-friendly
33
 * syntax, such as "[[link]]" for making links), and provides a one-way
34
 * transformation of that wiki markup into (X)HTML output / markup
35
 * (which in turn the browser understands, and can display).
36
 *
37
 * There are seven main entry points into the Parser class:
38
 *
39
 * - Parser::parse()
40
 *     produces HTML output
41
 * - Parser::preSaveTransform()
42
 *     produces altered wiki markup
43
 * - Parser::preprocess()
44
 *     removes HTML comments and expands templates
45
 * - Parser::cleanSig() and Parser::cleanSigInSig()
46
 *     cleans a signature before saving it to preferences
47
 * - Parser::getSection()
48
 *     return the content of a section from an article for section editing
49
 * - Parser::replaceSection()
50
 *     replaces a section by number inside an article
51
 * - Parser::getPreloadText()
52
 *     removes <noinclude> sections and <includeonly> tags
53
 *
54
 * Globals used:
55
 *    object: $wgContLang
56
 *
57
 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
58
 *
59
 * @par Settings:
60
 * $wgNamespacesWithSubpages
61
 *
62
 * @par Settings only within ParserOptions:
63
 * $wgAllowExternalImages
64
 * $wgAllowSpecialInclusion
65
 * $wgInterwikiMagic
66
 * $wgMaxArticleSize
67
 *
68
 * @ingroup Parser
69
 */
70
class Parser {
71
	/**
72
	 * Update this version number when the ParserOutput format
73
	 * changes in an incompatible way, so the parser cache
74
	 * can automatically discard old data.
75
	 */
76
	const VERSION = '1.6.4';
77
78
	/**
79
	 * Update this version number when the output of serialiseHalfParsedText()
80
	 * changes in an incompatible way
81
	 */
82
	const HALF_PARSED_VERSION = 2;
83
84
	# Flags for Parser::setFunctionHook
85
	const SFH_NO_HASH = 1;
86
	const SFH_OBJECT_ARGS = 2;
87
88
	# Constants needed for external link processing
89
	# Everything except bracket, space, or control characters
90
	# \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
91
	# as well as U+3000 (IDEOGRAPHIC SPACE), for bug 19052
92
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
93
	# Simplified expression to match an IPv4 or IPv6 address, or
94
	# at least one character of a host name (embeds EXT_LINK_URL_CLASS)
95
	const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
96
	# RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
97
	// @codingStandardsIgnoreStart Generic.Files.LineLength
98
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
99
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
100
	// @codingStandardsIgnoreEnd
101
102
	# Regular expression for a non-newline space
103
	const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
104
105
	# Flags for preprocessToDom
106
	const PTD_FOR_INCLUSION = 1;
107
108
	# Allowed values for $this->mOutputType
109
	# Parameter to startExternalParse().
110
	const OT_HTML = 1; # like parse()
111
	const OT_WIKI = 2; # like preSaveTransform()
112
	const OT_PREPROCESS = 3; # like preprocess()
113
	const OT_MSG = 3;
114
	const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
115
116
	/**
117
	 * @var string Prefix and suffix for temporary replacement strings
118
	 * for the multipass parser.
119
	 *
120
	 * \x7f should never appear in input as it's disallowed in XML.
121
	 * Using it at the front also gives us a little extra robustness
122
	 * since it shouldn't match when butted up against identifier-like
123
	 * string constructs.
124
	 *
125
	 * Must not consist of all title characters, or else it will change
126
	 * the behavior of <nowiki> in a link.
127
	 *
128
	 * Must have a character that needs escaping in attributes, otherwise
129
	 * someone could put a strip marker in an attribute, to get around
130
	 * escaping quote marks, and break out of the attribute. Thus we add
131
	 * `'".
132
	 */
133
	const MARKER_SUFFIX = "-QINU`\"'\x7f";
134
	const MARKER_PREFIX = "\x7f'\"`UNIQ-";
135
136
	# Markers used for wrapping the table of contents
137
	const TOC_START = '<mw:toc>';
138
	const TOC_END = '</mw:toc>';
139
140
	# Persistent:
141
	public $mTagHooks = [];
142
	public $mTransparentTagHooks = [];
143
	public $mFunctionHooks = [];
144
	public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
145
	public $mFunctionTagHooks = [];
146
	public $mStripList = [];
147
	public $mDefaultStripList = [];
148
	public $mVarCache = [];
149
	public $mImageParams = [];
150
	public $mImageParamsMagicArray = [];
151
	public $mMarkerIndex = 0;
152
	public $mFirstCall = true;
153
154
	# Initialised by initialiseVariables()
155
156
	/**
157
	 * @var MagicWordArray
158
	 */
159
	public $mVariables;
160
161
	/**
162
	 * @var MagicWordArray
163
	 */
164
	public $mSubstWords;
165
	# Initialised in constructor
	public $mConf, $mExtLinkBracketedRegex, $mUrlProtocols;

	/**
	 * @var string Name of the preprocessor class chosen in the constructor and
	 * instantiated by getPreprocessor(). Declared explicitly so that it is no
	 * longer created as an undeclared dynamic property; kept public because
	 * that is the visibility the dynamic property had.
	 */
	public $mPreprocessorClass;
167
168
	# Initialized in getPreprocessor()
169
	/** @var Preprocessor */
170
	public $mPreprocessor;
171
172
	# Cleared with clearState():
173
	/**
174
	 * @var ParserOutput
175
	 */
176
	public $mOutput;
177
	public $mAutonumber;
178
179
	/**
180
	 * @var StripState
181
	 */
182
	public $mStripState;
183
184
	public $mIncludeCount;
185
	/**
186
	 * @var LinkHolderArray
187
	 */
188
	public $mLinkHolders;
189
190
	public $mLinkID;
191
	public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
192
	public $mDefaultSort;
193
	public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
194
	public $mExpensiveFunctionCount; # number of expensive parser function calls
195
	public $mShowToc, $mForceTocPosition;
196
197
	/**
198
	 * @var User
199
	 */
200
	public $mUser; # User object; only used when doing pre-save transform
201
202
	# Temporary
203
	# These are variables reset at least once per parse regardless of $clearState
204
205
	/**
206
	 * @var ParserOptions
207
	 */
208
	public $mOptions;
209
210
	/**
211
	 * @var Title
212
	 */
213
	public $mTitle;        # Title context, used for self-link rendering and similar things
214
	public $mOutputType;   # Output type, one of the OT_xxx constants
215
	public $ot;            # Shortcut alias, see setOutputType()
216
	public $mRevisionObject; # The revision object of the specified revision ID
217
	public $mRevisionId;   # ID to display in {{REVISIONID}} tags
218
	public $mRevisionTimestamp; # The timestamp of the specified revision ID
219
	public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
220
	public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
221
	public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
222
	public $mInputSize = false; # For {{PAGESIZE}} on current page.
223
224
	/**
225
	 * @var string Deprecated accessor for the strip marker prefix.
226
	 * @deprecated since 1.26; use Parser::MARKER_PREFIX instead.
227
	 **/
228
	public $mUniqPrefix = Parser::MARKER_PREFIX;
229
230
	/**
231
	 * @var array Array with the language name of each language link (i.e. the
232
	 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
233
	 * duplicate language links to the ParserOutput.
234
	 */
235
	public $mLangLinkLanguages;
236
237
	/**
238
	 * @var MapCacheLRU|null
239
	 * @since 1.24
240
	 *
241
	 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
242
	 */
243
	public $currentRevisionCache;
244
245
	/**
246
	 * @var bool Recursive call protection.
247
	 * This variable should be treated as if it were private.
248
	 */
249
	public $mInParse = false;
250
251
	/** @var SectionProfiler */
252
	protected $mProfiler;
253
254
	/**
255
	 * @var LinkRenderer
256
	 */
257
	protected $mLinkRenderer;
258
259
	/**
	 * Store the configuration, build the regex used for bracketed external
	 * links, and pick the preprocessor implementation class.
	 *
	 * @param array $conf Configuration array. If the 'preprocessorClass' key
	 *   is set, its value is used as the preprocessor class name.
	 */
	public function __construct( $conf = [] ) {
		$this->mConf = $conf;
		$this->mUrlProtocols = wfUrlProtocols();
		$this->mExtLinkBracketedRegex = implode( '', [
			'/\[(((?i)',
			$this->mUrlProtocols,
			')',
			self::EXT_LINK_ADDR,
			self::EXT_LINK_URL_CLASS,
			'*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su',
		] );

		# Preprocessor_Hash is the fallback for every case except an explicit
		# override or an available core DOM extension.
		$preprocessorClass = 'Preprocessor_Hash';
		if ( isset( $conf['preprocessorClass'] ) ) {
			$preprocessorClass = $conf['preprocessorClass'];
		} elseif ( !defined( 'HPHP_VERSION' ) ) {
			# (Under HipHop, Preprocessor_Hash is much faster than Preprocessor_DOM,
			# so the DOM checks below are skipped there.)
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (bug 13770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$preprocessorClass = 'Preprocessor_DOM';
			}
		}
		$this->mPreprocessorClass = $preprocessorClass;

		wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
	}
284
285
	/**
	 * Drop every property on destruction, to lessen the memory impact of
	 * circular references. The link holder array is released first.
	 */
	public function __destruct() {
		if ( isset( $this->mLinkHolders ) ) {
			unset( $this->mLinkHolders );
		}
		foreach ( array_keys( get_object_vars( $this ) ) as $property ) {
			unset( $this->$property );
		}
	}
296
297
	/**
	 * Reset per-parse recursion protection on the clone, detach fields that
	 * may have been turned into references, and let extensions react through
	 * the 'ParserCloned' hook.
	 */
	public function __clone() {
		$this->mInParse = false;

		// Bug 56226: taking a reference to an object field turns the field
		// itself into a reference (for as long as the other reference lives),
		// and clone copies reference fields as references. Old hooks pass
		// fields by reference, so the clone could end up sharing these fields
		// with the original object.
		foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
			// Take a plain (non-reference) copy of the value, then rebind the
			// field so that it references only that fresh copy.
			$detached = $this->$field;
			$this->$field =& $detached;
			unset( $detached );
		}

		Hooks::run( 'ParserCloned', [ $this ] );
	}
319
320
	/**
	 * One-time initialisation performed on the first use of the parser:
	 * registers the core parser functions and tag hooks, initialises the
	 * variable magic words, and runs the 'ParserFirstCallInit' hook.
	 *
	 * Does nothing when it has already run ($this->mFirstCall is false).
	 */
	public function firstCallInit() {
		if ( !$this->mFirstCall ) {
			return;
		}
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Hook handlers take the parser by reference. PHP 7.1+ complains when
		// $this itself is used in a reference context, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
	}
335
336
	/**
	 * Reset all per-parse state: a fresh ParserOutput, LinkHolderArray,
	 * StripState and SectionProfiler, zeroed counters, and emptied caches.
	 * Runs firstCallInit() first if it has not run yet, and finishes by
	 * running the 'ParserClearState' hook.
	 *
	 * Reads $this->mOptions, so that must be set before calling.
	 *
	 * @private
	 */
	public function clearState() {
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
		$this->mAutonumber = 0;
		$this->mIncludeCount = [];
		$this->mLinkHolders = new LinkHolderArray( $this );
		$this->mLinkID = 0;
		$this->mRevisionObject = $this->mRevisionTimestamp =
			$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
		$this->mVarCache = [];
		$this->mUser = null;
		$this->mLangLinkLanguages = [];
		$this->currentRevisionCache = null;

		$this->mStripState = new StripState;

		# Clear these on every parse, bug 4549
		$this->mTplRedirCache = $this->mTplDomCache = [];

		$this->mShowToc = true;
		$this->mForceTocPosition = false;
		$this->mIncludeSizes = [
			'post-expand' => 0,
			'arg' => 0,
		];
		$this->mPPNodeCount = 0;
		$this->mGeneratedPPNodeCount = 0;
		$this->mHighestExpansionDepth = 0;
		$this->mDefaultSort = false;
		$this->mHeadings = [];
		$this->mDoubleUnderscores = [];
		$this->mExpensiveFunctionCount = 0;

		# Fix cloning: a preprocessor that still points at another parser
		# (the one this object was cloned from) must not be reused.
		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}

		$this->mProfiler = new SectionProfiler();

		// Hook handlers take the parser by reference. PHP 7.1+ complains when
		// $this itself is used in a reference context, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserClearState', [ &$parser ] );
	}
386
387
	/**
	 * Convert wikitext to HTML.
	 * Do not call this function recursively.
	 *
	 * The text is run through internalParse() and internalParseHalfParsed(),
	 * surrounded by the ParserBeforeStrip / ParserAfterStrip / ParserAfterParse
	 * hooks. When the limit report is enabled in the options, resource usage,
	 * a timing profile and cache metadata are recorded on the output object.
	 * Revision-related fields overridden through $revid are restored before
	 * returning.
	 *
	 * @param string $text Text we want to parse
	 * @param Title $title
	 * @param ParserOptions $options
	 * @param bool $linestart
	 * @param bool $clearState
	 * @param int $revid Number to pass in {{REVISIONID}}
	 * @return ParserOutput A ParserOutput
	 */
	public function parse(
		$text, Title $title, ParserOptions $options,
		$linestart = true, $clearState = true, $revid = null
	) {
		/**
		 * First pass--just handle <nowiki> sections, pass the rest off
		 * to internalParse() which does all the real work.
		 */

		global $wgShowHostnames;

		if ( $clearState ) {
			// We use U+007F DELETE to construct strip markers, so we have to make
			// sure that this character does not occur in the input text.
			$text = strtr( $text, "\x7f", "?" );
			// Kept in a local so that whatever lock() returns stays alive
			// until this method returns.
			$magicScopeVariable = $this->lock();
		}

		$this->startParse( $title, $options, self::OT_HTML, $clearState );

		$this->currentRevisionCache = null;
		$this->mInputSize = strlen( $text );
		if ( $this->mOptions->getEnableLimitReport() ) {
			$this->mOutput->resetParseStartTime();
		}

		# Remember the revision context so it can be restored at the end
		$oldRevisionId = $this->mRevisionId;
		$oldRevisionObject = $this->mRevisionObject;
		$oldRevisionTimestamp = $this->mRevisionTimestamp;
		$oldRevisionUser = $this->mRevisionUser;
		$oldRevisionSize = $this->mRevisionSize;
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
			$this->mRevisionObject = null;
			$this->mRevisionTimestamp = null;
			$this->mRevisionUser = null;
			$this->mRevisionSize = null;
		}

		// Hook handlers take the parser by reference. PHP 7.1+ complains when
		// $this itself is used in a reference context, so pass a local alias.
		$parser = $this;

		Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
		# No more strip!
		Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
		$text = $this->internalParse( $text );
		Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

		$text = $this->internalParseHalfParsed( $text, true, $linestart );

		/**
		 * A converted title will be provided in the output object if title and
		 * content conversion are enabled, the article text does not contain
		 * a conversion-suppressing double-underscore tag, and no
		 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
		 * automatic link conversion.
		 */
		if ( !( $options->getDisableTitleConversion()
			|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
			|| isset( $this->mDoubleUnderscores['notitleconvert'] )
			|| $this->mOutput->getDisplayTitle() !== false )
		) {
			$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
			if ( $convruletitle ) {
				$this->mOutput->setTitleText( $convruletitle );
			} else {
				$titleText = $this->getConverterLanguage()->convertTitle( $title );
				$this->mOutput->setTitleText( $titleText );
			}
		}

		# Done parsing! Compute runtime adaptive expiry if set
		$this->mOutput->finalizeAdaptiveCacheExpiry();

		# Warn if too many heavyweight parser functions were used
		if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
			$this->limitationWarn( 'expensive-parserfunction',
				$this->mExpensiveFunctionCount,
				$this->mOptions->getExpensiveParserFunctionLimit()
			);
		}

		# Information on include size limits, for the benefit of users who try to skirt them
		if ( $this->mOptions->getEnableLimitReport() ) {
			$max = $this->mOptions->getMaxIncludeSize();

			$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
			if ( $cpuTime !== null ) {
				$this->mOutput->setLimitReportData( 'limitreport-cputime',
					sprintf( "%.3f", $cpuTime )
				);
			}

			$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
			$this->mOutput->setLimitReportData( 'limitreport-walltime',
				sprintf( "%.3f", $wallTime )
			);

			$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
				[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
				[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
				[ $this->mIncludeSizes['post-expand'], $max ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
				[ $this->mIncludeSizes['arg'], $max ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
				[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
				[ $this->mExpensiveFunctionCount,
					$this->mOptions->getExpensiveParserFunctionLimit() ]
			);
			Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

			$limitReport = '';
			Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );
			if ( $limitReport != '' ) {
				// Sanitize for comment. Note '‐' in the replacement is U+2010,
				// which looks much like the problematic '-'.
				$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
				$text .= "\n<!-- \nNewPP limit report\n$limitReport-->\n";
			}

			// Add on template profiling data in human/machine readable way
			$dataByFunc = $this->mProfiler->getFunctionStats();
			uasort( $dataByFunc, function ( $a, $b ) {
				// Descending order of 'real' time. A sort callback has to
				// return an integer (<0, 0, >0); returning the boolean result
				// of "<" does not give a reliable ordering.
				if ( $a['real'] == $b['real'] ) {
					return 0;
				}
				return ( $a['real'] < $b['real'] ) ? 1 : -1;
			} );
			$profileReport = [];
			foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
				$profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
					$item['%real'], $item['real'], $item['calls'], $item['name'] );
			}
			$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

			// Add other cache related metadata
			if ( $wgShowHostnames ) {
				$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
			}
			$this->mOutput->setLimitReportData( 'cachereport-timestamp',
				$this->mOutput->getCacheTime() );
			$this->mOutput->setLimitReportData( 'cachereport-ttl',
				$this->mOutput->getCacheExpiry() );
			$this->mOutput->setLimitReportData( 'cachereport-transientcontent',
				$this->mOutput->hasDynamicContent() );

			# Log pages that generate more than a tenth of the allowed node count
			if ( $this->mGeneratedPPNodeCount
				> $this->mOptions->getMaxGeneratedPPNodeCount() / 10
			) {
				wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
					$this->mTitle->getPrefixedDBkey() );
			}
		}
		$this->mOutput->setText( $text );

		# Restore the revision context saved above
		$this->mRevisionId = $oldRevisionId;
		$this->mRevisionObject = $oldRevisionObject;
		$this->mRevisionTimestamp = $oldRevisionTimestamp;
		$this->mRevisionUser = $oldRevisionUser;
		$this->mRevisionSize = $oldRevisionSize;
		$this->mInputSize = false;
		$this->currentRevisionCache = null;

		return $this->mOutput;
	}
566
567
	/**
	 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
	 * can be called from an extension tag hook.
	 *
	 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
	 * instead, which means that lists and links have not been fully parsed yet,
	 * and strip markers are still present.
	 *
	 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
	 *
	 * Use this function if you're a parser tag hook and you want to parse
	 * wikitext before or after applying additional transformations, and you
	 * intend to *return the result as hook output*, which will cause it to go
	 * through the rest of parsing process automatically.
	 *
	 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
	 * $text are not expanded
	 *
	 * @param string $text Text extension wants to have parsed
	 * @param bool|PPFrame $frame The frame to use for expanding any template variables
	 * @return string UNSAFE half-parsed HTML
	 */
	public function recursiveTagParse( $text, $frame = false ) {
		// Hook handlers take the parser by reference. PHP 7.1+ complains when
		// $this itself is used in a reference context, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
		Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
		$text = $this->internalParse( $text, false, $frame );
		return $text;
	}
595
596
	/**
	 * Recursive entry point, callable from an extension tag hook, that turns
	 * wikitext into fully parsed HTML which is safe for output.
	 *
	 * It runs recursiveTagParse() and then finishes the half-parsed result
	 * with internalParseHalfParsed(). A parser tag hook that returns its
	 * result as hook output may prefer recursiveTagParse() instead.
	 *
	 * Without a $frame, template variables (e.g., {{{1}}}) in $text are not
	 * expanded.
	 *
	 * @since 1.25
	 *
	 * @param string $text Text extension wants to have parsed
	 * @param bool|PPFrame $frame The frame to use for expanding any template variables
	 * @return string Fully parsed HTML
	 */
	public function recursiveTagParseFully( $text, $frame = false ) {
		$halfParsed = $this->recursiveTagParse( $text, $frame );
		return $this->internalParseHalfParsed( $halfParsed, false );
	}
618
619
	/**
	 * Expand templates and variables in the text, producing valid, static wikitext.
	 * Also removes comments.
	 * Do not call this function recursively.
	 *
	 * Starts a fresh parse in OT_PREPROCESS mode, runs the ParserBeforeStrip
	 * and ParserAfterStrip hooks, expands variables and finally unstrips the
	 * result.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @param int|null $revid
	 * @param bool|PPFrame $frame
	 * @return mixed|string
	 */
	public function preprocess( $text, Title $title = null,
		ParserOptions $options, $revid = null, $frame = false
	) {
		// Kept in a local so that whatever lock() returns stays alive until
		// this method returns.
		$magicScopeVariable = $this->lock();
		$this->startParse( $title, $options, self::OT_PREPROCESS, true );
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
		}
		// Hook handlers take the parser by reference. PHP 7.1+ complains when
		// $this itself is used in a reference context, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
		Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
		return $text;
	}
644
645
	/**
	 * Recursive entry point, callable from an extension tag hook, that expands
	 * variables in the text and then unstrips the result.
	 *
	 * @param string $text Text to be expanded
	 * @param bool|PPFrame $frame The frame to use for expanding any template variables
	 * @return string
	 * @since 1.19
	 */
	public function recursivePreprocess( $text, $frame = false ) {
		$expanded = $this->replaceVariables( $text, $frame );
		return $this->mStripState->unstripBoth( $expanded );
	}
659
660
	/**
	 * Process the wikitext for the "?preload=" feature. (bug 5210)
	 *
	 * "<noinclude>", "<includeonly>" etc. are handled as they would be for
	 * template transclusion; comments, templates, arguments, tag hooks and
	 * parser functions are left untouched.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @param array $params Parameters substituted into the text through
	 *   RawMessage before it is processed
	 * @return string
	 */
	public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
		$rawMessage = new RawMessage( $text );
		$text = $rawMessage->params( $params )->plain();

		# Parser (re)initialisation
		$magicScopeVariable = $this->lock();
		$this->startParse( $title, $options, self::OT_PLAIN, true );

		# Expand as for inclusion, but leave arguments and templates alone
		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$frame = $this->getPreprocessor()->newFrame();
		$expanded = $frame->expand( $dom, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES );

		return $this->mStripState->unstripBoth( $expanded );
	}
687
688
	/**
	 * Return a random string obtained from wfRandomString( 16 ), after
	 * recording a deprecation notice.
	 *
	 * @return string
	 * @deprecated since 1.26; use wfRandomString() instead.
	 */
	public static function getRandomString() {
		wfDeprecated( __METHOD__, '1.26' );

		return wfRandomString( 16 );
	}
698
699
	/**
	 * Override the user that getUser() returns.
	 * Should only be used when doing pre-save transform.
	 *
	 * @param User|null $user User object, or null to reset so that getUser()
	 *   falls back to the user from the ParserOptions
	 */
	public function setUser( $user ) {
		$this->mUser = $user;
	}
708
709
	/**
	 * Return the strip marker prefix (Parser::MARKER_PREFIX), after recording
	 * a deprecation notice.
	 *
	 * @return string
	 * @deprecated since 1.26; use Parser::MARKER_PREFIX instead.
	 */
	public function uniqPrefix() {
		wfDeprecated( __METHOD__, '1.26' );

		return self::MARKER_PREFIX;
	}
719
720
	/**
	 * Set the context title.
	 *
	 * A falsy value is replaced by a placeholder title built from the text
	 * 'NO TITLE'. Any fragment on the title is removed before it is stored.
	 *
	 * @param Title $t
	 */
	public function setTitle( $t ) {
		if ( !$t ) {
			$t = Title::newFromText( 'NO TITLE' );
		}

		# Strip the fragment, if any, to avoid various odd effects
		$this->mTitle = $t->hasFragment()
			? $t->createFragmentTarget( '' )
			: $t;
	}
737
738
	/**
	 * Get the Title object currently used as parsing context.
	 *
	 * @return Title
	 */
	public function getTitle() {
		return $this->mTitle;
	}
746
747
	/**
	 * Combined getter/setter for the Title object, implemented with wfSetVar().
	 *
	 * @param Title $x Title object or null to just get the current one
	 * @return Title
	 */
	public function Title( $x = null ) {
		return wfSetVar( $this->mTitle, $x );
	}
756
757
	/**
	 * Set the output type and rebuild the $this->ot shortcut array, which
	 * holds one boolean per output mode ('html', 'wiki', 'pre', 'plain').
	 *
	 * @param int $ot New value, one of the OT_xxx constants
	 */
	public function setOutputType( $ot ) {
		# Shortcut alias
		$shortcuts = [
			'html' => $ot == self::OT_HTML,
			'wiki' => $ot == self::OT_WIKI,
			'pre' => $ot == self::OT_PREPROCESS,
			'plain' => $ot == self::OT_PLAIN,
		];

		$this->mOutputType = $ot;
		$this->ot = $shortcuts;
	}
772
773
	/**
	 * Combined getter/setter for the output type, implemented with wfSetVar().
	 * Unlike setOutputType(), this does not touch the $this->ot shortcut array.
	 *
	 * @param int|null $x New value or null to just get the current one
	 * @return int
	 */
	public function OutputType( $x = null ) {
		return wfSetVar( $this->mOutputType, $x );
	}
782
783
	/**
	 * Get the ParserOutput object of the current parse.
	 *
	 * @return ParserOutput
	 */
	public function getOutput() {
		return $this->mOutput;
	}
791
792
	/**
	 * Get the ParserOptions object currently in use.
	 *
	 * @return ParserOptions
	 */
	public function getOptions() {
		return $this->mOptions;
	}
800
801
	/**
	 * Combined getter/setter for the ParserOptions object, implemented with
	 * wfSetVar().
	 *
	 * @param ParserOptions $x New value or null to just get the current one
	 * @return ParserOptions Current ParserOptions object
	 */
	public function Options( $x = null ) {
		return wfSetVar( $this->mOptions, $x );
	}
810
811
	/**
	 * Return the current link ID and advance the counter by one.
	 *
	 * @return int
	 */
	public function nextLinkID() {
		return $this->mLinkID++;
	}
817
818
	/**
	 * Set the link ID counter that nextLinkID() hands out next.
	 *
	 * @param int $id
	 */
	public function setLinkID( $id ) {
		$this->mLinkID = $id;
	}
824
825
	/**
	 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
	 * This is the same object that getTargetLanguage() returns.
	 *
	 * @return Language
	 */
	public function getFunctionLang() {
		return $this->getTargetLanguage();
	}
832
833
	/**
	 * Get the target language for the content being parsed. This is usually the
	 * language that the content is in.
	 *
	 * Resolution order: the target language set in the ParserOptions; else the
	 * user language when parsing an interface message; else the page language
	 * of the context title.
	 *
	 * @since 1.19
	 *
	 * @throws MWException When none of the options apply and no title is set
	 * @return Language
	 */
	public function getTargetLanguage() {
		$configured = $this->mOptions->getTargetLanguage();
		if ( $configured !== null ) {
			return $configured;
		}

		if ( $this->mOptions->getInterfaceMessage() ) {
			return $this->mOptions->getUserLangObj();
		}

		if ( $this->mTitle === null ) {
			throw new MWException( __METHOD__ . ': $this->mTitle is null' );
		}

		return $this->mTitle->getPageLanguage();
	}
855
856
	/**
	 * Get the language object for language conversion.
	 * This is the same object that getTargetLanguage() returns.
	 *
	 * @return Language|null
	 */
	public function getConverterLanguage() {
		return $this->getTargetLanguage();
	}
863
864
	/**
	 * Get the User object to parse for: $this->mUser when one has been set,
	 * otherwise the user from the ParserOptions object.
	 *
	 * @return User
	 */
	public function getUser() {
		return $this->mUser !== null
			? $this->mUser
			: $this->mOptions->getUser();
	}
876
877
	/**
	 * Get the preprocessor object, creating it on first use from the class
	 * name stored in $this->mPreprocessorClass.
	 *
	 * @return Preprocessor
	 */
	public function getPreprocessor() {
		if ( isset( $this->mPreprocessor ) ) {
			return $this->mPreprocessor;
		}

		$preprocessorClass = $this->mPreprocessorClass;
		$this->mPreprocessor = new $preprocessorClass( $this );

		return $this->mPreprocessor;
	}
889
890
	/**
	 * Get a LinkRenderer instance to make links with.
	 *
	 * The renderer is created once through the LinkRendererFactory service,
	 * given the stub threshold from the current ParserOptions, and then reused
	 * on later calls.
	 *
	 * @since 1.28
	 * @return LinkRenderer
	 */
	public function getLinkRenderer() {
		if ( $this->mLinkRenderer ) {
			return $this->mLinkRenderer;
		}

		$factory = MediaWikiServices::getInstance()->getLinkRendererFactory();
		$this->mLinkRenderer = $factory->create();

		$stubThreshold = $this->getOptions()->getStubThreshold();
		$this->mLinkRenderer->setStubThreshold( $stubThreshold );

		return $this->mLinkRenderer;
	}
907
908
	/**
909
	 * Replaces all occurrences of HTML-style comments and the given tags
910
	 * in the text with a random marker and returns the next text. The output
911
	 * parameter $matches will be an associative array filled with data in
912
	 * the form:
913
	 *
914
	 * @code
915
	 *   'UNIQ-xxxxx' => [
916
	 *     'element',
917
	 *     'tag content',
918
	 *     [ 'param' => 'x' ],
919
	 *     '<element param="x">tag content</element>' ]
920
	 * @endcode
921
	 *
922
	 * @param array $elements List of element names. Comments are always extracted.
923
	 * @param string $text Source text string.
924
	 * @param array $matches Out parameter, Array: extracted tags
925
	 * @param string|null $uniq_prefix
926
	 * @return string Stripped text
927
	 * @since 1.26 The uniq_prefix argument is deprecated.
928
	 */
929
	public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
		if ( $uniq_prefix !== null ) {
			wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
		}
		// Marker counter; being a static local, it keeps counting across calls
		static $n = 1;
		$stripped = '';
		$matches = [];

		// Element names are literal tag names, not regex fragments. Quote them so
		// that a name containing a PCRE metacharacter (or the "/" delimiter) can
		// neither break the pattern nor change what it matches.
		$quotedElements = [];
		foreach ( $elements as $name ) {
			$quotedElements[] = preg_quote( $name, '/' );
		}
		$taglist = implode( '|', $quotedElements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

		while ( $text != '' ) {
			// Split once at the next opening tag or comment start.
			// A tag match yields 5 pieces: before, name, attributes, close, after.
			// A comment match yields 6: the three tag groups stay empty and
			// "!--" lands in the fourth.
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if ( count( $p ) < 5 ) {
				// Nothing left to extract
				break;
			}
			if ( count( $p ) > 5 ) {
				# comment
				$element = $p[4];
				$attributes = '';
				$close = '';
				$inside = $p[5];
			} else {
				# tag
				$element = $p[1];
				$attributes = $p[2];
				$close = $p[3];
				$inside = $p[4];
			}

			$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
			$stripped .= $marker;

			if ( $close === '/>' ) {
				# Empty element tag, <tag />
				$content = null;
				$text = $inside;
				$tail = null;
			} else {
				if ( $element === '!--' ) {
					$end = '/(-->)/';
				} else {
					// $element is the text as it appeared in the source; quote it
					// for the same reason as the start pattern above.
					$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
				}
				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
				$content = $q[0];
				if ( count( $q ) < 3 ) {
					# No end tag -- let it run out to the end of the text.
					$tail = '';
					$text = '';
				} else {
					$tail = $q[1];
					$text = $q[2];
				}
			}

			// Entry layout: element name, inner content, decoded attributes,
			// and the complete original source text of the element.
			$matches[$marker] = [ $element,
				$content,
				Sanitizer::decodeTagAttributes( $attributes ),
				"<$element$attributes$close$content$tail" ];
		}
		return $stripped;
	}
993
994
	/**
995
	 * Get a list of strippable XML-like elements
996
	 *
997
	 * @return array
998
	 */
999
	public function getStripList() {
		// Plain accessor for the list of strippable XML-like elements.
		$stripList = $this->mStripList;

		return $stripList;
	}
1002
1003
	/**
1004
	 * Add an item to the strip state
1005
	 * Returns the unique tag which must be inserted into the stripped text
1006
	 * The tag will be replaced with the original text in unstrip()
1007
	 *
1008
	 * @param string $text
1009
	 *
1010
	 * @return string
1011
	 */
1012
	public function insertStripItem( $text ) {
		// Take the current index for this marker and advance the counter.
		$index = $this->mMarkerIndex++;
		$marker = self::MARKER_PREFIX . '-item-' . $index . '-' . self::MARKER_SUFFIX;

		// Register the text under the marker in the general strip state.
		$this->mStripState->addGeneral( $marker, $text );

		return $marker;
	}
1018
1019
	/**
1020
	 * parse the wiki syntax used to render tables
1021
	 *
1022
	 * @private
1023
	 * @param string $text
1024
	 * @return string
1025
	 */
1026
	public function doTableStuff( $text ) {
		// One entry per currently open table in each stack; the innermost
		// table is on top.
		$cellOpen = []; // is a cell (td, th or caption) currently open?
		$lastTags = []; // name of the most recently opened cell tag
		$rowOpen = []; // is a <tr> currently open?
		$rowAttribs = []; // attributes waiting to be put on the next <tr>
		$rowSeen = []; // did this table open a <tr> element?
		$indentLevel = 0; // number of ':' in front of the table start
		$cellTags = [ '|' => 'td', '!' => 'th', '+' => 'caption' ];
		$out = '';

		foreach ( StringUtils::explode( "\n", $text ) as $outLine ) {
			$line = trim( $outLine );

			if ( $line === '' ) {
				// Empty lines pass through untouched
				$out .= $outLine . "\n";
				continue;
			}

			$firstChar = substr( $line, 0, 1 );
			$firstTwo = substr( $line, 0, 2 );
			$matches = [];

			if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
				// "{|" opens a new table; whatever follows it is attributes
				$indentLevel = strlen( $matches[1] );

				$attributes = Sanitizer::fixTagAttributes(
					$this->mStripState->unstripBoth( $matches[2] ),
					'table'
				);

				$outLine = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attributes}>";
				$cellOpen[] = false;
				$lastTags[] = '';
				$rowOpen[] = false;
				$rowAttribs[] = '';
				$rowSeen[] = false;
			} elseif ( !$cellOpen ) {
				// Not inside any table: nothing below applies
				$out .= $outLine . "\n";
				continue;
			} elseif ( $firstTwo === '|}' ) {
				// "|}" ends the innermost table
				$line = '</table>' . substr( $line, 2 );
				$lastTag = array_pop( $lastTags );

				if ( !array_pop( $rowSeen ) ) {
					// A table that never opened a row gets one empty row
					$line = "<tr><td></td></tr>{$line}";
				}

				if ( array_pop( $rowOpen ) ) {
					$line = "</tr>{$line}";
				}

				if ( array_pop( $cellOpen ) ) {
					$line = "</{$lastTag}>{$line}";
				}
				array_pop( $rowAttribs );
				$outLine = $line . str_repeat( '</dd></dl>', $indentLevel );
			} elseif ( $firstTwo === '|-' ) {
				// "|-" starts a row; what is left after the dashes is attributes
				$line = preg_replace( '#^\|-+#', '', $line );

				$attributes = Sanitizer::fixTagAttributes(
					$this->mStripState->unstripBoth( $line ),
					'tr'
				);
				array_pop( $rowAttribs );
				$rowAttribs[] = $attributes;

				$lastTag = array_pop( $lastTags );
				array_pop( $rowSeen );
				$rowSeen[] = true;

				// Close the previous row and cell, if any were open
				$line = array_pop( $rowOpen ) ? '</tr>' : '';
				if ( array_pop( $cellOpen ) ) {
					$line = "</{$lastTag}>{$line}";
				}

				$outLine = $line;
				$rowOpen[] = false;
				$cellOpen[] = false;
				$lastTags[] = '';
			} elseif ( $firstChar === '|' || $firstChar === '!' ) {
				// Cell line: "|" is a td, "!" a th and "|+" a caption
				if ( $firstTwo === '|+' ) {
					$firstChar = '+';
					$line = substr( $line, 2 );
				} else {
					$line = substr( $line, 1 );
				}

				// Implies both are valid for table headings.
				if ( $firstChar === '!' ) {
					$line = StringUtils::replaceMarkup( '!!', '||', $line );
				}

				$outLine = '';

				// Split up multiple cells on the same line.
				// FIXME : This can result in improper nesting of tags processed
				// by earlier parser steps.
				foreach ( explode( '||', $line ) as $cell ) {
					$previous = '';
					if ( $firstChar !== '+' ) {
						// Anything but a caption lives inside a row: open one if needed
						$pendingRowAttribs = array_pop( $rowAttribs );
						if ( !array_pop( $rowOpen ) ) {
							$previous = "<tr{$pendingRowAttribs}>\n";
						}
						$rowOpen[] = true;
						$rowAttribs[] = '';
						array_pop( $rowSeen );
						$rowSeen[] = true;
					}

					// Close the cell left open by the previous iteration or line
					$lastTag = array_pop( $lastTags );
					if ( array_pop( $cellOpen ) ) {
						$previous = "</{$lastTag}>\n{$previous}";
					}

					$lastTag = isset( $cellTags[$firstChar] ) ? $cellTags[$firstChar] : '';
					$lastTags[] = $lastTag;

					// A cell could contain both parameters and data
					$cellData = explode( '|', $cell, 2 );

					// Bug 553: Note that a '|' inside an invalid link should not
					// be mistaken as delimiting cell parameters
					if ( strpos( $cellData[0], '[[' ) !== false ) {
						$cell = "{$previous}<{$lastTag}>{$cell}";
					} elseif ( count( $cellData ) == 1 ) {
						$cell = "{$previous}<{$lastTag}>{$cellData[0]}";
					} else {
						$attributes = Sanitizer::fixTagAttributes(
							$this->mStripState->unstripBoth( $cellData[0] ),
							$lastTag
						);
						$cell = "{$previous}<{$lastTag}{$attributes}>{$cellData[1]}";
					}

					$outLine .= $cell;
					$cellOpen[] = true;
				}
			}
			$out .= $outLine . "\n";
		}

		// Close whatever is still open once the text has run out
		while ( $cellOpen ) {
			if ( array_pop( $cellOpen ) ) {
				$out .= "</td>\n";
			}
			if ( array_pop( $rowOpen ) ) {
				$out .= "</tr>\n";
			}
			if ( !array_pop( $rowSeen ) ) {
				$out .= "<tr><td></td></tr>\n";
			}

			$out .= "</table>\n";
		}

		// Remove trailing line-ending (b/c)
		if ( substr( $out, -1 ) === "\n" ) {
			$out = substr( $out, 0, -1 );
		}

		// special case: don't return empty table
		return ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) ? '' : $out;
	}
1216
1217
	/**
1218
	 * Helper function for parse() that transforms wiki markup into half-parsed
1219
	 * HTML. Only called for $mOutputType == self::OT_HTML.
1220
	 *
1221
	 * @private
1222
	 *
1223
	 * @param string $text The text to parse
1224
	 * @param bool $isMain Whether this is being called from the main parse() function
1225
	 * @param PPFrame|bool $frame A pre-processor frame
1226
	 *
1227
	 * @return string
1228
	 */
1229
	public function internalParse( $text, $isMain = true, $frame = false ) {
		// Kept so formatHeadings() can be given the unmodified input below
		$origText = $text;

		// Since PHP 7.1, "&$this" inside an array no longer yields a reference,
		// so a hook handler that declares its parser parameter by reference
		// would be handed a plain value. Pass a reference to a local alias.
		$parser = $this;

		# Hook to suspend the parser in this state
		if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
			return $text;
		}

		# if $frame is provided, then use $frame for replacing any variables
		if ( $frame ) {
			# use frame depth to infer how include/noinclude tags should be handled
			# depth=0 means this is the top-level document; otherwise it's an included document
			if ( !$frame->depth ) {
				$flag = 0;
			} else {
				$flag = self::PTD_FOR_INCLUSION;
			}
			$dom = $this->preprocessToDom( $text, $flag );
			$text = $frame->expand( $dom );
		} else {
			# if $frame is not provided, then use old-style replaceVariables
			$text = $this->replaceVariables( $text );
		}

		Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
		// Callables do not need the object by reference
		$text = Sanitizer::removeHTMLtags(
			$text,
			[ $this, 'attributeStripCallback' ],
			false,
			array_keys( $this->mTransparentTagHooks ),
			[],
			[ $this, 'addTrackingCategory' ]
		);
		Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

		# Tables need to come after variable replacement for things to work
		# properly; putting them before other transformations should keep
		# exciting things like link expansions from showing up in surprising
		# places.
		$text = $this->doTableStuff( $text );

		// A line starting with four or more dashes becomes a horizontal rule
		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->doDoubleUnderscore( $text );

		$text = $this->doHeadings( $text );
		$text = $this->replaceInternalLinks( $text );
		$text = $this->doAllQuotes( $text );
		$text = $this->replaceExternalLinks( $text );

		# replaceInternalLinks may sometimes leave behind
		# absolute URLs, which have to be masked to hide them from replaceExternalLinks
		$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

		$text = $this->doMagicLinks( $text );
		$text = $this->formatHeadings( $text, $origText, $isMain );

		return $text;
	}
1289
1290
	/**
1291
	 * Helper function for parse() that transforms half-parsed HTML into fully
1292
	 * parsed HTML.
1293
	 *
1294
	 * @param string $text
1295
	 * @param bool $isMain
1296
	 * @param bool $linestart
1297
	 * @return string
1298
	 */
1299
	private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
		$text = $this->mStripState->unstripGeneral( $text );

		// Since PHP 7.1, "&$this" inside an array no longer yields a reference,
		// so a hook handler that declares its parser parameter by reference
		// would be handed a plain value. Pass a reference to a local alias.
		$parser = $this;

		if ( $isMain ) {
			Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
		}

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		$fixtags = [
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&#160;',
			'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
		];
		$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		/**
		 * The input doesn't get language converted if
		 * a) It's disabled
		 * b) Content isn't converted
		 * c) It's a conversion table
		 * d) it is an interface message (which is in the user language)
		 */
		if ( !( $this->mOptions->getDisableContentConversion()
			|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
		) {
			if ( !$this->mOptions->getInterfaceMessage() ) {
				# The position of the convert() call should not be changed. it
				# assumes that the links are all replaced and the only thing left
				# is the <nowiki> mark.
				$text = $this->getConverterLanguage()->convert( $text );
			}
		}

		$text = $this->mStripState->unstripNoWiki( $text );

		if ( $isMain ) {
			Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
		}

		$text = $this->replaceTransparentTags( $text );
		$text = $this->mStripState->unstripGeneral( $text );

		$text = Sanitizer::normalizeCharReferences( $text );

		if ( MWTidy::isEnabled() ) {
			// Tidy is available, but only runs when the options ask for it
			if ( $this->mOptions->getTidy() ) {
				$text = MWTidy::tidy( $text );
			}
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = [
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => '',
			];

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}

		if ( $isMain ) {
			Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
		}

		return $text;
	}
1388
1389
	/**
1390
	 * Replace special strings like "ISBN xxx" and "RFC xxx" with
1391
	 * magic external links.
1392
	 *
1393
	 * DML
1394
	 * @private
1395
	 *
1396
	 * @param string $text
1397
	 *
1398
	 * @return string
1399
	 */
1400
	public function doMagicLinks( $text ) {
		// Building blocks interpolated into the pattern below
		$prots = wfUrlProtocolsWithoutProtRel();
		$urlChar = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		$space = self::SPACE_NOT_NL; // non-newline space
		$spdash = "(?:-|$space)"; // a dash or a non-newline space
		$spaces = "$space++"; // possessive match of 1 or more spaces

		// One alternation covers every case; magicLinkCallback() tells them
		// apart by which capture group is non-empty.
		$pattern = '!(?:                            # Start cases
				(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
				(<.*?>) |                    # m[2]: Skip stuff inside
				                             #       HTML elements' . "
				(\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
				                             # m[4]: Post-protocol path
				\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
					([0-9]+)\b |
				\bISBN $spaces (             # m[6]: ISBN, capture number
					(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
					(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
					[0-9Xx]                  #  check digit
				)\b
			)!xu";

		return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
	}
1424
1425
	/**
1426
	 * @throws MWException
1427
	 * @param array $m
1428
	 * @return string HTML
1429
	 */
1430
	public function magicLinkCallback( $m ) {
		// m[1]: an existing anchor, m[2]: any other HTML element. Leave both alone.
		if ( isset( $m[1] ) && $m[1] !== '' ) {
			return $m[0];
		}
		if ( isset( $m[2] ) && $m[2] !== '' ) {
			return $m[0];
		}

		// m[3]: free external link; m[4] is the part after the protocol
		if ( isset( $m[3] ) && $m[3] !== '' ) {
			return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
		}

		// m[5]: number following "RFC" or "PMID"
		if ( isset( $m[5] ) && $m[5] !== '' ) {
			if ( strncmp( $m[0], 'RFC', 3 ) === 0 ) {
				if ( !$this->mOptions->getMagicRFCLinks() ) {
					return $m[0];
				}
				$keyword = 'RFC';
				$urlmsg = 'rfcurl';
				$cssClass = 'mw-magiclink-rfc';
			} elseif ( strncmp( $m[0], 'PMID', 4 ) === 0 ) {
				if ( !$this->mOptions->getMagicPMIDLinks() ) {
					return $m[0];
				}
				$keyword = 'PMID';
				$urlmsg = 'pubmedurl';
				$cssClass = 'mw-magiclink-pmid';
			} else {
				throw new MWException( __METHOD__ . ': unrecognised match type "' .
					substr( $m[0], 0, 20 ) . '"' );
			}
			$id = $m[5];
			$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();

			return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
		}

		// m[6]: ISBN, linked only when the option is enabled
		if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
			$space = self::SPACE_NOT_NL; // non-newline space
			// Display form: every kind of non-newline space becomes a plain space
			$isbn = preg_replace( "/$space/", ' ', $m[6] );
			// Lookup form: no separators, upper-case check digit
			$num = strtr( $isbn, [
				'-' => '',
				' ' => '',
				'x' => 'X',
			] );

			return $this->getLinkRenderer()->makeKnownLink(
				SpecialPage::getTitleFor( 'Booksources', $num ),
				"ISBN $isbn",
				[
					'class' => 'internal mw-magiclink-isbn',
					'title' => false // suppress title attribute
				]
			);
		}

		// Nothing applicable: return the match unchanged
		return $m[0];
	}
1488
1489
	/**
1490
	 * Make a free external link, given a user-supplied URL
1491
	 *
1492
	 * @param string $url
1493
	 * @param int $numPostProto
1494
	 *   The number of characters after the protocol.
1495
	 * @return string HTML
1496
	 * @private
1497
	 */
1498
	public function makeFreeExternalLink( $url, $numPostProto ) {
		$trail = '';

		// The characters '<' and '>' (which were escaped by removeHTMLtags())
		// should not be included in URLs, per RFC 2396. &nbsp; terminates a
		// URL as well (bug T84937). Cut the URL at the first such entity.
		$entityMatch = [];
		$entityPattern = '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/';
		if ( preg_match( $entityPattern, $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
			$cutAt = $entityMatch[0][1];
			$trail = substr( $url, $cutAt );
			$url = substr( $url, 0, $cutAt );
		}

		// Trailing punctuation belongs to the surrounding text. A right
		// bracket counts too unless the URL contains a left bracket.
		$sep = ',;\.:!?';
		if ( strpos( $url, '(' ) === false ) {
			$sep .= ')';
		}

		$urlRev = strrev( $url );
		$numSepChars = strspn( $urlRev, $sep );

		// Don't break a trailing HTML entity by moving its ';' into $trail.
		// This is hot code: substr_compare avoids creating a new string, and
		// the entity check runs on the reversed string from the wanted offset
		// instead of using a slow $-anchored match.
		// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		$entityTail = [];
		if ( $numSepChars
			&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
			&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $entityTail, 0, $numSepChars )
		) {
			$numSepChars--;
		}

		if ( $numSepChars ) {
			$trail = substr( $url, -$numSepChars ) . $trail;
			$url = substr( $url, 0, -$numSepChars );
		}

		// Verify that we still have a real URL after trail removal, and
		// not just lone protocol
		if ( strlen( $trail ) >= $numPostProto ) {
			return $url . $trail;
		}

		$url = Sanitizer::cleanUrl( $url );

		// Is this an external image?
		$html = $this->maybeMakeExternalImage( $url );
		if ( $html === false ) {
			// Not an image, make a link
			$linkText = $this->getConverterLanguage()->markNoConversion( $url, true );
			$linkAttribs = $this->getExternalLinkAttribs( $url );
			$html = Linker::makeExternalLink( $url, $linkText, true, 'free', $linkAttribs, $this->mTitle );

			// Register it in the output object, with unnecessary URL escape
			// codes replaced by their equivalent characters
			$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
		}

		return $html . $trail;
	}
1565
1566
	/**
1567
	 * Parse headers and return html
1568
	 *
1569
	 * @private
1570
	 *
1571
	 * @param string $text
1572
	 *
1573
	 * @return string
1574
	 */
1575
	public function doHeadings( $text ) {
		// Work from level 6 down to level 1, so a run of several '=' is matched
		// as a whole before any shorter marker gets a chance.
		$level = 6;
		while ( $level >= 1 ) {
			$marker = str_repeat( '=', $level );
			$text = preg_replace(
				'/^' . $marker . '(.+)' . $marker . '\\s*$/m',
				'<h' . $level . '>\\1</h' . $level . '>',
				$text
			);
			$level--;
		}

		return $text;
	}
1582
1583
	/**
1584
	 * Replace single quotes with HTML markup
1585
	 * @private
1586
	 *
1587
	 * @param string $text
1588
	 *
1589
	 * @return string The altered text
1590
	 */
1591
	public function doAllQuotes( $text ) {
		// Quote mark-up never spans lines, so convert each line on its own
		// and put the lines back together afterwards.
		$converted = [];
		foreach ( StringUtils::explode( "\n", $text ) as $line ) {
			$converted[] = $this->doQuotes( $line );
		}

		return implode( "\n", $converted );
	}
1600
1601
	/**
1602
	 * Helper function for doAllQuotes()
1603
	 *
1604
	 * @param string $text
1605
	 *
1606
	 * @return string
1607
	 */
1608
	public function doQuotes( $text ) {
		// Odd indexes of $arr hold runs of two or more apostrophes, even
		// indexes hold the text between them.
		$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		$countarr = count( $arr );
		if ( $countarr == 1 ) {
			// No quote mark-up on this line
			return $text;
		}

		// First, do some preliminary work. This may shift some apostrophes from
		// being mark-up to being text. It also counts the number of occurrences
		// of bold and italics mark-ups.
		$numbold = 0;
		$numitalics = 0;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			$thislen = strlen( $arr[$i] );
			// If there are ever four apostrophes, assume the first is supposed to
			// be text, and the remaining three constitute mark-up for bold text.
			// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
			if ( $thislen == 4 ) {
				$arr[$i - 1] .= "'";
				$arr[$i] = "'''";
				$thislen = 3;
			} elseif ( $thislen > 5 ) {
				// If there are more than 5 apostrophes in a row, assume they're all
				// text except for the last 5.
				// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
				$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
				$arr[$i] = "'''''";
				$thislen = 5;
			}
			// Count the number of occurrences of bold and italics mark-ups.
			if ( $thislen == 2 ) {
				$numitalics++;
			} elseif ( $thislen == 3 ) {
				$numbold++;
			} elseif ( $thislen == 5 ) {
				$numitalics++;
				$numbold++;
			}
		}

		// If there is an odd number of both bold and italics, it is likely
		// that one of the bold ones was meant to be an apostrophe followed
		// by italics. Which one we cannot know for certain, but it is more
		// likely to be one that has a single-letter word before it.
		if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
			$firstsingleletterword = -1;
			$firstmultiletterword = -1;
			$firstspace = -1;
			for ( $i = 1; $i < $countarr; $i += 2 ) {
				if ( strlen( $arr[$i] ) == 3 ) {
					// Last and second-to-last character of the preceding text
					$x1 = substr( $arr[$i - 1], -1 );
					$x2 = substr( $arr[$i - 1], -2, 1 );
					if ( $x1 === ' ' ) {
						if ( $firstspace == -1 ) {
							$firstspace = $i;
						}
					} elseif ( $x2 === ' ' ) {
						$firstsingleletterword = $i;
						// if $firstsingleletterword is set, we don't
						// look at the other options, so we can bail early.
						break;
					} else {
						if ( $firstmultiletterword == -1 ) {
							$firstmultiletterword = $i;
						}
					}
				}
			}

			// If there is a single-letter word, use it!
			if ( $firstsingleletterword > -1 ) {
				$arr[$firstsingleletterword] = "''";
				$arr[$firstsingleletterword - 1] .= "'";
			} elseif ( $firstmultiletterword > -1 ) {
				// If not, but there's a multi-letter word, use that one.
				$arr[$firstmultiletterword] = "''";
				$arr[$firstmultiletterword - 1] .= "'";
			} elseif ( $firstspace > -1 ) {
				// ... otherwise use the first one that has neither.
				// (notice that it is possible for all three to be -1 if, for example,
				// there is only one pentuple-apostrophe in the line)
				$arr[$firstspace] = "''";
				$arr[$firstspace - 1] .= "'";
			}
		}

		// Now let's actually convert our apostrophic mush to HTML!
		// $state names the open tags in opening order ('', 'i', 'b', 'bi', 'ib').
		// The extra state 'both' follows a ''''' seen with nothing open: text is
		// held in $buffer until the next mark-up decides the nesting order.
		$output = '';
		$buffer = '';
		$state = '';
		$i = 0;
		foreach ( $arr as $r ) {
			if ( ( $i % 2 ) == 0 ) {
				// Plain text between mark-up
				if ( $state === 'both' ) {
					$buffer .= $r;
				} else {
					$output .= $r;
				}
			} else {
				$thislen = strlen( $r );
				if ( $thislen == 2 ) {
					// '' toggles italics
					if ( $state === 'i' ) {
						$output .= '</i>';
						$state = '';
					} elseif ( $state === 'bi' ) {
						$output .= '</i>';
						$state = 'b';
					} elseif ( $state === 'ib' ) {
						$output .= '</b></i><b>';
						$state = 'b';
					} elseif ( $state === 'both' ) {
						$output .= '<b><i>' . $buffer . '</i>';
						$state = 'b';
					} else { // $state can be 'b' or ''
						$output .= '<i>';
						$state .= 'i';
					}
				} elseif ( $thislen == 3 ) {
					// ''' toggles bold
					if ( $state === 'b' ) {
						$output .= '</b>';
						$state = '';
					} elseif ( $state === 'bi' ) {
						$output .= '</i></b><i>';
						$state = 'i';
					} elseif ( $state === 'ib' ) {
						$output .= '</b>';
						$state = 'i';
					} elseif ( $state === 'both' ) {
						$output .= '<i><b>' . $buffer . '</b>';
						$state = 'i';
					} else { // $state can be 'i' or ''
						$output .= '<b>';
						$state .= 'b';
					}
				} elseif ( $thislen == 5 ) {
					// ''''' toggles bold and italics together
					if ( $state === 'b' ) {
						$output .= '</b><i>';
						$state = 'i';
					} elseif ( $state === 'i' ) {
						$output .= '</i><b>';
						$state = 'b';
					} elseif ( $state === 'bi' ) {
						$output .= '</i></b>';
						$state = '';
					} elseif ( $state === 'ib' ) {
						$output .= '</b></i>';
						$state = '';
					} elseif ( $state === 'both' ) {
						$output .= '<i><b>' . $buffer . '</b></i>';
						$state = '';
					} else { // ($state == '')
						$buffer = '';
						$state = 'both';
					}
				}
			}
			$i++;
		}
		// Now close all remaining tags.  Notice that the order is important.
		if ( $state === 'b' || $state === 'ib' ) {
			$output .= '</b>';
		}
		if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
			$output .= '</i>';
		}
		if ( $state === 'bi' ) {
			$output .= '</b>';
		}
		// There might be lonely ''''', so make sure we have a buffer.
		// Compare against '' explicitly: a plain truthiness check would
		// silently drop a buffer consisting of the single character "0".
		if ( $state === 'both' && $buffer !== '' ) {
			$output .= '<b><i>' . $buffer . '</i></b>';
		}
		return $output;
	}
1782
1783
	/**
1784
	 * Replace external links (REL)
1785
	 *
1786
	 * Note: this is all very hackish and the order of execution matters a lot.
1787
	 * Make sure to run tests/parser/parserTests.php if you change this code.
1788
	 *
1789
	 * @private
1790
	 *
1791
	 * @param string $text
1792
	 *
1793
	 * @throws MWException
1794
	 * @return string
1795
	 */
1796
	public function replaceExternalLinks( $text ) {
		$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		if ( $bits === false ) {
			throw new MWException( "PCRE needs to be compiled with "
				. "--enable-unicode-properties in order for MediaWiki to function" );
		}

		// Text in front of the first bracketed link
		$s = array_shift( $bits );

		// The rest comes in groups of four: URL, protocol (not needed here),
		// link text, and the text that follows the link.
		$total = count( $bits );
		for ( $i = 0; $i < $total; $i += 4 ) {
			$url = $bits[$i];
			$label = $bits[$i + 2];
			$trail = $bits[$i + 3];

			// The characters '<' and '>' (which were escaped by
			// removeHTMLtags()) should not be included in
			// URLs, per RFC 2396.
			$entityMatch = [];
			if ( preg_match( '/&(lt|gt);/', $url, $entityMatch, PREG_OFFSET_CAPTURE ) ) {
				$cutAt = $entityMatch[0][1];
				$label = substr( $url, $cutAt ) . ' ' . $label;
				$url = substr( $url, 0, $cutAt );
			}

			// If the link text is an image URL, replace it with an <img> tag
			// This happened by accident in the original parser, but some people used it extensively
			$img = $this->maybeMakeExternalImage( $label );
			if ( $img !== false ) {
				$label = $img;
			}

			$dtrail = '';

			// Set linktype for CSS - if URL==text, link is essentially free
			$linktype = ( $label === $url ) ? 'free' : 'text';

			if ( $label == '' ) {
				// No link text, e.g. [http://domain.tld/some.link]: autonumber
				$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
				$linktype = 'autonumber';
			} else {
				// Have link text, e.g. [http://domain.tld/some.link text]s
				// Check for trail
				list( $dtrail, $trail ) = Linker::splitTrail( $trail );
			}

			$label = $this->getConverterLanguage()->markNoConversion( $label );

			$url = Sanitizer::cleanUrl( $url );

			// Use the encoded URL
			// This means that users can paste URLs directly into the text
			// Funny characters like ö aren't valid in URLs anyway
			// This was changed in August 2004
			$linkAttribs = $this->getExternalLinkAttribs( $url );
			$s .= Linker::makeExternalLink( $url, $label, false, $linktype, $linkAttribs, $this->mTitle )
				. $dtrail . $trail;

			// Register link in the output object.
			// Replace unnecessary URL escape codes with the referenced character
			// This prevents spammers from hiding links from the filters
			$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
		}

		return $s;
	}
1865
1866
	/**
	 * Get the rel attribute for a particular external link.
	 *
	 * Returns 'nofollow' when $wgNoFollowLinks is enabled and neither the
	 * namespace of $title (via $wgNoFollowNsExceptions) nor the domain of $url
	 * (via $wgNoFollowDomainExceptions) is listed as an exception.
	 *
	 * @since 1.21
	 * @param string|bool $url Optional URL, to extract the domain from for rel =>
	 *   nofollow if appropriate
	 * @param Title $title Optional Title, for wgNoFollowNsExceptions lookups
	 * @return string|null Rel attribute for $url, or null if none applies
	 */
	public static function getExternalLinkRel( $url = false, $title = null ) {
		global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

		if ( !$wgNoFollowLinks ) {
			return null;
		}

		# Only consult the namespace exceptions when a title was supplied.
		# Without this guard the "no namespace" value (false) loosely equals
		# namespace number 0 inside in_array(), so a missing title would be
		# treated as an excepted namespace whenever 0 is in the list.
		if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
			return null;
		}

		if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
			return null;
		}

		return 'nofollow';
	}
1885
1886
	/**
	 * Get an associative array of additional HTML attributes appropriate for a
	 * particular external link.  This currently may include rel => nofollow
	 * (depending on configuration, namespace, and the URL's domain) and/or a
	 * target attribute (depending on configuration).
	 *
	 * When the configured target is anything other than '_self', '_parent' or
	 * '_top', 'noreferrer noopener' is added to the rel value as well.
	 *
	 * @param string $url URL to extract the domain from for rel =>
	 *   nofollow if appropriate
	 * @return array Associative array of HTML attributes; the 'rel' key is
	 *   always present and is null when no rel value applies
	 */
	public function getExternalLinkAttribs( $url ) {
		$attribs = [];
		$rel = self::getExternalLinkRel( $url, $this->mTitle );

		$target = $this->mOptions->getExternalLinkTarget();
		if ( $target ) {
			$attribs['target'] = $target;
			if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
				// T133507. New windows can navigate parent cross-origin.
				// Including noreferrer due to lacking browser
				// support of noopener. Eventually noreferrer should be removed.
				//
				// getExternalLinkRel() returns null (not '') when there is no
				// rel value, so both must count as "empty" here; otherwise the
				// result would start with a stray space.
				if ( $rel !== null && $rel !== '' ) {
					$rel .= ' ';
				}
				$rel .= 'noreferrer noopener';
			}
		}
		$attribs['rel'] = $rel;
		return $attribs;
	}
1916
1917
	/**
	 * Deprecated alias of normalizeLinkUrl().
	 *
	 * Emits a deprecation notice and then forwards the URL unchanged to
	 * normalizeLinkUrl(), returning whatever that method returns.
	 *
	 * @deprecated since 1.24, use normalizeLinkUrl
	 * @param string $url
	 * @return string
	 */
	public static function replaceUnusualEscapes( $url ) {
		wfDeprecated( __METHOD__, '1.24' );

		$normalized = self::normalizeLinkUrl( $url );
		return $normalized;
	}
1928
1929
	/**
	 * Replace unusual escape codes in a URL with their equivalent characters
	 *
	 * This generally follows the syntax defined in RFC 3986, with special
	 * consideration for HTTP query strings.
	 *
	 * The URL is first percent-encoded wherever it contains unsafe bytes, then
	 * cut into fragment, query and scheme/path parts. Each part is passed to
	 * normalizeUrlComponent() with its own set of characters that must stay
	 * escaped, and the parts are joined back together in their original order.
	 *
	 * @param string $url
	 * @return string
	 */
	public static function normalizeLinkUrl( $url ) {
		# Step one: encode every byte that is unsafe in a URL
		$encoded = preg_replace_callback(
			'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
			function ( $hit ) {
				return rawurlencode( $hit[0] );
			},
			$url
		);

		# Trailing components, processed right to left. Each delimiter maps to
		# the characters that have to remain escaped inside that component:
		#   '#' => fragment part - 'fragment'
		#   '?' => query part - 'query' minus &=+;
		$trailingParts = [
			'#' => '"#%<>[\]^`{|}',
			'?' => '"#%<>[\]^`{|}&=+;',
		];

		$result = '';
		$boundary = strlen( $encoded );
		foreach ( $trailingParts as $delimiter => $keepEscaped ) {
			$pos = strpos( $encoded, $delimiter );
			if ( $pos === false || $pos >= $boundary ) {
				# Component absent, or the delimiter belongs to a part that
				# was already consumed
				continue;
			}
			$piece = substr( $encoded, $pos, $boundary - $pos );
			$result = self::normalizeUrlComponent( $piece, $keepEscaped ) . $result;
			$boundary = $pos;
		}

		# Scheme and path part - 'pchar'
		# (we assume no userinfo or encoded colons in the host)
		$head = substr( $encoded, 0, $boundary );

		return self::normalizeUrlComponent( $head, '"#%<>[\]^`{|}/?' ) . $result;
	}
1973
1974
	/**
	 * Normalize the percent-escapes inside one component of a URL.
	 *
	 * Every "%XX" sequence is examined: if it decodes to a byte with a code
	 * strictly between 32 and 127 that does not occur in $unsafe, the escape
	 * is replaced by the decoded character. Otherwise the escape is kept,
	 * with its hex digits converted to upper case.
	 *
	 * @param string $component Part of a URL
	 * @param string $unsafe Characters that must stay percent-encoded
	 * @return string
	 */
	private static function normalizeUrlComponent( $component, $unsafe ) {
		return preg_replace_callback(
			'/%[0-9A-Fa-f]{2}/',
			function ( $escape ) use ( $unsafe ) {
				$decoded = urldecode( $escape[0] );
				$code = ord( $decoded );
				$printable = $code > 32 && $code < 127;

				if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
					# Leave it escaped, but use uppercase for a-f
					return strtoupper( $escape[0] );
				}

				# Safe to unescape
				return $decoded;
			},
			$component
		);
	}
1988
1989
	/**
	 * Make an image if it's allowed, either through the global
	 * option, through the exception, or through the on-wiki whitelist
	 *
	 * @param string $url
	 *
	 * @return string|bool Result of Linker::makeExternalImage() when the URL
	 *   may be shown as an image, false otherwise
	 */
	private function maybeMakeExternalImage( $url ) {
		$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();

		# $imagesfrom could be either a single string or an array of strings;
		# either way, check whether the URL starts with one of the prefixes
		$prefixMatched = false;
		if ( !empty( $imagesfrom ) ) {
			$candidates = is_array( $imagesfrom ) ? $imagesfrom : [ $imagesfrom ];
			foreach ( $candidates as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		}

		$html = false;
		if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatched )
			&& preg_match( self::EXT_IMAGE_REGEX, $url )
		) {
			# Image found
			$html = Linker::makeExternalImage( $url );
		}

		# The on-wiki whitelist is only consulted when nothing was produced
		# above, the feature is enabled and the URL looks like an image
		if ( $html
			|| !$this->mOptions->getEnableImageWhitelist()
			|| !preg_match( self::EXT_IMAGE_REGEX, $url )
		) {
			return $html;
		}

		$patterns = explode(
			"\n",
			wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
		);
		foreach ( $patterns as $pattern ) {
			# Ignore blank entries and comments
			if ( $pattern === '' || strpos( $pattern, '#' ) === 0 ) {
				continue;
			}
			# Sanitize the regex fragment and make it case-insensitive
			$regex = '/' . str_replace( '/', '\\/', $pattern ) . '/i';
			if ( preg_match( $regex, $url ) ) {
				# Image matches a whitelist entry
				return Linker::makeExternalImage( $url );
			}
		}

		return $html;
	}
2046
2047
	/**
	 * Process [[ ]] wikilinks
	 *
	 * Delegates to replaceInternalLinks2(), which rewrites $s in place, and
	 * merges the link holders it returns into $this->mLinkHolders.
	 *
	 * @param string $s
	 *
	 * @return string Processed text
	 *
	 * @private
	 */
	public function replaceInternalLinks( $s ) {
		# $s is taken by reference by replaceInternalLinks2() and comes back modified
		$newHolders = $this->replaceInternalLinks2( $s );
		$this->mLinkHolders->merge( $newHolders );

		return $s;
	}
2060
2061
	/**
	 * Process [[ ]] wikilinks (RIL)
	 *
	 * The text is split on "[[" and each piece is examined in turn. Depending
	 * on the link target, a piece is emitted unchanged, registered on the
	 * parser output (language links, categories), rendered immediately
	 * (images, media links, self links, always-known titles) or replaced by a
	 * link holder to be resolved later.
	 *
	 * @param string &$s Wikitext to process; rewritten in place with the
	 *   processed text
	 * @throws MWException If $this->mTitle is null
	 * @return LinkHolderArray Holders created while processing $s
	 *
	 * @private
	 */
	public function replaceInternalLinks2( &$s ) {
		global $wgExtraInterlanguageLinkPrefixes;

		# The regexes only depend on Title::legalChars(), so build them once
		static $tc = false, $e1, $e1_img;
		# the % is needed to support urlencoded titles as well
		if ( !$tc ) {
			$tc = Title::legalChars() . '#%';
			# Match a link having the form [[namespace:link|alternate]]trail
			$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
			# Match cases where there is no "]]", which might still be images
			$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
		}

		$holders = new LinkHolderArray( $this );

		# split the entire text string on occurrences of [[
		$a = StringUtils::explode( '[[', ' ' . $s );
		# get the first element (all text up to first [[), and remove the space we added
		$s = $a->current();
		$a->next();
		$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
		$s = substr( $s, 1 );

		$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
		$e2 = null;
		if ( $useLinkPrefixExtension ) {
			# Match the end of a line for a word that's not followed by whitespace,
			# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
			global $wgContLang;
			$charset = $wgContLang->linkPrefixCharset();
			$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
		}

		if ( is_null( $this->mTitle ) ) {
			throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
		}
		$nottalk = !$this->mTitle->isTalkPage();

		if ( $useLinkPrefixExtension ) {
			# Remember the prefix in front of the very first link; it is applied
			# on the first loop iteration below
			$m = [];
			if ( preg_match( $e2, $s, $m ) ) {
				$first_prefix = $m[2];
			} else {
				$first_prefix = false;
			}
		} else {
			$prefix = '';
		}

		$useSubpages = $this->areSubpagesAllowed();

		// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
		# Loop for each link
		for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
			// @codingStandardsIgnoreEnd

			# Check for excessive memory usage
			if ( $holders->isBig() ) {
				# Too big
				# Do the existence check, replace the link holders and clear the array
				$holders->replace( $s );
				$holders->clear();
			}

			if ( $useLinkPrefixExtension ) {
				# Move the link prefix from the end of the text so far into $prefix
				if ( preg_match( $e2, $s, $m ) ) {
					$prefix = $m[2];
					$s = $m[1];
				} else {
					$prefix = '';
				}
				# first link
				if ( $first_prefix ) {
					$prefix = $first_prefix;
					$first_prefix = false;
				}
			}

			$might_be_img = false;

			if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
				$text = $m[2];
				# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
				# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
				# the real problem is with the $e1 regex
				# See bug 1300.
				# Still some problems for cases where the ] is meant to be outside punctuation,
				# and no image is in sight. See bug 2095.
				if ( $text !== ''
					&& substr( $m[3], 0, 1 ) === ']'
					&& strpos( $text, '[' ) !== false
				) {
					$text .= ']'; # so that replaceExternalLinks($text) works later
					$m[3] = substr( $m[3], 1 );
				}
				# fix up urlencoded title texts
				if ( strpos( $m[1], '%' ) !== false ) {
					# Should anchors '#' also be rejected?
					$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
				}
				$trail = $m[3];
			} elseif ( preg_match( $e1_img, $line, $m ) ) {
				# Invalid, but might be an image with a link in its caption
				$might_be_img = true;
				$text = $m[2];
				if ( strpos( $m[1], '%' ) !== false ) {
					$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
				}
				$trail = "";
			} else { # Invalid form; output directly
				$s .= $prefix . '[[' . $line;
				continue;
			}

			$origLink = $m[1];

			# Don't allow internal links to pages containing
			# PROTO: where PROTO is a valid URL protocol; these
			# should be external links.
			if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
				$s .= $prefix . '[[' . $line;
				continue;
			}

			# Make subpage if necessary
			if ( $useSubpages ) {
				$link = $this->maybeDoSubpageLink( $origLink, $text );
			} else {
				$link = $origLink;
			}

			# A leading ':' forces a plain link instead of the special handling
			# (language link, image, category) further down
			$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
			if ( !$noforce ) {
				# Strip off leading ':'
				$link = substr( $link, 1 );
			}

			$unstrip = $this->mStripState->unstripNoWiki( $link );
			$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
			if ( $nt === null ) {
				# Not a valid title; output the wikitext unchanged
				$s .= $prefix . '[[' . $line;
				continue;
			}

			$ns = $nt->getNamespace();
			$iw = $nt->getInterwiki();

			if ( $might_be_img ) { # if this is actually an invalid link
				if ( $ns == NS_FILE && $noforce ) { # but might be an image
					$found = false;
					while ( true ) {
						# look at the next 'line' to see if we can close it there
						$a->next();
						$next_line = $a->current();
						if ( $next_line === false || $next_line === null ) {
							break;
						}
						$m = explode( ']]', $next_line, 3 );
						if ( count( $m ) == 3 ) {
							# the first ]] closes the inner link, the second the image
							$found = true;
							$text .= "[[{$m[0]}]]{$m[1]}";
							$trail = $m[2];
							break;
						} elseif ( count( $m ) == 2 ) {
							# if there's exactly one ]] that's fine, we'll keep looking
							$text .= "[[{$m[0]}]]{$m[1]}";
						} else {
							# if $next_line is invalid too, we need look no further
							$text .= '[[' . $next_line;
							break;
						}
					}
					if ( !$found ) {
						# we couldn't find the end of this imageLink, so output it raw
						# but don't ignore what might be perfectly normal links in the text we've examined
						$holders->merge( $this->replaceInternalLinks2( $text ) );
						$s .= "{$prefix}[[$link|$text";
						# note: no $trail, because without an end, there *is* no trail
						continue;
					}
				} else { # it's not an image, so output it raw
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			}

			$wasblank = ( $text == '' );
			if ( $wasblank ) {
				$text = $link;
			} else {
				# Bug 4598 madness. Handle the quotes only if they come from the alternate part
				# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
				# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
				#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
				$text = $this->doQuotes( $text );
			}

			# Link not escaped by : , create the various objects
			if ( $noforce && !$nt->wasLocalInterwiki() ) {
				# Interwikis
				if (
					$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
						Language::fetchLanguageName( $iw, null, 'mw' ) ||
						in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
					)
				) {
					# Bug 24502: filter duplicates
					if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
						$this->mLangLinkLanguages[$iw] = true;
						$this->mOutput->addLanguageLink( $nt->getFullText() );
					}

					# The prefix is re-attached to $s here, so only the trail may
					# be appended afterwards; appending $prefix a second time
					# would duplicate it in the output.
					$s = rtrim( $s . $prefix );
					$s .= trim( $trail, "\n" ) == '' ? '' : $trail;
					continue;
				}

				if ( $ns == NS_FILE ) {
					if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
						if ( $wasblank ) {
							# if no parameters were passed, $text
							# becomes something like "File:Foo.png",
							# which we don't want to pass on to the
							# image generator
							$text = '';
						} else {
							# recursively parse links inside the image caption
							# actually, this will parse them in any other parameters, too,
							# but it might be hard to fix that, and it doesn't matter ATM
							$text = $this->replaceExternalLinks( $text );
							$holders->merge( $this->replaceInternalLinks2( $text ) );
						}
						# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
						$s .= $prefix . $this->armorLinks(
							$this->makeImage( $nt, $text, $holders ) ) . $trail;
						continue;
					}
				} elseif ( $ns == NS_CATEGORY ) {
					$s = rtrim( $s . "\n" ); # bug 87

					if ( $wasblank ) {
						$sortkey = $this->getDefaultSort();
					} else {
						$sortkey = $text;
					}
					$sortkey = Sanitizer::decodeCharReferences( $sortkey );
					$sortkey = str_replace( "\n", '', $sortkey );
					$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
					$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

					/**
					 * Strip the whitespace Category links produce, see bug 87
					 */
					$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

					continue;
				}
			}

			# Self-link checking. For some languages, variants of the title are checked in
			# LinkHolderArray::doVariants() to allow batching the existence checks necessary
			# for linking to a different variant.
			if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
				$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
				continue;
			}

			# NS_MEDIA is a pseudo-namespace for linking directly to a file
			# @todo FIXME: Should do batch file existence checks, see comment below
			if ( $ns == NS_MEDIA ) {
				# Give extensions a chance to select the file revision for us
				$options = [];
				$descQuery = false;
				Hooks::run( 'BeforeParserFetchFileAndTitle',
					[ $this, $nt, &$options, &$descQuery ] );
				# Fetch and register the file (file title may be different via hooks)
				list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
				# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
				$s .= $prefix . $this->armorLinks(
					Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
				continue;
			}

			# Some titles, such as valid special pages or files in foreign repos, should
			# be shown as bluelinks even though they're not included in the page table
			# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
			# batch file existence checks for NS_FILE and NS_MEDIA
			if ( $iw == '' && $nt->isAlwaysKnown() ) {
				$this->mOutput->addLink( $nt );
				$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
			} else {
				# Links will be added to the output link list after checking
				$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
			}
		}
		return $holders;
	}
2368
2369
	/**
	 * Render a forced-blue link inline; protect against double expansion of
	 * URLs if we're in a mode that prepends full URL prefixes to internal links.
	 *
	 * The trail is split with Linker::splitTrail(): the first part becomes
	 * part of the link text, the remainder is appended after the armored link.
	 *
	 * @param Title $nt
	 * @param string $text Link text; when empty, the escaped prefixed title
	 *   text is used
	 * @param string $trail
	 * @param string $prefix
	 * @return string HTML-wikitext mix
	 */
	protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
		list( $inside, $trail ) = Linker::splitTrail( $trail );

		# Fall back to the page name when no link text was given
		$label = ( $text == '' ) ? htmlspecialchars( $nt->getPrefixedText() ) : $text;

		$html = $this->getLinkRenderer()->makeKnownLink(
			$nt,
			new HtmlArmor( $prefix . $label . $inside )
		);

		return $this->armorLinks( $html ) . $trail;
	}
2395
2396
	/**
	 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
	 * going to go through further parsing steps before inline URL expansion.
	 *
	 * Every URL protocol found at a word boundary (matched case-insensitively
	 * against $this->mUrlProtocols) gets the marker prefix followed by
	 * "NOPARSE" inserted in front of it.
	 *
	 * Not needed quite as much as it used to be since free links are a bit
	 * more sensible these days. But bracketed links are still an issue.
	 *
	 * @param string $text More-or-less HTML
	 * @return string Less-or-more HTML with NOPARSE bits
	 */
	public function armorLinks( $text ) {
		$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
		$armored = self::MARKER_PREFIX . "NOPARSE$1";

		return preg_replace( $protocolPattern, $armored, $text );
	}
2410
2411
	/**
	 * Return true if subpage links should be expanded on this page.
	 *
	 * The answer is whatever MWNamespace::hasSubpages() reports for the
	 * namespace of the current title.
	 *
	 * @return bool
	 */
	public function areSubpagesAllowed() {
		# Some namespaces don't allow subpages
		$namespace = $this->mTitle->getNamespace();

		return MWNamespace::hasSubpages( $namespace );
	}
2419
2420
	/**
	 * Handle link to subpage if necessary
	 *
	 * Thin wrapper around Linker::normalizeSubpageLink(), called with the
	 * current title as context.
	 *
	 * @param string $target The source of the link
	 * @param string &$text The link text, modified as necessary
	 * @return string The full name of the link
	 * @private
	 */
	public function maybeDoSubpageLink( $target, &$text ) {
		# $text is handed on by reference so the callee's changes reach our caller
		$fullName = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );

		return $fullName;
	}
2431
2432
	/**
	 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
	 *
	 * The work is done by BlockLevelPass::doBlockLevels(); this method only
	 * forwards its arguments and returns the result.
	 *
	 * @param string $text
	 * @param bool $linestart Whether or not this is at the start of a line.
	 * @private
	 * @return string The lists rendered as HTML
	 */
	public function doBlockLevels( $text, $linestart ) {
		$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );

		return $rendered;
	}
2443
2444
	/**
2445
	 * Return value of a magic variable (like PAGENAME)
2446
	 *
2447
	 * @private
2448
	 *
2449
	 * @param int $index
2450
	 * @param bool|PPFrame $frame
2451
	 *
2452
	 * @throws MWException
2453
	 * @return string
2454
	 */
2455
	public function getVariableValue( $index, $frame = false ) {
2456
		global $wgContLang, $wgSitename, $wgServer, $wgServerName;
2457
		global $wgArticlePath, $wgScriptPath, $wgStylePath;
2458
2459
		if ( is_null( $this->mTitle ) ) {
2460
			// If no title set, bad things are going to happen
2461
			// later. Title should always be set since this
2462
			// should only be called in the middle of a parse
2463
			// operation (but the unit-tests do funky stuff)
2464
			throw new MWException( __METHOD__ . ' Should only be '
2465
				. ' called while parsing (no title set)' );
2466
		}
2467
2468
		/**
2469
		 * Some of these require message or data lookups and can be
2470
		 * expensive to check many times.
2471
		 */
2472
		if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) {
2473
			if ( isset( $this->mVarCache[$index] ) ) {
2474
				return $this->mVarCache[$index];
2475
			}
2476
		}
2477
2478
		$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2479
		Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] );
2480
2481
		$pageLang = $this->getFunctionLang();
2482
2483
		switch ( $index ) {
2484
			case '!':
2485
				$value = '|';
2486
				break;
2487
			case 'currentmonth':
2488
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
2489
				break;
2490
			case 'currentmonth1':
2491
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2492
				break;
2493
			case 'currentmonthname':
2494
				$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2495
				break;
2496
			case 'currentmonthnamegen':
2497
				$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2498
				break;
2499
			case 'currentmonthabbrev':
2500
				$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2501
				break;
2502
			case 'currentday':
2503
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
2504
				break;
2505
			case 'currentday2':
2506
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
2507
				break;
2508
			case 'localmonth':
2509
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
2510
				break;
2511
			case 'localmonth1':
2512
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2513
				break;
2514
			case 'localmonthname':
2515
				$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2516
				break;
2517
			case 'localmonthnamegen':
2518
				$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2519
				break;
2520
			case 'localmonthabbrev':
2521
				$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2522
				break;
2523
			case 'localday':
2524
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
2525
				break;
2526
			case 'localday2':
2527
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
2528
				break;
2529
			case 'pagename':
2530
				$value = wfEscapeWikiText( $this->mTitle->getText() );
2531
				break;
2532
			case 'pagenamee':
2533
				$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2534
				break;
2535
			case 'fullpagename':
2536
				$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2537
				break;
2538
			case 'fullpagenamee':
2539
				$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2540
				break;
2541
			case 'subpagename':
2542
				$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2543
				break;
2544
			case 'subpagenamee':
2545
				$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2546
				break;
2547
			case 'rootpagename':
2548
				$value = wfEscapeWikiText( $this->mTitle->getRootText() );
2549
				break;
2550 View Code Duplication
			case 'rootpagenamee':
2551
				$value = wfEscapeWikiText( wfUrlencode( str_replace(
2552
					' ',
2553
					'_',
2554
					$this->mTitle->getRootText()
2555
				) ) );
2556
				break;
2557
			case 'basepagename':
2558
				$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2559
				break;
2560 View Code Duplication
			case 'basepagenamee':
2561
				$value = wfEscapeWikiText( wfUrlencode( str_replace(
2562
					' ',
2563
					'_',
2564
					$this->mTitle->getBaseText()
2565
				) ) );
2566
				break;
2567 View Code Duplication
			case 'talkpagename':
2568
				if ( $this->mTitle->canTalk() ) {
2569
					$talkPage = $this->mTitle->getTalkPage();
2570
					$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2571
				} else {
2572
					$value = '';
2573
				}
2574
				break;
2575 View Code Duplication
			case 'talkpagenamee':
2576
				if ( $this->mTitle->canTalk() ) {
2577
					$talkPage = $this->mTitle->getTalkPage();
2578
					$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2579
				} else {
2580
					$value = '';
2581
				}
2582
				break;
2583
			case 'subjectpagename':
2584
				$subjPage = $this->mTitle->getSubjectPage();
2585
				$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2586
				break;
2587
			case 'subjectpagenamee':
2588
				$subjPage = $this->mTitle->getSubjectPage();
2589
				$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2590
				break;
2591
			case 'pageid': // requested in bug 23427
2592
				$pageid = $this->getTitle()->getArticleID();
2593
				if ( $pageid == 0 ) {
2594
					# 0 means the page doesn't exist in the database,
2595
					# which means the user is previewing a new page.
2596
					# The vary-revision flag must be set, because the magic word
2597
					# will have a different value once the page is saved.
2598
					$this->mOutput->setFlag( 'vary-revision' );
2599
					wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
2600
				}
2601
				$value = $pageid ? $pageid : null;
2602
				break;
2603
			case 'revisionid':
2604
				# Let the edit saving system know we should parse the page
2605
				# *after* a revision ID has been assigned.
2606
				$this->mOutput->setFlag( 'vary-revision-id' );
2607
				wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
2608
				$value = $this->mRevisionId;
2609
				if ( !$value && $this->mOptions->getSpeculativeRevIdCallback() ) {
2610
					$value = call_user_func( $this->mOptions->getSpeculativeRevIdCallback() );
2611
					$this->mOutput->setSpeculativeRevIdUsed( $value );
2612
				}
2613
				break;
2614 View Code Duplication
			case 'revisionday':
2615
				# Let the edit saving system know we should parse the page
2616
				# *after* a revision ID has been assigned. This is for null edits.
2617
				$this->mOutput->setFlag( 'vary-revision' );
2618
				wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
2619
				$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
2620
				break;
2621 View Code Duplication
			case 'revisionday2':
2622
				# Let the edit saving system know we should parse the page
2623
				# *after* a revision ID has been assigned. This is for null edits.
2624
				$this->mOutput->setFlag( 'vary-revision' );
2625
				wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
2626
				$value = substr( $this->getRevisionTimestamp(), 6, 2 );
2627
				break;
2628 View Code Duplication
			case 'revisionmonth':
2629
				# Let the edit saving system know we should parse the page
2630
				# *after* a revision ID has been assigned. This is for null edits.
2631
				$this->mOutput->setFlag( 'vary-revision' );
2632
				wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
2633
				$value = substr( $this->getRevisionTimestamp(), 4, 2 );
2634
				break;
2635 View Code Duplication
			case 'revisionmonth1':
2636
				# Let the edit saving system know we should parse the page
2637
				# *after* a revision ID has been assigned. This is for null edits.
2638
				$this->mOutput->setFlag( 'vary-revision' );
2639
				wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
2640
				$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
2641
				break;
2642 View Code Duplication
			case 'revisionyear':
2643
				# Let the edit saving system know we should parse the page
2644
				# *after* a revision ID has been assigned. This is for null edits.
2645
				$this->mOutput->setFlag( 'vary-revision' );
2646
				wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
2647
				$value = substr( $this->getRevisionTimestamp(), 0, 4 );
2648
				break;
2649
			case 'revisiontimestamp':
2650
				# Let the edit saving system know we should parse the page
2651
				# *after* a revision ID has been assigned. This is for null edits.
2652
				$this->mOutput->setFlag( 'vary-revision' );
2653
				wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
2654
				$value = $this->getRevisionTimestamp();
2655
				break;
2656
			case 'revisionuser':
2657
				# Let the edit saving system know we should parse the page
2658
				# *after* a revision ID has been assigned for null edits.
2659
				$this->mOutput->setFlag( 'vary-user' );
2660
				wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
2661
				$value = $this->getRevisionUser();
2662
				break;
2663
			case 'revisionsize':
2664
				$value = $this->getRevisionSize();
2665
				break;
2666
			case 'namespace':
2667
				$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2668
				break;
2669
			case 'namespacee':
2670
				$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2671
				break;
2672
			case 'namespacenumber':
2673
				$value = $this->mTitle->getNamespace();
2674
				break;
2675
			case 'talkspace':
2676
				$value = $this->mTitle->canTalk()
2677
					? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2678
					: '';
2679
				break;
2680
			case 'talkspacee':
2681
				$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2682
				break;
2683
			case 'subjectspace':
2684
				$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2685
				break;
2686
			case 'subjectspacee':
2687
				$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2688
				break;
2689
			case 'currentdayname':
2690
				$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2691
				break;
2692
			case 'currentyear':
2693
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2694
				break;
2695
			case 'currenttime':
2696
				$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2697
				break;
2698
			case 'currenthour':
2699
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2700
				break;
2701
			case 'currentweek':
2702
				# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
2703
				# int to remove the padding
2704
				$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2705
				break;
2706
			case 'currentdow':
2707
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2708
				break;
2709
			case 'localdayname':
2710
				$value = $pageLang->getWeekdayName(
2711
					(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2712
				);
2713
				break;
2714
			case 'localyear':
2715
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2716
				break;
2717
			case 'localtime':
2718
				$value = $pageLang->time(
2719
					MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2720
					false,
2721
					false
2722
				);
2723
				break;
2724
			case 'localhour':
2725
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2726
				break;
2727
			case 'localweek':
2728
				# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
2729
				# int to remove the padding
2730
				$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2731
				break;
2732
			case 'localdow':
2733
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2734
				break;
2735
			case 'numberofarticles':
2736
				$value = $pageLang->formatNum( SiteStats::articles() );
2737
				break;
2738
			case 'numberoffiles':
2739
				$value = $pageLang->formatNum( SiteStats::images() );
2740
				break;
2741
			case 'numberofusers':
2742
				$value = $pageLang->formatNum( SiteStats::users() );
2743
				break;
2744
			case 'numberofactiveusers':
2745
				$value = $pageLang->formatNum( SiteStats::activeUsers() );
2746
				break;
2747
			case 'numberofpages':
2748
				$value = $pageLang->formatNum( SiteStats::pages() );
2749
				break;
2750
			case 'numberofadmins':
2751
				$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2752
				break;
2753
			case 'numberofedits':
2754
				$value = $pageLang->formatNum( SiteStats::edits() );
2755
				break;
2756
			case 'currenttimestamp':
2757
				$value = wfTimestamp( TS_MW, $ts );
2758
				break;
2759
			case 'localtimestamp':
2760
				$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2761
				break;
2762
			case 'currentversion':
2763
				$value = SpecialVersion::getVersion();
2764
				break;
2765
			case 'articlepath':
2766
				return $wgArticlePath;
2767
			case 'sitename':
2768
				return $wgSitename;
2769
			case 'server':
2770
				return $wgServer;
2771
			case 'servername':
2772
				return $wgServerName;
2773
			case 'scriptpath':
2774
				return $wgScriptPath;
2775
			case 'stylepath':
2776
				return $wgStylePath;
2777
			case 'directionmark':
2778
				return $pageLang->getDirMark();
2779
			case 'contentlanguage':
2780
				global $wgLanguageCode;
2781
				return $wgLanguageCode;
2782
			case 'cascadingsources':
2783
				$value = CoreParserFunctions::cascadingsources( $this );
2784
				break;
2785
			default:
2786
				$ret = null;
2787
				Hooks::run(
2788
					'ParserGetVariableValueSwitch',
2789
					[ &$this, &$this->mVarCache, &$index, &$ret, &$frame ]
2790
				);
2791
2792
				return $ret;
2793
		}
2794
2795
		if ( $index ) {
2796
			$this->mVarCache[$index] = $value;
2797
		}
2798
2799
		return $value;
2800
	}
2801
2802
	/**
	 * Build the magic-word matchers used while expanding double-brace constructs.
	 *
	 * Stores a MagicWordArray of the variable IDs (magic variables such as
	 * CURRENTMONTHNAME) in $this->mVariables and a MagicWordArray of the
	 * substitution-modifier IDs in $this->mSubstWords.
	 *
	 * @private Historically tagged private, although the method is declared public.
	 */
	public function initialiseVariables() {
		$this->mVariables = new MagicWordArray( MagicWord::getVariableIDs() );
		$this->mSubstWords = new MagicWordArray( MagicWord::getSubstIDs() );
	}
2814
2815
	/**
	 * Preprocess some wikitext and return the document tree.
	 * This is the ghost of replace_variables().
	 *
	 * The work is delegated to the preprocessor's preprocessToObj().
	 *
	 * The generated DOM tree must depend only on the input text and the flags.
	 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a
	 * regression of bug 4899.
	 *
	 * Any flag added to the $flags parameter here, or any other parameter liable
	 * to cause a change in the DOM tree for a given text, must be passed through
	 * the section identifier in the section edit link and thus back to
	 * extractSections().
	 *
	 * The output of this function is currently only cached in process memory,
	 * but a persistent cache may be implemented at a later date which takes
	 * further advantage of these strict dependency requirements.
	 *
	 * @param string $text The text to parse
	 * @param int $flags Bitwise combination of:
	 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if
	 *     the text is being included. Default is to assume a direct page view.
	 *
	 * @return PPNode
	 */
	public function preprocessToDom( $text, $flags = 0 ) {
		return $this->getPreprocessor()->preprocessToObj( $text, $flags );
	}
2841
2842
	/**
	 * Split a string into its leading whitespace, its trimmed contents and its
	 * trailing whitespace.
	 *
	 * "Whitespace" is whatever ltrim()/rtrim() strip by default. Concatenating
	 * the three returned elements reproduces the input string.
	 *
	 * Every element is always a string: the slices are only taken when they are
	 * non-empty, because substr() on an empty string yields false rather than ''
	 * on PHP versions before 8.0.
	 *
	 * @param string $s
	 *
	 * @return array Three-element list: [ leading whitespace, contents, trailing whitespace ]
	 */
	public static function splitWhitespace( $s ) {
		$withoutLead = ltrim( $s );
		$leadLength = strlen( $s ) - strlen( $withoutLead );
		// Guard the slice: substr( '', 0, 0 ) is false before PHP 8.0
		$lead = $leadLength > 0 ? substr( $s, 0, $leadLength ) : '';

		$contents = rtrim( $withoutLead );
		$trailLength = strlen( $withoutLead ) - strlen( $contents );
		// A negative start of zero would return the whole string, so guard this too
		$trail = $trailLength > 0 ? substr( $withoutLead, -$trailLength ) : '';

		return [ $lead, $contents, $trail ];
	}
2861
2862
	/**
	 * Replace magic variables, templates, and template arguments
	 * with the appropriate text. Templates are substituted recursively,
	 * taking care to avoid infinite loops.
	 *
	 * Note that the substitution depends on value of $mOutputType:
	 *  self::OT_WIKI: only {{subst:}} templates
	 *  self::OT_PREPROCESS: templates but not extension tags
	 *  self::OT_HTML: all templates and extension tags
	 *
	 * Empty text, and text longer than the configured maximum include size,
	 * is returned unchanged.
	 *
	 * @param string $text The text to transform
	 * @param bool|PPFrame $frame Object describing the arguments passed to the
	 *   template. Arguments may also be provided as an associative array, as
	 *   was the usual case before MW1.12. Providing arguments this way may be
	 *   useful for extensions wishing to perform variable replacement
	 *   explicitly. When false, a fresh frame is created.
	 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
	 *   double-brace expansion.
	 * @return string
	 */
	public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
		# Is there any text?
		$length = strlen( $text );
		if ( $length < 1 ) {
			return $text;
		}
		# Prevent too big inclusions!
		if ( $length > $this->mOptions->getMaxIncludeSize() ) {
			return $text;
		}

		if ( !( $frame instanceof PPFrame ) ) {
			if ( $frame === false ) {
				$frame = $this->getPreprocessor()->newFrame();
			} else {
				# Legacy calling convention: plain arguments instead of a frame
				wfDebug( __METHOD__ . " called using plain parameters instead of "
					. "a PPFrame instance. Creating custom frame.\n" );
				$frame = $this->getPreprocessor()->newCustomFrame( $frame );
			}
		}

		$tree = $this->preprocessToDom( $text );

		return $frame->expand( $tree, $argsOnly ? PPFrame::NO_TEMPLATES : 0 );
	}
2903
2904
	/**
	 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
	 *
	 * Each argument containing an "=" is split at the first "=" into a name and
	 * a value, both trimmed, and stored under that name (a later argument with
	 * the same name overwrites an earlier one). Arguments without "=" are stored
	 * untrimmed under consecutive integer keys starting at 1.
	 *
	 * @param array $args List of raw argument strings
	 *
	 * @return array Map of argument name or position to value
	 */
	public static function createAssocArgs( $args ) {
		$assocArgs = [];
		$position = 1;
		foreach ( $args as $arg ) {
			if ( strpos( $arg, '=' ) === false ) {
				# Positional argument: kept verbatim
				$assocArgs[$position++] = $arg;
				continue;
			}

			# Named argument: split at the first "=" only, so the value may
			# itself contain further "=" characters. trim() always yields a
			# string, so no false checks are needed afterwards.
			list( $name, $value ) = explode( '=', $arg, 2 );
			$assocArgs[trim( $name )] = trim( $value );
		}

		return $assocArgs;
	}
2932
2933
	/**
	 * Warn the user that a parser limitation has been reached.
	 *
	 * Adds the "<type>-warning" message to the parser output's warnings and
	 * puts the page into the "<type>-category" tracking category.
	 *
	 * The results are shown during preview and run through the Parser (See EditPage.php)
	 *
	 * NOTE(review): earlier documentation states the user is warned at most once
	 * per limitation type; any such de-duplication is not done in this method.
	 *
	 * @param string $limitationType Should be one of:
	 *   'expensive-parserfunction' (corresponding messages:
	 *       'expensive-parserfunction-warning',
	 *       'expensive-parserfunction-category')
	 *   'post-expand-template-argument' (corresponding messages:
	 *       'post-expand-template-argument-warning',
	 *       'post-expand-template-argument-category')
	 *   'post-expand-template-inclusion' (corresponding messages:
	 *       'post-expand-template-inclusion-warning',
	 *       'post-expand-template-inclusion-category')
	 *   'node-count-exceeded' (corresponding messages:
	 *       'node-count-exceeded-warning',
	 *       'node-count-exceeded-category')
	 *   'expansion-depth-exceeded' (corresponding messages:
	 *       'expansion-depth-exceeded-warning',
	 *       'expansion-depth-exceeded-category')
	 * @param string|int|null $current Current value
	 * @param string|int|null $max Maximum allowed, when an explicit limit has been
	 *   exceeded, provide the values (optional)
	 */
	public function limitationWarn( $limitationType, $current = '', $max = '' ) {
		# Passing $current and $max does no harm when the message does not use them.
		# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
		# only during preview, and that would split the parser cache unnecessarily.
		$message = wfMessage( $limitationType . '-warning' )->numParams( $current, $max );

		$this->mOutput->addWarning( $message->text() );
		$this->addTrackingCategory( $limitationType . '-category' );
	}
2968
2969
	/**
	 * Expand one double-brace construct and return its replacement, after
	 * recursively replacing any variables or templates within it.
	 *
	 * The construct is tried, in order, as: a literal (subst handling), a magic
	 * variable, a parser function, and finally a transcluded page (special page,
	 * local template or interwiki transclusion). If nothing matches, the
	 * original wikitext is rebuilt and returned unexpanded.
	 *
	 * @param array $piece The parts of the template
	 *   $piece['title']: the title, i.e. the part before the |
	 *   $piece['parts']: the parameter array
	 *   $piece['lineStart']: whether the brace was at the start of a line
	 * @param PPFrame $frame The current frame, contains template arguments
	 * @throws Exception Anything thrown by a parser function propagates unchanged
	 * @return array Single-element array: [ 'object' => ... ] when the result
	 *   still needs expansion in the current frame (or nothing was found),
	 *   otherwise [ 'text' => ... ]
	 */
	public function braceSubstitution( $piece, $frame ) {

		// Flags

		// $text has been filled
		$found = false;
		// wiki markup in $text should be escaped
		$nowiki = false;
		// $text is HTML, armour it against wikitext transformation
		$isHTML = false;
		// Force interwiki transclusion to be done in raw mode not rendered
		$forceRawInterwiki = false;
		// $text is a DOM node needing expansion in a child frame
		$isChildObj = false;
		// $text is a DOM node needing expansion in the current frame
		$isLocalObj = false;

		# Title object, where $text came from
		$title = false;

		# $part1 is the bit before the first |, and must contain only title characters.
		# Various prefixes will be stripped from it later.
		$titleWithSpaces = $frame->expand( $piece['title'] );
		$part1 = trim( $titleWithSpaces );
		$titleText = false;

		# Original title text preserved for various purposes
		$originalTitle = $part1;

		# $args is a list of argument nodes, starting from index 0, not including $part1
		# @todo FIXME: If piece['parts'] is null then the call to getLength()
		# below won't work b/c this $args isn't an object
		$args = ( null == $piece['parts'] ) ? [] : $piece['parts'];

		$profileSection = null; // profile templates

		# SUBST
		if ( !$found ) {
			$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

			# Possibilities for substMatch: "subst", "safesubst" or FALSE
			# Decide whether to expand template or keep wikitext as-is.
			if ( $this->ot['wiki'] ) {
				# In PST: literal with no prefix, expand with subst: or safesubst:
				$literal = ( $substMatch === false );
			} else {
				# Not in PST: literal with plain subst:, expand with safesubst: or no prefix
				$literal = ( $substMatch == 'subst' );
			}
			if ( $literal ) {
				$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
				$isLocalObj = true;
				$found = true;
			}
		}

		# Variables
		if ( !$found && $args->getLength() == 0 ) {
			$id = $this->mVariables->matchStartToEnd( $part1 );
			if ( $id !== false ) {
				$text = $this->getVariableValue( $id, $frame );
				if ( MagicWord::getCacheTTL( $id ) > -1 ) {
					$this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
				}
				$found = true;
			}
		}

		# MSG, MSGNW and RAW
		if ( !$found ) {
			# Check for MSGNW:
			$mwMsgnw = MagicWord::get( 'msgnw' );
			if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
				$nowiki = true;
			} else {
				# Remove obsolete MSG:
				$mwMsg = MagicWord::get( 'msg' );
				$mwMsg->matchStartAndRemove( $part1 );
			}

			# Check for RAW:
			$mwRaw = MagicWord::get( 'raw' );
			if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
				$forceRawInterwiki = true;
			}
		}

		# Parser functions
		if ( !$found ) {
			$colonPos = strpos( $part1, ':' );
			if ( $colonPos !== false ) {
				$func = substr( $part1, 0, $colonPos );
				$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$funcArgs[] = $args->item( $i );
				}

				# Exceptions from the parser function simply propagate to the caller
				$result = $this->callParserFunction( $frame, $func, $funcArgs );

				# The interface for parser functions allows for extracting
				# flags into the local scope. Extract any forwarded flags
				# here.
				# NOTE(review): extract() lets every key of $result overwrite the
				# local variable of the same name, not only the documented flags.
				extract( $result );
			}
		}

		# Finish mangling title and then check for loops.
		# Set $title to a Title object and $titleText to the PDBK
		if ( !$found ) {
			$ns = NS_TEMPLATE;
			# Split the title into page and subpage
			$subpage = '';
			$relative = $this->maybeDoSubpageLink( $part1, $subpage );
			if ( $part1 !== $relative ) {
				$part1 = $relative;
				$ns = $this->mTitle->getNamespace();
			}
			$title = Title::newFromText( $part1, $ns );
			if ( $title ) {
				$titleText = $title->getPrefixedText();
				# Check for language variants if the template is not found
				if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
					$this->getConverterLanguage()->findVariantLink( $part1, $title, true );
				}
				# Do recursion depth check
				$limit = $this->mOptions->getMaxTemplateDepth();
				if ( $frame->depth >= $limit ) {
					$found = true;
					$text = '<span class="error">'
						. wfMessage( 'parser-template-recursion-depth-warning' )
							->numParams( $limit )->inContentLanguage()->text()
						. '</span>';
				}
			}
		}

		# Load from database
		if ( !$found && $title ) {
			$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
			if ( !$title->isExternal() ) {
				if ( $title->isSpecialPage()
					&& $this->mOptions->getAllowSpecialInclusion()
					&& $this->ot['html']
				) {
					$specialPage = SpecialPageFactory::getPage( $title->getDBkey() );
					// Pass the template arguments as URL parameters.
					// "uselang" will have no effect since the Language object
					// is forced to the one defined in ParserOptions.
					$pageArgs = [];
					$argsLength = $args->getLength();
					for ( $i = 0; $i < $argsLength; $i++ ) {
						$bits = $args->item( $i )->splitArg();
						// Only arguments whose 'index' is empty are forwarded, keyed by name
						if ( strval( $bits['index'] ) === '' ) {
							$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
							$value = trim( $frame->expand( $bits['value'] ) );
							$pageArgs[$name] = $value;
						}
					}

					// Create a new context to execute the special page
					$context = new RequestContext;
					$context->setTitle( $title );
					$context->setRequest( new FauxRequest( $pageArgs ) );
					if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
						$context->setUser( $this->getUser() );
					} else {
						// If this page is cached, then we better not be per user.
						$context->setUser( User::newFromName( '127.0.0.1', false ) );
					}
					$context->setLanguage( $this->mOptions->getUserLangObj() );
					$ret = SpecialPageFactory::capturePath(
						$title, $context, $this->getLinkRenderer() );
					if ( $ret ) {
						$text = $context->getOutput()->getHTML();
						$this->mOutput->addOutputPageMetadata( $context->getOutput() );
						$found = true;
						$isHTML = true;
						if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
							$this->mOutput->updateRuntimeAdaptiveExpiry(
								$specialPage->maxIncludeCacheTime()
							);
						}
					}
				} elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
					$found = false; # access denied
					wfDebug( __METHOD__ . ": template inclusion denied for " .
						$title->getPrefixedDBkey() . "\n" );
				} else {
					list( $text, $title ) = $this->getTemplateDom( $title );
					if ( $text !== false ) {
						$found = true;
						$isChildObj = true;
					}
				}

				# If the title is valid but undisplayable, make a link to it
				if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
					$text = "[[:$titleText]]";
					$found = true;
				}
			} elseif ( $title->isTrans() ) {
				# Interwiki transclusion
				if ( $this->ot['html'] && !$forceRawInterwiki ) {
					$text = $this->interwikiTransclude( $title, 'render' );
					$isHTML = true;
				} else {
					$text = $this->interwikiTransclude( $title, 'raw' );
					# Preprocess it like a template
					$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
					$isChildObj = true;
				}
				$found = true;
			}

			# Do infinite loop check
			# This has to be done after redirect resolution to avoid infinite loops via redirects
			if ( !$frame->loopCheck( $title ) ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
					. '</span>';
				wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
			}
		}

		# If we haven't found text to substitute by now, we're done
		# Recover the source wikitext and return it
		if ( !$found ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			if ( $profileSection ) {
				$this->mProfiler->scopedProfileOut( $profileSection );
			}
			return [ 'object' => $text ];
		}

		# Expand DOM-style return values in a child frame
		if ( $isChildObj ) {
			# Clean up argument array
			$newFrame = $frame->newChild( $args, $title );

			if ( $nowiki ) {
				$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
			} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
				# Expansion is eligible for the empty-frame cache
				$text = $newFrame->cachedExpand( $titleText, $text );
			} else {
				# Uncached expansion
				$text = $newFrame->expand( $text );
			}
		}
		if ( $isLocalObj && $nowiki ) {
			$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
			$isLocalObj = false;
		}

		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}

		# Replace raw HTML by a placeholder
		if ( $isHTML ) {
			$text = $this->insertStripItem( $text );
		} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
			# Escape nowiki-style return values
			$text = wfEscapeWikiText( $text );
		} elseif ( is_string( $text )
			&& !$piece['lineStart']
			&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
		) {
			# Bug 529: if the template begins with a table or block-level
			# element, it should be treated as beginning a new line.
			# This behavior is somewhat controversial.
			$text = "\n" . $text;
		}

		if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
			# Error, oversize inclusion
			if ( $titleText !== false ) {
				# Make a working, properly escaped link if possible (bug 23588)
				$text = "[[:$titleText]]";
			} else {
				# This will probably not be a working link, but at least it may
				# provide some hint of where the problem is.
				# Strip a leading colon so the link below does not start with "[[::".
				# preg_replace() returns its result, so it has to be assigned back.
				$originalTitle = preg_replace( '/^:/', '', $originalTitle );
				$text = "[[:$originalTitle]]";
			}
			$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
				. 'post-expand include size too large -->' );
			$this->limitationWarn( 'post-expand-template-inclusion' );
		}

		if ( $isLocalObj ) {
			$ret = [ 'object' => $text ];
		} else {
			$ret = [ 'text' => $text ];
		}

		return $ret;
	}
3290
3291
	/**
	 * Call a parser function and return an array with text and flags.
	 *
	 * The function name is first looked up among the case-sensitive synonyms
	 * and then, lower-cased in the content language, among the case-insensitive
	 * ones. When neither matches, [ 'found' => false ] is returned and nothing
	 * is called.
	 *
	 * The returned array will always contain a 'found' entry, indicating
	 * whether the parser function was found or not. It may also contain the
	 * following:
	 *  text: string|object, resulting wikitext or PP DOM object
	 *  isHTML: bool, $text is HTML, armour it against wikitext transformation
	 *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
	 *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
	 *  nowiki: bool, wiki markup in $text should be escaped
	 *
	 * @since 1.21
	 * @param PPFrame $frame The current frame, contains template arguments
	 * @param string $function Function name
	 * @param array $args Arguments to the function
	 * @throws MWException When the registered hook is not callable
	 * @return array
	 */
	public function callParserFunction( $frame, $function, array $args = [] ) {
		global $wgContLang;

		# Case sensitive functions
		if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
			$function = $this->mFunctionSynonyms[1][$function];
		} else {
			# Case insensitive functions
			$function = $wgContLang->lc( $function );
			if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
				$function = $this->mFunctionSynonyms[0][$function];
			} else {
				return [ 'found' => false ];
			}
		}

		list( $callback, $flags ) = $this->mFunctionHooks[$function];

		# Workaround for PHP bug 35229 and similar
		if ( !is_callable( $callback ) ) {
			throw new MWException( "Tag hook for $function is not callable\n" );
		}

		# The parser is always the first callback argument and is passed by
		# reference. Reference a local alias rather than $this itself: taking
		# a reference to $this is not reliable from PHP 7.1 on.
		$parser = $this;
		$allArgs = [ &$parser ];
		if ( $flags & self::SFH_OBJECT_ARGS ) {
			# Convert arguments to PPNodes and collect for appending to $allArgs
			$funcArgs = [];
			foreach ( $args as $k => $v ) {
				# The first argument (key 0) is passed through as-is
				if ( $v instanceof PPNode || $k === 0 ) {
					$funcArgs[] = $v;
				} else {
					$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
				}
			}

			# Add a frame parameter, and pass the arguments as an array
			$allArgs[] = $frame;
			$allArgs[] = $funcArgs;
		} else {
			# Convert arguments to plain text and append to $allArgs
			foreach ( $args as $k => $v ) {
				if ( $v instanceof PPNode ) {
					$allArgs[] = trim( $frame->expand( $v ) );
				} elseif ( is_int( $k ) && $k >= 0 ) {
					$allArgs[] = trim( $v );
				} else {
					# Named plain-text argument: rebuild the "name=value" form
					$allArgs[] = trim( "$k=$v" );
				}
			}
		}

		$result = call_user_func_array( $callback, $allArgs );

		# The interface for function hooks allows them to return a wikitext
		# string or an array containing the string and any flags. This mungs
		# things around to match what this method should return.
		if ( !is_array( $result ) ) {
			$result = [
				'found' => true,
				'text' => $result,
			];
		} else {
			if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
				$result['text'] = $result[0];
			}
			unset( $result[0] );
			# Array union: only adds 'found' when the hook did not set it itself
			$result += [
				'found' => true,
			];
		}

		$noparse = isset( $result['noparse'] ) ? $result['noparse'] : true;
		$preprocessFlags = isset( $result['preprocessFlags'] ) ? $result['preprocessFlags'] : 0;

		if ( !$noparse ) {
			$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
			$result['isChildObj'] = true;
		}

		return $result;
	}
3397
3398
	/**
	 * Get the semi-parsed DOM representation of a template with a given title,
	 * and its redirect destination title. Cached.
	 *
	 * Two per-parser caches are used: $this->mTplRedirCache maps a requested
	 * title to the title that was actually fetched, and $this->mTplDomCache
	 * holds the DOM (or false for a template without text) per prefixed DB key.
	 *
	 * @param Title $title
	 *
	 * @return array Two-element list: [ DOM or false, final Title ]
	 */
	public function getTemplateDom( $title ) {
		$cacheTitle = $title;
		$titleText = $title->getPrefixedDBkey();

		# Follow a redirect remembered from an earlier fetch
		if ( isset( $this->mTplRedirCache[$titleText] ) ) {
			list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
			$title = Title::makeTitle( $ns, $dbk );
			$titleText = $title->getPrefixedDBkey();
		}
		# isset() is also true for a cached false, so misses are served from here too
		if ( isset( $this->mTplDomCache[$titleText] ) ) {
			return [ $this->mTplDomCache[$titleText], $title ];
		}

		# Cache miss, go to the database
		list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

		if ( $text === false ) {
			$this->mTplDomCache[$titleText] = false;
			return [ false, $title ];
		}

		# The DOM is stored under the key computed before the fetch
		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$this->mTplDomCache[$titleText] = $dom;

		# Remember where the requested title ended up, if it differs
		if ( !$title->equals( $cacheTitle ) ) {
			$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
				[ $title->getNamespace(), $title->getDBkey() ];
		}

		return [ $dom, $title ];
	}
3437
3438
	/**
	 * Fetch the current revision of a given title. Note that the revision
	 * (and even the title) may not exist in the database, so everything
	 * contributing to the output of the parser should use this method
	 * where possible, rather than getting the revisions themselves. This
	 * method also caches its results, so using it benefits performance.
	 *
	 * Results are kept in a MapCacheLRU of 100 entries, keyed by the title's
	 * prefixed DB key and created on first use. The lookup itself is done by
	 * the callback from ParserOptions::getCurrentRevisionCallback().
	 *
	 * @since 1.24
	 * @param Title $title
	 * @return Revision Whatever the revision callback returned for this title
	 */
	public function fetchCurrentRevisionOfTitle( $title ) {
		$key = $title->getPrefixedDBkey();

		if ( !$this->currentRevisionCache ) {
			$this->currentRevisionCache = new MapCacheLRU( 100 );
		}
		$cache = $this->currentRevisionCache;

		if ( !$cache->has( $key ) ) {
			// Defaults to Parser::statelessFetchRevision()
			$lookup = $this->mOptions->getCurrentRevisionCallback();
			$cache->set( $key, call_user_func( $lookup, $title, $this ) );
		}

		return $cache->get( $key );
	}
3462
3463
	/**
	 * Wrapper around Revision::newKnownCurrent to allow passing additional
	 * parameters without passing them on to it.
	 *
	 * Looks the revision up on a replica connection using the title's article
	 * ID and latest revision ID, and attaches the title to the revision found.
	 *
	 * @since 1.24
	 * @param Title $title
	 * @param Parser|bool $parser Accepted but not used by this method
	 * @return Revision|bool False if missing
	 */
	public static function statelessFetchRevision( Title $title, $parser = false ) {
		$articleId = $title->getArticleID();
		$latestId = $title->getLatestRevID();

		$revision = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $articleId, $latestId );
		if ( !$revision ) {
			// Nothing found: hand the falsy result back unchanged
			return $revision;
		}

		$revision->setTitle( $title );

		return $revision;
	}
3483
3484
	/**
3485
	 * Fetch the unparsed text of a template and register a reference to it.
3486
	 * @param Title $title
3487
	 * @return array ( string or false, Title )
3488
	 */
3489
	public function fetchTemplateAndTitle( $title ) {
		// Defaults to Parser::statelessFetchTemplate()
		$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

		$text = $fetched['text'];
		if ( is_string( $text ) ) {
			// We use U+007F DELETE to distinguish strip markers from regular
			// text, so any occurrence in fetched text is replaced by "?".
			$text = strtr( $text, "\x7f", "?" );
		}

		if ( isset( $fetched['deps'] ) ) {
			foreach ( $fetched['deps'] as $dep ) {
				$depTitle = $dep['title'];
				$this->mOutput->addTemplate( $depTitle, $dep['page_id'], $dep['rev_id'] );
				if ( $depTitle->equals( $this->getTitle() ) ) {
					// If we transclude ourselves, the final result
					// will change based on the new version of the page
					$this->mOutput->setFlag( 'vary-revision' );
				}
			}
		}

		// Fall back to the requested title when the callback gave no final title
		if ( isset( $fetched['finalTitle'] ) ) {
			return [ $text, $fetched['finalTitle'] ];
		}
		return [ $text, $title ];
	}
3511
3512
	/**
3513
	 * Fetch the unparsed text of a template and register a reference to it.
3514
	 * @param Title $title
3515
	 * @return string|bool
3516
	 */
3517
	public function fetchTemplate( $title ) {
		// Only the text (first element) of the ( text, title ) pair is wanted
		list( $text ) = $this->fetchTemplateAndTitle( $title );
		return $text;
	}
3520
3521
	/**
3522
	 * Static function to get a template
3523
	 * Can be overridden via ParserOptions::setTemplateCallback().
3524
	 *
3525
	 * @param Title $title
3526
	 * @param bool|Parser $parser
3527
	 *
3528
	 * @return array
3529
	 */
3530
	public static function statelessFetchTemplate( $title, $parser = false ) {
		$text = false;
		$skip = false;
		$finalTitle = $title;
		$deps = [];

		# Fetch the article, following up to 1 redirect (at most two passes)
		$pass = 0;
		while ( $pass < 2 && is_object( $title ) ) {
			# Give extensions a chance to select the revision instead
			$id = false; # Assume current
			Hooks::run( 'BeforeParserFetchTemplateAndtitle',
				[ $parser, $title, &$skip, &$id ] );

			if ( $skip ) {
				# A hook handler set $skip: record the dependency and stop
				$text = false;
				$deps[] = [
					'title' => $title,
					'page_id' => $title->getArticleID(),
					'rev_id' => null
				];
				break;
			}

			# Get the revision
			if ( $id ) {
				$revision = Revision::newFromId( $id );
			} elseif ( $parser ) {
				$revision = $parser->fetchCurrentRevisionOfTitle( $title );
			} else {
				$revision = Revision::newFromTitle( $title );
			}
			$revId = $revision ? $revision->getId() : 0;

			# If there is no current revision, there is no page
			if ( !$revision && $id === false ) {
				LinkCache::singleton()->addBadLinkObj( $title );
			}

			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => $revId
			];

			if ( $revision ) {
				if ( !$title->equals( $revision->getTitle() ) ) {
					# We fetched a rev from a different title; register it too...
					$deps[] = [
						'title' => $revision->getTitle(),
						'page_id' => $revision->getPage(),
						'rev_id' => $revId
					];
				}

				$content = $revision->getContent();
				$text = $content ? $content->getWikitextForTransclusion() : null;

				if ( $text === null || $text === false ) {
					# No usable wikitext: normalise the result to false and stop
					$text = false;
					break;
				}
			} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
				# No revision, but the title is in the MediaWiki namespace:
				# fall back to the interface message of that name
				global $wgContLang;
				$messageKey = $wgContLang->lcfirst( $title->getText() );
				$message = wfMessage( $messageKey )->inContentLanguage();
				if ( !$message->exists() ) {
					$text = false;
					break;
				}
				$content = $message->content();
				$text = $message->plain();
			} else {
				break;
			}

			if ( !$content ) {
				break;
			}

			# Redirect?
			$finalTitle = $title;
			$title = $content->getRedirectTarget();
			$pass++;
		}

		return [
			'text' => $text,
			'finalTitle' => $finalTitle,
			'deps' => $deps
		];
	}
3612
3613
	/**
3614
	 * Fetch a file and its title and register a reference to it.
3615
	 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3616
	 * @param Title $title
3617
	 * @param array $options Array of options to RepoGroup::findFile
3618
	 * @return File|bool
3619
	 */
3620
	public function fetchFile( $title, $options = [] ) {
		// Only the file (first element) of the ( file, title ) pair is wanted
		list( $file ) = $this->fetchFileAndTitle( $title, $options );
		return $file;
	}
3623
3624
	/**
3625
	 * Fetch a file and its title and register a reference to it.
3626
	 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3627
	 * @param Title $title
3628
	 * @param array $options Array of options to RepoGroup::findFile
3629
	 * @return array ( File or false, Title of file )
3630
	 */
3631
	public function fetchFileAndTitle( $title, $options = [] ) {
		$file = $this->fetchFileNoRegister( $title, $options );

		if ( $file ) {
			$time = $file->getTimestamp();
			$sha1 = $file->getSha1();
		} else {
			$time = false;
			$sha1 = false;
		}

		# Register the file as a dependency...
		$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );

		if ( !$file || $title->equals( $file->getTitle() ) ) {
			return [ $file, $title ];
		}

		# The file lives under a different title: register that one too
		# and report it as the fetched file title
		$fileTitle = $file->getTitle();
		$this->mOutput->addImage( $fileTitle->getDBkey(), $time, $sha1 );
		return [ $file, $fileTitle ];
	}
3645
3646
	/**
3647
	 * Helper function for fetchFileAndTitle.
3648
	 *
3649
	 * Also useful if you need to fetch a file but not use it yet,
3650
	 * for example to get the file's handler.
3651
	 *
3652
	 * @param Title $title
3653
	 * @param array $options Array of options to RepoGroup::findFile
3654
	 * @return File|bool
3655
	 */
3656
	protected function fetchFileNoRegister( $title, $options = [] ) {
		if ( isset( $options['broken'] ) ) {
			// broken thumbnail forced by hook
			return false;
		}
		if ( isset( $options['sha1'] ) ) {
			// get by (sha1,timestamp)
			return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
		}
		// get by (name,timestamp)
		return wfFindFile( $title, $options );
	}
3666
3667
	/**
3668
	 * Transclude an interwiki link.
3669
	 *
3670
	 * @param Title $title
3671
	 * @param string $action
3672
	 *
3673
	 * @return string
3674
	 */
3675
	public function interwikiTransclude( $title, $action ) {
		global $wgEnableScaryTranscluding;

		if ( $wgEnableScaryTranscluding ) {
			$url = $title->getFullURL( [ 'action' => $action ] );
			if ( strlen( $url ) <= 255 ) {
				return $this->fetchScaryTemplateMaybeFromCache( $url );
			}
			// URLs longer than 255 bytes are refused
			$errorKey = 'scarytranscludetoolong';
		} else {
			$errorKey = 'scarytranscludedisabled';
		}

		return wfMessage( $errorKey )->inContentLanguage()->text();
	}
3689
3690
	/**
3691
	 * @param string $url
3692
	 * @return mixed|string
3693
	 */
3694
	public function fetchScaryTemplateMaybeFromCache( $url ) {
		global $wgTranscludeCacheExpiry;

		// Look for a cached copy that is not older than the configured expiry
		$dbr = wfGetDB( DB_REPLICA );
		$oldestAllowed = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
		$cached = $dbr->selectRow(
			'transcache',
			[ 'tc_time', 'tc_contents' ],
			[ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $oldestAllowed ) ]
		);
		if ( $cached ) {
			return $cached->tc_contents;
		}

		// Nothing fresh in the cache: fetch over HTTP
		$req = MWHttpRequest::factory( $url, [], __METHOD__ );
		$status = $req->execute(); // Status object
		if ( !$status->isOK() ) {
			$httpStatus = $req->getStatus();
			if ( $httpStatus != 200 ) {
				// Though we failed to fetch the content, this status is useless.
				return wfMessage( 'scarytranscludefailed-httpstatus' )
					->params( $url, $httpStatus /* HTTP status */ )->inContentLanguage()->text();
			}
			return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
		}
		$text = $req->getContent();

		// Store the fetched text, replacing any existing row for this URL
		$dbw = wfGetDB( DB_MASTER );
		$dbw->replace( 'transcache', [ 'tc_url' ], [
			'tc_url' => $url,
			'tc_time' => $dbw->timestamp( time() ),
			'tc_contents' => $text
		] );
		return $text;
	}
3724
3725
	/**
3726
	 * Triple brace replacement -- used for template arguments
3727
	 * @private
3728
	 *
3729
	 * @param array $piece
3730
	 * @param PPFrame $frame
3731
	 *
3732
	 * @return array
3733
	 */
3734
	public function argSubstitution( $piece, $frame ) {
		$parts = $piece['parts'];
		$nameWithSpaces = $frame->expand( $piece['title'] );
		$text = $frame->getArgument( trim( $nameWithSpaces ) );
		$argMissing = ( $text === false );

		$object = false;
		if ( $argMissing && $parts->getLength() > 0 ) {
			$mayUseDefault = $this->ot['html']
				|| $this->ot['pre']
				|| ( $this->ot['wiki'] && $frame->isTemplate() );
			if ( $mayUseDefault ) {
				# No match in frame, use the supplied default
				$object = $parts->item( 0 )->getChildren();
			}
		}

		# Account for the argument size; warn when the limit is exceeded
		$tooLarge = !$this->incrementIncludeSize( 'arg', strlen( $text ) );
		if ( $tooLarge ) {
			$this->limitationWarn( 'post-expand-template-argument' );
		}

		if ( $argMissing && $object === false ) {
			# No match anywhere
			$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
		}

		if ( $object !== false ) {
			return [ 'object' => $object ];
		}

		if ( $tooLarge ) {
			$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
		}
		return [ 'text' => $text ];
	}
3771
3772
	/**
3773
	 * Return the text to be used for a given extension tag.
3774
	 * This is the ghost of strip().
3775
	 *
3776
	 * @param array $params Associative array of parameters:
3777
	 *     name       PPNode for the tag name
3778
	 *     attr       PPNode for unparsed text where tag attributes are thought to be
3779
	 *     attributes Optional associative array of parsed attributes
3780
	 *     inner      Contents of extension element
3781
	 *     noClose    Original text did not have a close tag
3782
	 * @param PPFrame $frame
3783
	 *
3784
	 * @throws MWException
3785
	 * @return string
3786
	 */
3787
	public function extensionSubstitution( $params, $frame ) {
		// Prefix of an error span, and its length; used below to detect
		// expansions that produced an error instead of real content.
		static $errorStr = '<span class="error">';
		static $errorLen = 20;

		// NOTE: local variable names in this method are deliberately left
		// unchanged: extract() further down may overwrite any of them
		// (intentionally so for $markerType).

		$name = $frame->expand( $params['name'] );
		if ( substr( $name, 0, $errorLen ) === $errorStr ) {
			// Probably expansion depth or node count exceeded. Just punt the
			// error up.
			return $name;
		}

		$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
		if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
			// See above
			return $attrText;
		}

		$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
		if ( substr( $content, 0, $errorLen ) === $errorStr ) {
			// See above
			return $content;
		}

		// Strip marker that stands in for this tag's output
		$marker = self::MARKER_PREFIX . "-$name-"
			. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

		$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
			( $this->ot['html'] || $this->ot['pre'] );
		if ( $isFunctionTag ) {
			// Function tag output is returned directly, without a strip marker
			$markerType = 'none';
		} else {
			$markerType = 'general';
		}
		if ( $this->ot['html'] || $isFunctionTag ) {
			$name = strtolower( $name );
			$attributes = Sanitizer::decodeTagAttributes( $attrText );
			if ( isset( $params['attributes'] ) ) {
				// Attributes decoded from the text win over pre-parsed ones
				$attributes = $attributes + $params['attributes'];
			}

			if ( isset( $this->mTagHooks[$name] ) ) {
				# Workaround for PHP bug 35229 and similar
				if ( !is_callable( $this->mTagHooks[$name] ) ) {
					throw new MWException( "Tag hook for $name is not callable\n" );
				}
				$output = call_user_func_array( $this->mTagHooks[$name],
					[ $content, $attributes, $this, $frame ] );
			} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
				list( $callback, ) = $this->mFunctionTagHooks[$name];
				if ( !is_callable( $callback ) ) {
					throw new MWException( "Tag hook for $name is not callable\n" );
				}

				// $this can no longer be passed by reference as of PHP 7.1, so
				// hand callbacks that declare a by-reference parser parameter
				// a plain local variable holding the same object.
				$parser = $this;
				$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
			} else {
				$output = '<span class="error">Invalid tag extension name: ' .
					htmlspecialchars( $name ) . '</span>';
			}

			if ( is_array( $output ) ) {
				# Extract flags to local scope (to override $markerType)
				$flags = $output;
				$output = $flags[0];
				unset( $flags[0] );
				extract( $flags );
			}
		} else {
			// Not rendering this tag: rebuild its source text instead
			if ( is_null( $attrText ) ) {
				$attrText = '';
			}
			if ( isset( $params['attributes'] ) ) {
				foreach ( $params['attributes'] as $attrName => $attrValue ) {
					$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
						htmlspecialchars( $attrValue ) . '"';
				}
			}
			if ( $content === null ) {
				$output = "<$name$attrText/>";
			} else {
				// isset() treats a missing 'close' key like a null one,
				// without raising an undefined-index notice.
				$close = !isset( $params['close'] ) ? '' : $frame->expand( $params['close'] );
				if ( substr( $close, 0, $errorLen ) === $errorStr ) {
					// See above
					return $close;
				}
				$output = "<$name$attrText>$content$close";
			}
		}

		if ( $markerType === 'none' ) {
			return $output;
		} elseif ( $markerType === 'nowiki' ) {
			$this->mStripState->addNoWiki( $marker, $output );
		} elseif ( $markerType === 'general' ) {
			$this->mStripState->addGeneral( $marker, $output );
		} else {
			throw new MWException( __METHOD__ . ': invalid marker type' );
		}
		return $marker;
	}
3886
3887
	/**
3888
	 * Increment an include size counter
3889
	 *
3890
	 * @param string $type The type of expansion
3891
	 * @param int $size The size of the text
3892
	 * @return bool False if this inclusion would take it over the maximum, true otherwise
3893
	 */
3894
	public function incrementIncludeSize( $type, $size ) {
		$newTotal = $this->mIncludeSizes[$type] + $size;
		if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
			// Over the limit: leave the counter as it was
			return false;
		}
		$this->mIncludeSizes[$type] = $newTotal;
		return true;
	}
3902
3903
	/**
3904
	 * Increment the expensive function count
3905
	 *
3906
	 * @return bool False if the limit has been exceeded
3907
	 */
3908
	public function incrementExpensiveFunctionCount() {
		// Count this call first, then compare the new total with the limit
		$limit = $this->mOptions->getExpensiveParserFunctionLimit();
		return ++$this->mExpensiveFunctionCount <= $limit;
	}
3912
3913
	/**
3914
	 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
3915
	 * Fills $this->mDoubleUnderscores, returns the modified text
3916
	 *
3917
	 * @param string $text
3918
	 *
3919
	 * @return string
3920
	 */
3921
	public function doDoubleUnderscore( $text ) {
		# The position of __TOC__ needs to be recorded
		$tocWord = MagicWord::get( 'toc' );
		if ( $tocWord->match( $text ) ) {
			$this->mShowToc = true;
			$this->mForceTocPosition = true;

			# Set a placeholder for the first occurrence (at the end we'll fill
			# it in with the TOC), then drop every other occurrence.
			$withPlaceholder = $tocWord->replace( '<!--MWTOC-->', $text, 1 );
			$text = $tocWord->replace( '', $withPlaceholder );
		}

		# Now match and remove the rest of them
		$doubleUnderscoreWords = MagicWord::getDoubleUnderscoreArray();
		$this->mDoubleUnderscores = $doubleUnderscoreWords->matchAndRemove( $text );

		if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
			$this->mOutput->mNoGallery = true;
		}
		if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
			$this->mShowToc = false;
		}
		if ( isset( $this->mDoubleUnderscores['hiddencat'] )
			&& $this->mTitle->getNamespace() == NS_CATEGORY
		) {
			$this->addTrackingCategory( 'hidden-category-category' );
		}

		# (bug 8068) Allow control over whether robots index a page.
		# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here!  This
		# is not desirable, the last one on the page should win.
		# The map order matters: 'noindex' is applied before 'index'.
		$indexPolicies = [
			'noindex' => 'noindex-category',
			'index' => 'index-category',
		];
		foreach ( $indexPolicies as $policy => $trackingCategory ) {
			if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
				$this->mOutput->setIndexPolicy( $policy );
				$this->addTrackingCategory( $trackingCategory );
			}
		}

		# Cache all double underscores in the database
		foreach ( array_keys( $this->mDoubleUnderscores ) as $key ) {
			$this->mOutput->setProperty( $key, '' );
		}

		return $text;
	}
3970
3971
	/**
3972
	 * @see ParserOutput::addTrackingCategory()
3973
	 * @param string $msg Message key
3974
	 * @return bool Whether the addition was successful
3975
	 */
3976
	public function addTrackingCategory( $msg ) {
		// Delegate to the ParserOutput, passing the title being parsed
		$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
		return $added;
	}
3979
3980
	/**
3981
	 * This function accomplishes several tasks:
3982
	 * 1) Auto-number headings if that option is enabled
3983
	 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
3984
	 * 3) Add a Table of contents on the top for users who have enabled the option
3985
	 * 4) Auto-anchor headings
3986
	 *
3987
	 * It loops through all headlines, collects the necessary data, then splits up the
3988
	 * string and re-inserts the newly formatted headlines.
3989
	 *
3990
	 * @param string $text
3991
	 * @param string $origText Original, untouched wikitext
3992
	 * @param bool $isMain
3993
	 * @return mixed|string
3994
	 * @private
3995
	 */
3996
	public function formatHeadings( $text, $origText, $isMain = true ) {
3997
		global $wgMaxTocLevel, $wgExperimentalHtmlIds;
3998
3999
		# Inhibit editsection links if requested in the page
4000
		if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4001
			$maybeShowEditLink = $showEditLink = false;
4002
		} else {
4003
			$maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
4004
			$showEditLink = $this->mOptions->getEditSection();
4005
		}
4006
		if ( $showEditLink ) {
4007
			$this->mOutput->setEditSectionTokens( true );
4008
		}
4009
4010
		# Get all headlines for numbering them and adding funky stuff like [edit]
4011
		# links - this is for later, but we need the number of headlines right now
4012
		$matches = [];
4013
		$numMatches = preg_match_all(
4014
			'/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
4015
			$text,
4016
			$matches
4017
		);
4018
4019
		# if there are fewer than 4 headlines in the article, do not show TOC
4020
		# unless it's been explicitly enabled.
4021
		$enoughToc = $this->mShowToc &&
4022
			( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4023
4024
		# Allow user to stipulate that a page should have a "new section"
4025
		# link added via __NEWSECTIONLINK__
4026
		if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4027
			$this->mOutput->setNewSection( true );
4028
		}
4029
4030
		# Allow user to remove the "new section"
4031
		# link via __NONEWSECTIONLINK__
4032
		if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4033
			$this->mOutput->hideNewSection( true );
4034
		}
4035
4036
		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4037
		# override above conditions and always show TOC above first header
4038
		if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4039
			$this->mShowToc = true;
4040
			$enoughToc = true;
4041
		}
4042
4043
		# headline counter
4044
		$headlineCount = 0;
4045
		$numVisible = 0;
4046
4047
		# Ugh .. the TOC should have neat indentation levels which can be
4048
		# passed to the skin functions. These are determined here
4049
		$toc = '';
4050
		$full = '';
4051
		$head = [];
4052
		$sublevelCount = [];
4053
		$levelCount = [];
4054
		$level = 0;
4055
		$prevlevel = 0;
4056
		$toclevel = 0;
4057
		$prevtoclevel = 0;
4058
		$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4059
		$baseTitleText = $this->mTitle->getPrefixedDBkey();
4060
		$oldType = $this->mOutputType;
4061
		$this->setOutputType( self::OT_WIKI );
4062
		$frame = $this->getPreprocessor()->newFrame();
4063
		$root = $this->preprocessToDom( $origText );
4064
		$node = $root->getFirstChild();
4065
		$byteOffset = 0;
4066
		$tocraw = [];
4067
		$refers = [];
4068
4069
		$headlines = $numMatches !== false ? $matches[3] : [];
4070
4071
		foreach ( $headlines as $headline ) {
4072
			$isTemplate = false;
4073
			$titleText = false;
4074
			$sectionIndex = false;
4075
			$numbering = '';
4076
			$markerMatches = [];
4077
			if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4078
				$serial = $markerMatches[1];
4079
				list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
4080
				$isTemplate = ( $titleText != $baseTitleText );
4081
				$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4082
			}
4083
4084
			if ( $toclevel ) {
4085
				$prevlevel = $level;
4086
			}
4087
			$level = $matches[1][$headlineCount];
4088
4089
			if ( $level > $prevlevel ) {
4090
				# Increase TOC level
4091
				$toclevel++;
4092
				$sublevelCount[$toclevel] = 0;
4093
				if ( $toclevel < $wgMaxTocLevel ) {
4094
					$prevtoclevel = $toclevel;
4095
					$toc .= Linker::tocIndent();
4096
					$numVisible++;
4097
				}
4098
			} elseif ( $level < $prevlevel && $toclevel > 1 ) {
4099
				# Decrease TOC level, find level to jump to
4100
4101
				for ( $i = $toclevel; $i > 0; $i-- ) {
4102
					if ( $levelCount[$i] == $level ) {
4103
						# Found last matching level
4104
						$toclevel = $i;
4105
						break;
4106
					} elseif ( $levelCount[$i] < $level ) {
4107
						# Found first matching level below current level
4108
						$toclevel = $i + 1;
4109
						break;
4110
					}
4111
				}
4112
				if ( $i == 0 ) {
4113
					$toclevel = 1;
4114
				}
4115
				if ( $toclevel < $wgMaxTocLevel ) {
4116
					if ( $prevtoclevel < $wgMaxTocLevel ) {
4117
						# Unindent only if the previous toc level was shown :p
4118
						$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
4119
						$prevtoclevel = $toclevel;
4120
					} else {
4121
						$toc .= Linker::tocLineEnd();
4122
					}
4123
				}
4124
			} else {
4125
				# No change in level, end TOC line
4126
				if ( $toclevel < $wgMaxTocLevel ) {
4127
					$toc .= Linker::tocLineEnd();
4128
				}
4129
			}
4130
4131
			$levelCount[$toclevel] = $level;
4132
4133
			# count number of headlines for each level
4134
			$sublevelCount[$toclevel]++;
4135
			$dot = 0;
4136
			for ( $i = 1; $i <= $toclevel; $i++ ) {
4137
				if ( !empty( $sublevelCount[$i] ) ) {
4138
					if ( $dot ) {
4139
						$numbering .= '.';
4140
					}
4141
					$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
4142
					$dot = 1;
4143
				}
4144
			}
4145
4146
			# The safe header is a version of the header text safe to use for links
4147
4148
			# Remove link placeholders by the link text.
4149
			#     <!--LINK number-->
4150
			# turns into
4151
			#     link text with suffix
4152
			# Do this before unstrip since link text can contain strip markers
4153
			$safeHeadline = $this->replaceLinkHoldersText( $headline );
4154
4155
			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
4156
			$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4157
4158
			# Strip out HTML (first regex removes any tag not allowed)
4159
			# Allowed tags are:
4160
			# * <sup> and <sub> (bug 8393)
4161
			# * <i> (bug 26375)
4162
			# * <b> (r105284)
4163
			# * <bdi> (bug 72884)
4164
			# * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
4165
			# * <s> and <strike> (T35715)
4166
			# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
4167
			# to allow setting directionality in toc items.
4168
			$tocline = preg_replace(
4169
				[
4170
					'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
4171
					'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
4172
				],
4173
				[ '', '<$1>' ],
4174
				$safeHeadline
4175
			);
4176
4177
			# Strip '<span></span>', which is the result from the above if
4178
			# <span id="foo"></span> is used to produce an additional anchor
4179
			# for a section.
4180
			$tocline = str_replace( '<span></span>', '', $tocline );
4181
4182
			$tocline = trim( $tocline );
4183
4184
			# For the anchor, strip out HTML-y stuff period
4185
			$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
4186
			$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4187
4188
			# Save headline for section edit hint before it's escaped
4189
			$headlineHint = $safeHeadline;
4190
4191
			if ( $wgExperimentalHtmlIds ) {
4192
				# For reverse compatibility, provide an id that's
4193
				# HTML4-compatible, like we used to.
4194
				# It may be worth noting, academically, that it's possible for
4195
				# the legacy anchor to conflict with a non-legacy headline
4196
				# anchor on the page.  In this case likely the "correct" thing
4197
				# would be to either drop the legacy anchors or make sure
4198
				# they're numbered first.  However, this would require people
4199
				# to type in section names like "abc_.D7.93.D7.90.D7.A4"
4200
				# manually, so let's not bother worrying about it.
4201
				$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
4202
					[ 'noninitial', 'legacy' ] );
4203
				$safeHeadline = Sanitizer::escapeId( $safeHeadline );
4204
4205
				if ( $legacyHeadline == $safeHeadline ) {
4206
					# No reason to have both (in fact, we can't)
4207
					$legacyHeadline = false;
4208
				}
4209
			} else {
4210
				$legacyHeadline = false;
4211
				$safeHeadline = Sanitizer::escapeId( $safeHeadline,
4212
					'noninitial' );
4213
			}
4214
4215
			# HTML names must be case-insensitively unique (bug 10721).
4216
			# This does not apply to Unicode characters per
4217
			# http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
4218
			# @todo FIXME: We may be changing them depending on the current locale.
4219
			$arrayKey = strtolower( $safeHeadline );
4220
			if ( $legacyHeadline === false ) {
4221
				$legacyArrayKey = false;
4222
			} else {
4223
				$legacyArrayKey = strtolower( $legacyHeadline );
4224
			}
4225
4226
			# Create the anchor for linking from the TOC to the section
4227
			$anchor = $safeHeadline;
4228
			$legacyAnchor = $legacyHeadline;
4229 View Code Duplication
			if ( isset( $refers[$arrayKey] ) ) {
4230
				// @codingStandardsIgnoreStart
4231
				for ( $i = 2; isset( $refers["${arrayKey}_$i"] ); ++$i );
4232
				// @codingStandardsIgnoreEnd
4233
				$anchor .= "_$i";
4234
				$refers["${arrayKey}_$i"] = true;
4235
			} else {
4236
				$refers[$arrayKey] = true;
4237
			}
4238 View Code Duplication
			if ( $legacyHeadline !== false && isset( $refers[$legacyArrayKey] ) ) {
4239
				// @codingStandardsIgnoreStart
4240
				for ( $i = 2; isset( $refers["${legacyArrayKey}_$i"] ); ++$i );
4241
				// @codingStandardsIgnoreEnd
4242
				$legacyAnchor .= "_$i";
4243
				$refers["${legacyArrayKey}_$i"] = true;
4244
			} else {
4245
				$refers[$legacyArrayKey] = true;
4246
			}
4247
4248
			# Don't number the heading if it is the only one (looks silly)
4249
			if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
4250
				# the two are different if the line contains a link
4251
				$headline = Html::element(
4252
					'span',
4253
					[ 'class' => 'mw-headline-number' ],
4254
					$numbering
4255
				) . ' ' . $headline;
4256
			}
4257
4258
			if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
4259
				$toc .= Linker::tocLine( $anchor, $tocline,
4260
					$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
4261
			}
4262
4263
			# Add the section to the section tree
4264
			# Find the DOM node for this header
4265
			$noOffset = ( $isTemplate || $sectionIndex === false );
4266
			while ( $node && !$noOffset ) {
4267
				if ( $node->getName() === 'h' ) {
4268
					$bits = $node->splitHeading();
4269
					if ( $bits['i'] == $sectionIndex ) {
4270
						break;
4271
					}
4272
				}
4273
				$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
4274
					$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
4275
				$node = $node->getNextSibling();
4276
			}
4277
			$tocraw[] = [
4278
				'toclevel' => $toclevel,
4279
				'level' => $level,
4280
				'line' => $tocline,
4281
				'number' => $numbering,
4282
				'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
4283
				'fromtitle' => $titleText,
4284
				'byteoffset' => ( $noOffset ? null : $byteOffset ),
4285
				'anchor' => $anchor,
4286
			];
4287
4288
			# give headline the correct <h#> tag
4289
			if ( $maybeShowEditLink && $sectionIndex !== false ) {
4290
				// Output edit section links as markers with styles that can be customized by skins
4291
				if ( $isTemplate ) {
4292
					# Put a T flag in the section identifier, to indicate to extractSections()
4293
					# that sections inside <includeonly> should be counted.
4294
					$editsectionPage = $titleText;
4295
					$editsectionSection = "T-$sectionIndex";
4296
					$editsectionContent = null;
4297
				} else {
4298
					$editsectionPage = $this->mTitle->getPrefixedText();
4299
					$editsectionSection = $sectionIndex;
4300
					$editsectionContent = $headlineHint;
4301
				}
4302
				// We use a bit of pesudo-xml for editsection markers. The
4303
				// language converter is run later on. Using a UNIQ style marker
4304
				// leads to the converter screwing up the tokens when it
4305
				// converts stuff. And trying to insert strip tags fails too. At
4306
				// this point all real inputted tags have already been escaped,
4307
				// so we don't have to worry about a user trying to input one of
4308
				// these markers directly. We use a page and section attribute
4309
				// to stop the language converter from converting these
4310
				// important bits of data, but put the headline hint inside a
4311
				// content block because the language converter is supposed to
4312
				// be able to convert that piece of data.
4313
				// Gets replaced with html in ParserOutput::getText
4314
				$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
4315
				$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
4316
				if ( $editsectionContent !== null ) {
4317
					$editlink .= '>' . $editsectionContent . '</mw:editsection>';
4318
				} else {
4319
					$editlink .= '/>';
4320
				}
4321
			} else {
4322
				$editlink = '';
4323
			}
4324
			$head[$headlineCount] = Linker::makeHeadline( $level,
4325
				$matches['attrib'][$headlineCount], $anchor, $headline,
4326
				$editlink, $legacyAnchor );
4327
4328
			$headlineCount++;
4329
		}
4330
4331
		$this->setOutputType( $oldType );
4332
4333
		# Never ever show TOC if no headers
4334
		if ( $numVisible < 1 ) {
4335
			$enoughToc = false;
4336
		}
4337
4338
		if ( $enoughToc ) {
4339
			if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
4340
				$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
4341
			}
4342
			$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
4343
			$this->mOutput->setTOCHTML( $toc );
4344
			$toc = self::TOC_START . $toc . self::TOC_END;
4345
			$this->mOutput->addModules( 'mediawiki.toc' );
4346
		}
4347
4348
		if ( $isMain ) {
4349
			$this->mOutput->setSections( $tocraw );
4350
		}
4351
4352
		# split up and insert constructed headlines
4353
		$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
4354
		$i = 0;
4355
4356
		// build an array of document sections
4357
		$sections = [];
4358
		foreach ( $blocks as $block ) {
4359
			// $head is zero-based, sections aren't.
4360
			if ( empty( $head[$i - 1] ) ) {
4361
				$sections[$i] = $block;
4362
			} else {
4363
				$sections[$i] = $head[$i - 1] . $block;
4364
			}
4365
4366
			/**
4367
			 * Send a hook, one per section.
4368
			 * The idea here is to be able to make section-level DIVs, but to do so in a
4369
			 * lower-impact, more correct way than r50769
4370
			 *
4371
			 * $this : caller
4372
			 * $section : the section number
4373
			 * &$sectionContent : ref to the content of the section
4374
			 * $showEditLinks : boolean describing whether this section has an edit link
4375
			 */
4376
			Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );
4377
4378
			$i++;
4379
		}
4380
4381
		if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
4382
			// append the TOC at the beginning
4383
			// Top anchor now in skin
4384
			$sections[0] = $sections[0] . $toc . "\n";
4385
		}
4386
4387
		$full .= implode( '', $sections );
4388
4389
		if ( $this->mForceTocPosition ) {
4390
			return str_replace( '<!--MWTOC-->', $toc, $full );
4391
		} else {
4392
			return $full;
4393
		}
4394
	}
4395
4396
	/**
	 * Prepare wiki markup for saving: normalise "\r\n" line endings to "\n"
	 * and, when the parser options ask for it, run the pre-save pass that
	 * substitutes signatures, {{subst:}} templates, etc.
	 *
	 * @param string $text The wikitext to transform
	 * @param Title $title The Title object for the page being saved
	 * @param User $user The User object describing the current user
	 * @param ParserOptions $options Parsing options
	 * @param bool $clearState Whether to lock the parser and clear its state first
	 * @return string The altered wiki markup
	 */
	public function preSaveTransform( $text, Title $title, User $user,
		ParserOptions $options, $clearState = true
	) {
		if ( $clearState ) {
			// Only kept alive until this method returns; presumably the lock
			// is released when the variable goes out of scope (see lock()).
			$parserLock = $this->lock();
		}
		$this->startParse( $title, $options, self::OT_WIKI, $clearState );
		$this->setUser( $user );

		// Line endings are still normalised here for backwards-compatibility
		// with callers that invoke PST directly; TextContent subclasses
		// should already have taken care of it.
		$normalized = TextContent::normalizeLineEndings( $text );

		$transformed = $options->getPreSaveTransform()
			? $this->pstPass2( $normalized, $user )
			: $normalized;
		$result = $this->mStripState->unstripBoth( $transformed );

		# Reset
		$this->setUser( null );

		return $result;
	}
4430
4431
	/**
	 * Pre-save transform helper: expands {{subst:}} constructs, replaces
	 * tilde signatures (~~~, ~~~~, ~~~~~) and resolves pipe-trick links.
	 *
	 * @param string $text Wikitext being saved
	 * @param User $user User whose signature is substituted for the tildes
	 *
	 * @return string The transformed wikitext
	 */
	private function pstPass2( $text, $user ) {
		global $wgContLang;

		# Note: This is the timestamp saved as hardcoded wikitext to
		# the database, we use $wgContLang here in order to give
		# everyone the same signature and use the default one rather
		# than the one selected in each user's preferences.
		# (see also bug 12815)
		$ts = $this->mOptions->getTimestamp();
		$timestamp = MWTimestamp::getLocalInstance( $ts );
		$ts = $timestamp->format( 'YmdHis' );
		$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

		$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

		# Variable replacement
		# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
		$text = $this->replaceVariables( $text );

		# This works almost by chance, as the replaceVariables are done before the getUserSig(),
		# which may corrupt this parser instance via its wfMessage()->text() call-

		# Signatures
		# strtr() prefers the longest matching key, so five tildes are never
		# consumed as a shorter signature form.
		$sigText = $this->getUserSig( $user );
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );

		# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
		$tc = '[' . Title::legalChars() . ']';
		$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

		// [[ns:page (context)|]]
		$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
		// [[ns:page(context)|]] with full-width brackets U+FF08 / U+FF09 (added in r40257).
		// They are written as UTF-8 byte escapes so they cannot be silently
		// folded to ASCII: with ASCII parentheses this pattern matches every
		// "[[X|]]" link and truncates its label ("[[Foo|]]" -> "[[Foo|F]]").
		$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\xEF\xBC\x88$tc+\xEF\xBC\x89)\\|]]/";
		// [[ns:page (context), context|]] using either an ASCII comma followed
		// by a space or a full-width comma U+FF0C (UTF-8 byte escape).
		$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\xEF\xBC\x8C)$tc+|)\\|]]/";
		// [[|page]] (reverse pipe trick: add context from page title)
		$p2 = "/\[\[\\|($tc+)]]/";

		# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
		$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
		$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
		$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

		# Reverse pipe trick: borrow the namespace prefix and the trailing
		# "(context)" or ", context" of the current page title, if it has one.
		$t = $this->mTitle->getText();
		$m = [];
		if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
		} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
		} else {
			# if there's no context, don't bother duplicating the title
			$text = preg_replace( $p2, '[[\\1]]', $text );
		}

		return $text;
	}
4500
4501
	/**
	 * Build the signature wikitext for a user, validated and ready to insert.
	 *
	 * Callers that already hold the nickname or the fancysig preference can
	 * pass them in so they are not read from the user object again.
	 * Do not reuse this parser instance after calling getUserSig(),
	 * as it may have changed if it's the $wgParser.
	 *
	 * @param User $user
	 * @param string|bool $nickname Nickname to use, or false to read the user's 'nickname' option
	 * @param bool|null $fancySig Whether the nickname is the complete signature,
	 *    or null to read the user's 'fancysig' option
	 * @return string
	 */
	public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
		global $wgMaxSigChars;

		$username = $user->getName();

		# Fill in whatever the caller did not supply from the user's preferences
		if ( $nickname === false ) {
			$nickname = $user->getOption( 'nickname' );
		}
		if ( $fancySig === null ) {
			$fancySig = $user->getBoolOption( 'fancysig' );
		}

		# No nickname set: sign with the plain user name
		if ( $nickname == null ) {
			$nickname = $username;
		}

		if ( mb_strlen( $nickname ) > $wgMaxSigChars ) {
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
		} elseif ( $fancySig !== false ) {
			# The nickname is a complete signature and may contain markup
			if ( $this->validateSig( $nickname ) !== false ) {
				# Well-formed: clean it up (if needed) and use it as is
				return $this->cleanSig( $nickname, true );
			}

			# Malformed markup: fall back to the default
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
		}

		# Make sure nickname doesnt get a sig in a sig
		$nickname = self::cleanSigInSig( $nickname );

		# Still here, so wrap the nickname in a link to the user page
		$userText = wfEscapeWikiText( $username );
		$nickText = wfEscapeWikiText( $nickname );
		if ( $user->isAnon() ) {
			$msgName = 'signature-anon';
		} else {
			$msgName = 'signature';
		}

		$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

		return $message->title( $this->getTitle() )->text();
	}
4557
4558
	/**
	 * Check that a signature is a well-formed XML fragment.
	 *
	 * @param string $text
	 * @return string|bool The input text, unchanged, or false if it is not well-formed.
	 */
	public function validateSig( $text ) {
		if ( Xml::isWellFormedXmlFragment( $text ) ) {
			return $text;
		}

		return false;
	}
4567
4568
	/**
	 * Clean up signature text
	 *
	 * 1) Force every transclusion to be substituted
	 * 2) Strip 3, 4 or 5 tildes out of the signature @see cleanSigInSig
	 * 3) Expand the result with the preprocessor
	 *
	 * Nothing is changed when the parser options disable signature cleaning.
	 *
	 * @param string $text
	 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
	 * @return string Signature text
	 */
	public function cleanSig( $text, $parsing = false ) {
		if ( !$parsing ) {
			// Standalone call: lock the parser and set up a fresh parse
			global $wgTitle;
			$parserLock = $this->lock();
			$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
		}

		# Option to disable this feature
		if ( !$this->mOptions->getCleanSignatures() ) {
			return $text;
		}

		# @todo FIXME: Regex doesn't respect extension tags or nowiki
		#  => Move this logic to braceSubstitution()
		$subst = MagicWord::get( 'subst' );
		# Matches every "{{" that is not already followed by the subst keyword
		$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
		$replacement = '{{' . $subst->getSynonym( 0 );

		$text = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

		$dom = $this->preprocessToDom( $text );
		$frame = $this->getPreprocessor()->newFrame();
		$text = $frame->expand( $dom );

		if ( $parsing ) {
			return $text;
		}

		return $this->mStripState->unstripBoth( $text );
	}
4608
4609
	/**
	 * Remove every run of 3, 4 or 5 tildes from signature text.
	 *
	 * @param string $text
	 * @return string Signature text with /~{3,5}/ removed
	 */
	public static function cleanSigInSig( $text ) {
		return preg_replace( '/~{3,5}/', '', $text );
	}
4619
4620
	/**
	 * Public entry point to startParse(): sets up the variables that
	 * parse() would normally set up, so that external code can call
	 * other members of this class with confidence.
	 *
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int $outputType
	 * @param bool $clearState
	 */
	public function startExternalParse(
		Title $title = null, ParserOptions $options, $outputType, $clearState = true
	) {
		$this->startParse( $title, $options, $outputType, $clearState );
	}
4634
4635
	/**
	 * Store the title, options and output type for a new parse and,
	 * if requested, clear the parser state.
	 *
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int $outputType
	 * @param bool $clearState
	 */
	private function startParse(
		Title $title = null, ParserOptions $options, $outputType, $clearState = true
	) {
		$this->setTitle( $title );
		$this->mOptions = $options;
		$this->setOutputType( $outputType );

		if ( !$clearState ) {
			return;
		}
		$this->clearState();
	}
4651
4652
	/**
	 * Wrapper for preprocess()
	 *
	 * A static flag guards against re-entry: if this method is reached
	 * again while a call is still in progress, the text is returned
	 * untouched. The flag is a static local, so it is not tracked per
	 * Parser instance.
	 *
	 * @param string $text The text to preprocess
	 * @param ParserOptions $options Options
	 * @param Title|null $title Title object or null to use $wgTitle
	 * @return string
	 */
	public function transformMsg( $text, $options, $title = null ) {
		static $executing = false;

		# Guard against infinite recursion
		if ( $executing ) {
			return $text;
		}

		if ( !$title ) {
			global $wgTitle;
			$title = $wgTitle;
		}

		$executing = true;
		try {
			return $this->preprocess( $text, $title, $options );
		} finally {
			# Release the guard even when preprocess() throws; otherwise
			# every later call would be mistaken for recursion and return
			# its text unprocessed.
			$executing = false;
		}
	}
4679
4680
	/**
	 * Register a callback for an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
	 * The callback should have the following form:
	 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
	 *
	 * Transform and return $text. Use $parser for any required context, e.g. use
	 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
	 *
	 * Hooks may return extended information by returning an array, of which the
	 * first numbered element (index 0) must be the return string, and all other
	 * entries are extracted into local variables within an internal function
	 * in the Parser class.
	 *
	 * This interface (introduced r61913) appears to be undocumented, but
	 * 'markerType' is used by some core tag hooks to override which strip
	 * array their results are placed in. **Use great caution if attempting
	 * this interface, as it is not documented and injudicious use could smash
	 * private variables.**
	 *
	 * The tag name is lower-cased before it is stored, and is added to the
	 * strip list if it is not there yet.
	 *
	 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
	 * @param callable $callback The callback function (and object) to use for the tag
	 * @throws MWException If the tag name contains '<', '>' or a line break
	 * @return callable|null The old value of the mTagHooks array associated with the hook
	 */
	public function setHook( $tag, $callback ) {
		$name = strtolower( $tag );

		$badChar = [];
		if ( preg_match( '/[<>\r\n]/', $name, $badChar ) ) {
			throw new MWException( "Invalid character {$badChar[0]} in setHook('$name', ...) call" );
		}

		$previous = null;
		if ( isset( $this->mTagHooks[$name] ) ) {
			$previous = $this->mTagHooks[$name];
		}
		$this->mTagHooks[$name] = $callback;

		$alreadyStripped = in_array( $name, $this->mStripList );
		if ( !$alreadyStripped ) {
			$this->mStripList[] = $name;
		}

		return $previous;
	}
4717
4718
	/**
	 * As setHook(), but letting the contents be parsed.
	 *
	 * Transparent tag hooks are like regular XML-style tag hooks, except they
	 * operate late in the transformation sequence, on HTML instead of wikitext.
	 *
	 * This is probably obsoleted by things dealing with parser frames?
	 * The only extension currently using it is geoserver.
	 *
	 * Unlike setHook(), this does not add the tag to the strip list.
	 *
	 * @since 1.10
	 * @todo better document or deprecate this
	 *
	 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
	 * @param callable $callback The callback function (and object) to use for the tag
	 * @throws MWException If the tag name contains '<', '>' or a line break
	 * @return callable|null The old value of the mTransparentTagHooks array
	 *   associated with the hook
	 */
	public function setTransparentTagHook( $tag, $callback ) {
		$tag = strtolower( $tag );
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			// The message names this method, so the failing call can be found.
			throw new MWException(
				"Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call"
			);
		}
		$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
		$this->mTransparentTagHooks[$tag] = $callback;

		return $oldVal;
	}
4745
4746
	/**
	 * Remove all tag hooks and function tag hooks, and reset the strip
	 * list to its default. Transparent tag hooks are not touched here.
	 */
	public function clearTagHooks() {
		$this->mStripList = $this->mDefaultStripList;
		$this->mFunctionTagHooks = [];
		$this->mTagHooks = [];
	}
4754
4755
	/**
	 * Register a parser function, e.g. {{sum:1|2|3}}
	 * The callback function should have the form:
	 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
	 *
	 * Or with Parser::SFH_OBJECT_ARGS:
	 *    function myParserFunction( $parser, $frame, $args ) { ... }
	 *
	 * The callback may either return the text result of the function, or an array with the text
	 * in element 0, and a number of flags in the other elements. The names of the flags are
	 * specified in the keys. Valid flags are:
	 *   found                     The text returned is valid, stop processing the template. This
	 *                             is on by default.
	 *   nowiki                    Wiki markup in the return value should be escaped
	 *   isHTML                    The returned text is HTML, armour it against wikitext transformation
	 *
	 * @param string $id The magic word ID
	 * @param callable $callback The callback function (and object) to use
	 * @param int $flags A combination of the following flags:
	 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
	 *
	 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
	 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
	 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
	 *     the arguments, and to control the way they are expanded.
	 *
	 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
	 *     arguments, for instance:
	 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
	 *
	 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
	 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
	 *     working if/when this is changed.
	 *
	 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
	 *     expansion.
	 *
	 *     Please read the documentation in includes/parser/Preprocessor.php for more information
	 *     about the methods available in PPFrame and PPNode.
	 *
	 * @throws MWException If no magic word is found for $id
	 * @return callable|null The old callback function for this name, or null if there was none
	 */
	public function setFunctionHook( $id, $callback, $flags = 0 ) {
		global $wgContLang;

		$previous = null;
		if ( isset( $this->mFunctionHooks[$id] ) ) {
			$previous = $this->mFunctionHooks[$id][0];
		}
		$this->mFunctionHooks[$id] = [ $callback, $flags ];

		# Add to function cache
		$magicWord = MagicWord::get( $id );
		if ( !$magicWord ) {
			throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
		}

		$synonyms = $magicWord->getSynonyms();
		# 0 or 1: selects the case-insensitive or the case-sensitive synonym table
		$sensitive = (int)$magicWord->isCaseSensitive();
		$needsHash = !( $flags & self::SFH_NO_HASH );

		foreach ( $synonyms as $synonym ) {
			# Case-insensitive words are stored lower-cased
			$key = $sensitive ? $synonym : $wgContLang->lc( $synonym );
			# Add leading hash
			if ( $needsHash ) {
				$key = '#' . $key;
			}
			# Remove trailing colon
			if ( substr( $key, -1 ) === ':' ) {
				$key = substr( $key, 0, -1 );
			}
			$this->mFunctionSynonyms[$sensitive][$key] = $id;
		}

		return $previous;
	}
4830
4831
	/**
	 * List the identifiers of all registered function hooks.
	 *
	 * @return array Keys of the function hook table
	 */
	public function getFunctionHooks() {
		$hooks = $this->mFunctionHooks;

		return array_keys( $hooks );
	}
4839
4840
	/**
	 * Register a tag function, e.g. for a tag written as <mytag>some stuff</mytag>.
	 * Unlike tag hooks, tag functions are parsed at preprocessor level.
	 * Unlike parser functions, their content is not preprocessed.
	 *
	 * The tag name is lower-cased before it is stored, and is added to the
	 * strip list if it is not there yet.
	 *
	 * @param string $tag
	 * @param callable $callback
	 * @param int $flags
	 * @throws MWException If the tag name contains '<', '>' or a line break
	 * @return array|null The [ callback, flags ] pair previously registered
	 *   for this tag, or null if there was none
	 */
	public function setFunctionTagHook( $tag, $callback, $flags ) {
		$name = strtolower( $tag );

		$badChar = [];
		if ( preg_match( '/[<>\r\n]/', $name, $badChar ) ) {
			throw new MWException( "Invalid character {$badChar[0]} in setFunctionTagHook('$name', ...) call" );
		}

		$previous = null;
		if ( isset( $this->mFunctionTagHooks[$name] ) ) {
			$previous = $this->mFunctionTagHooks[$name];
		}
		$this->mFunctionTagHooks[$name] = [ $callback, $flags ];

		$alreadyStripped = in_array( $name, $this->mStripList );
		if ( !$alreadyStripped ) {
			$this->mStripList[] = $name;
		}

		return $previous;
	}
4865
4866
	/**
	 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
	 * Placeholders created in Linker::link()
	 *
	 * The work is delegated to the link holder array, which receives $text
	 * by reference.
	 *
	 * @param string $text Buffer to update in place
	 * @param int $options Not used by this method
	 */
	public function replaceLinkHolders( &$text, $options = 0 ) {
		$holders = $this->mLinkHolders;
		$holders->replace( $text );
	}
4876
4877
	/**
	 * Replace "<!--LINK-->" link placeholders with the plain text of the
	 * links (not HTML-formatted).
	 *
	 * @param string $text
	 * @return string The result of the link holder array's replaceText()
	 */
	public function replaceLinkHoldersText( $text ) {
		$holders = $this->mLinkHolders;

		return $holders->replaceText( $text );
	}
4887
4888
	/**
	 * Renders an image gallery from a text with one line per image.
	 * text labels may be given by using |-style alternative text. E.g.
	 *   Image:one.jpg|The number "1"
	 *   Image:tree.jpg|A tree
	 * given as text will return the HTML of a gallery with two images,
	 * labeled 'The number "1"' and
	 * 'A tree'.
	 *
	 * Runs the 'BeforeParserrenderImageGallery' hook once the gallery is
	 * configured, 'BeforeParserFetchFileAndTitle' for every image line, and
	 * 'AfterParserFetchFileAndTitle' on the finished HTML.
	 *
	 * @param string $text Gallery body, one image per line
	 * @param array $params Tag attributes; the keys read here are 'mode',
	 *   'showfilename', 'caption', 'perrow', 'widths' and 'heights', and the
	 *   whole array is also handed to the gallery as additional options
	 * @return string HTML
	 */
	public function renderImageGallery( $text, $params ) {

		$mode = false;
		if ( isset( $params['mode'] ) ) {
			$mode = $params['mode'];
		}

		try {
			$ig = ImageGalleryBase::factory( $mode );
		} catch ( Exception $e ) {
			// If invalid type set, fallback to default.
			$ig = ImageGalleryBase::factory( false );
		}

		$ig->setContextTitle( $this->mTitle );
		$ig->setShowBytes( false );
		$ig->setShowFilename( false );
		$ig->setParser( $this );
		$ig->setHideBadImages();
		$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

		if ( isset( $params['showfilename'] ) ) {
			$ig->setShowFilename( true );
		} else {
			$ig->setShowFilename( false );
		}
		if ( isset( $params['caption'] ) ) {
			$caption = $params['caption'];
			$caption = htmlspecialchars( $caption );
			$caption = $this->replaceInternalLinks( $caption );
			$ig->setCaptionHtml( $caption );
		}
		if ( isset( $params['perrow'] ) ) {
			$ig->setPerRow( $params['perrow'] );
		}
		if ( isset( $params['widths'] ) ) {
			$ig->setWidths( $params['widths'] );
		}
		if ( isset( $params['heights'] ) ) {
			$ig->setHeights( $params['heights'] );
		}
		$ig->setAdditionalOptions( $params );

		// PHP 7.1+ no longer lets $this be taken by reference, which breaks
		// hook handlers that declare the parser argument as a reference.
		// Hand them a local alias instead; it still points at this object.
		$parser = $this;
		Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

		$lines = StringUtils::explode( "\n", $text );
		foreach ( $lines as $line ) {
			# match lines like these:
			# Image:someimage.jpg|This is some image
			$matches = [];
			preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
			# Skip empty lines
			if ( count( $matches ) == 0 ) {
				continue;
			}

			if ( strpos( $matches[0], '%' ) !== false ) {
				$matches[1] = rawurldecode( $matches[1] );
			}
			$title = Title::newFromText( $matches[1], NS_FILE );
			if ( is_null( $title ) ) {
				# Bogus title. Ignore these so we don't bomb out later.
				continue;
			}

			# We need to get what handler the file uses, to figure out parameters.
			# Note, a hook can override the file name, and choose an entirely different
			# file (which potentially could be of a different type and have different handler).
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $title, &$options, &$descQuery ] );
			# Don't register it now, as ImageGallery does that later.
			$file = $this->fetchFileNoRegister( $title, $options );
			$handler = $file ? $file->getHandler() : false;

			$paramMap = [
				'img_alt' => 'gallery-internal-alt',
				'img_link' => 'gallery-internal-link',
			];
			if ( $handler ) {
				$paramMap = $paramMap + $handler->getParamMap();
				// We don't want people to specify per-image widths.
				// Additionally the width parameter would need special casing anyhow.
				unset( $paramMap['img_width'] );
			}

			$mwArray = new MagicWordArray( array_keys( $paramMap ) );

			$label = '';
			$alt = '';
			$link = '';
			$handlerOptions = [];
			if ( isset( $matches[3] ) ) {
				// look for an |alt= definition while trying not to break existing
				// captions with multiple pipes (|) in it, until a more sensible grammar
				// is defined for images in galleries

				// FIXME: Doing recursiveTagParse at this stage, and the trim before
				// splitting on '|' is a bit odd, and different from makeImage.
				$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
				$parameterMatches = StringUtils::explode( '|', $matches[3] );

				foreach ( $parameterMatches as $parameterMatch ) {
					list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
					if ( $magicName ) {
						$paramName = $paramMap[$magicName];

						switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter: the map only
							// gains such entries when a handler exists.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
						}

					} else {
						// Last pipe wins.
						$label = '|' . $parameterMatch;
					}
				}
				// Remove the pipe.
				$label = substr( $label, 1 );
			}

			$ig->add( $title, $label, $alt, $link, $handlerOptions );
		}
		$html = $ig->toHTML();
		Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
		return $html;
	}
5054
5055
	/**
	 * Get the image parameter map for a media handler, together with a
	 * MagicWordArray built from its keys. Results are cached per handler
	 * class; '' is used as the class name when there is no handler.
	 *
	 * @param MediaHandler|bool $handler Handler of the file, or a falsy value if there is none
	 * @return array Two elements: the map of magic word ID => [ type, parameter name ],
	 *   and the MagicWordArray of those magic word IDs
	 */
	public function getImageParams( $handler ) {
		$handlerClass = $handler ? get_class( $handler ) : '';

		if ( isset( $this->mImageParams[$handlerClass] ) ) {
			# Already built for this handler class
			return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
		}

		# Parameters that exist regardless of the handler, grouped by type
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		# Built on first use and then kept in this static
		static $internalParamMap;
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $type => $names ) {
				foreach ( $names as $name ) {
					# Magic word IDs use underscores, e.g. 'text-top' => 'img_text_top'
					$internalParamMap[str_replace( '-', '_', "img_$name" )] = [ $type, $name ];
				}
			}
		}

		# Add handler params
		$paramMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$paramMap[$magic] = [ 'handler', $paramName ];
			}
		}

		$this->mImageParams[$handlerClass] = $paramMap;
		$this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) );

		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}
5098
5099
	/**
5100
	 * Parse image options text and use it to make an image
5101
	 *
5102
	 * @param Title $title
5103
	 * @param string $options
5104
	 * @param LinkHolderArray|bool $holders
5105
	 * @return string HTML
5106
	 */
5107
	public function makeImage( $title, $options, $holders = false ) {
5108
		# Check if the options text is of the form "options|alt text"
5109
		# Options are:
5110
		#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
5111
		#  * left       no resizing, just left align. label is used for alt= only
5112
		#  * right      same, but right aligned
5113
		#  * none       same, but not aligned
5114
		#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
5115
		#  * center     center the image
5116
		#  * frame      Keep original image size, no magnify-button.
5117
		#  * framed     Same as "frame"
5118
		#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
5119
		#  * upright    reduce width for upright images, rounded to full __0 px
5120
		#  * border     draw a 1px border around the image
5121
		#  * alt        Text for HTML alt attribute (defaults to empty)
5122
		#  * class      Set a class for img node
5123
		#  * link       Set the target of the image link. Can be external, interwiki, or local
5124
		# vertical-align values (no % or length right now):
5125
		#  * baseline
5126
		#  * sub
5127
		#  * super
5128
		#  * top
5129
		#  * text-top
5130
		#  * middle
5131
		#  * bottom
5132
		#  * text-bottom
5133
5134
		$parts = StringUtils::explode( "|", $options );
5135
5136
		# Give extensions a chance to select the file revision for us
5137
		$options = [];
5138
		$descQuery = false;
5139
		Hooks::run( 'BeforeParserFetchFileAndTitle',
5140
			[ $this, $title, &$options, &$descQuery ] );
5141
		# Fetch and register the file (file title may be different via hooks)
5142
		list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5143
5144
		# Get parameter map
5145
		$handler = $file ? $file->getHandler() : false;
5146
5147
		list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5148
5149
		if ( !$file ) {
5150
			$this->addTrackingCategory( 'broken-file-category' );
5151
		}
5152
5153
		# Process the input parameters
5154
		$caption = '';
5155
		$params = [ 'frame' => [], 'handler' => [],
5156
			'horizAlign' => [], 'vertAlign' => [] ];
5157
		$seenformat = false;
5158
		foreach ( $parts as $part ) {
5159
			$part = trim( $part );
5160
			list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5161
			$validated = false;
5162
			if ( isset( $paramMap[$magicName] ) ) {
5163
				list( $type, $paramName ) = $paramMap[$magicName];
5164
5165
				# Special case; width and height come in one variable together
5166
				if ( $type === 'handler' && $paramName === 'width' ) {
5167
					$parsedWidthParam = $this->parseWidthParam( $value );
5168 View Code Duplication
					if ( isset( $parsedWidthParam['width'] ) ) {
5169
						$width = $parsedWidthParam['width'];
5170
						if ( $handler->validateParam( 'width', $width ) ) {
5171
							$params[$type]['width'] = $width;
5172
							$validated = true;
5173
						}
5174
					}
5175 View Code Duplication
					if ( isset( $parsedWidthParam['height'] ) ) {
5176
						$height = $parsedWidthParam['height'];
5177
						if ( $handler->validateParam( 'height', $height ) ) {
5178
							$params[$type]['height'] = $height;
5179
							$validated = true;
5180
						}
5181
					}
5182
					# else no validation -- bug 13436
5183
				} else {
5184
					if ( $type === 'handler' ) {
5185
						# Validate handler parameter
5186
						$validated = $handler->validateParam( $paramName, $value );
5187
					} else {
5188
						# Validate internal parameters
5189
						switch ( $paramName ) {
5190
						case 'manualthumb':
5191
						case 'alt':
5192
						case 'class':
5193
							# @todo FIXME: Possibly check validity here for
5194
							# manualthumb? downstream behavior seems odd with
5195
							# missing manual thumbs.
5196
							$validated = true;
5197
							$value = $this->stripAltText( $value, $holders );
5198
							break;
5199
						case 'link':
5200
							$chars = self::EXT_LINK_URL_CLASS;
5201
							$addr = self::EXT_LINK_ADDR;
5202
							$prots = $this->mUrlProtocols;
5203
							if ( $value === '' ) {
5204
								$paramName = 'no-link';
5205
								$value = true;
5206
								$validated = true;
5207
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5208
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5209
									$paramName = 'link-url';
5210
									$this->mOutput->addExternalLink( $value );
5211
									if ( $this->mOptions->getExternalLinkTarget() ) {
5212
										$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5213
									}
5214
									$validated = true;
5215
								}
5216
							} else {
5217
								$linkTitle = Title::newFromText( $value );
5218
								if ( $linkTitle ) {
5219
									$paramName = 'link-title';
5220
									$value = $linkTitle;
5221
									$this->mOutput->addLink( $linkTitle );
5222
									$validated = true;
5223
								}
5224
							}
5225
							break;
5226
						case 'frameless':
5227
						case 'framed':
5228
						case 'thumbnail':
5229
							// use first appearing option, discard others.
5230
							$validated = ! $seenformat;
5231
							$seenformat = true;
5232
							break;
5233
						default:
5234
							# Most other things appear to be empty or numeric...
5235
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
5236
						}
5237
					}
5238
5239
					if ( $validated ) {
5240
						$params[$type][$paramName] = $value;
5241
					}
5242
				}
5243
			}
5244
			if ( !$validated ) {
5245
				$caption = $part;
5246
			}
5247
		}
5248
5249
		# Process alignment parameters
5250
		if ( $params['horizAlign'] ) {
5251
			$params['frame']['align'] = key( $params['horizAlign'] );
5252
		}
5253
		if ( $params['vertAlign'] ) {
5254
			$params['frame']['valign'] = key( $params['vertAlign'] );
5255
		}
5256
5257
		$params['frame']['caption'] = $caption;
5258
5259
		# Will the image be presented in a frame, with the caption below?
5260
		$imageIsFramed = isset( $params['frame']['frame'] )
5261
			|| isset( $params['frame']['framed'] )
5262
			|| isset( $params['frame']['thumbnail'] )
5263
			|| isset( $params['frame']['manualthumb'] );
5264
5265
		# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
5266
		# came to also set the caption, ordinary text after the image -- which
5267
		# makes no sense, because that just repeats the text multiple times in
5268
		# screen readers.  It *also* came to set the title attribute.
5269
		# Now that we have an alt attribute, we should not set the alt text to
5270
		# equal the caption: that's worse than useless, it just repeats the
5271
		# text.  This is the framed/thumbnail case.  If there's no caption, we
5272
		# use the unnamed parameter for alt text as well, just for the time be-
5273
		# ing, if the unnamed param is set and the alt param is not.
5274
		# For the future, we need to figure out if we want to tweak this more,
5275
		# e.g., introducing a title= parameter for the title; ignoring the un-
5276
		# named parameter entirely for images without a caption; adding an ex-
5277
		# plicit caption= parameter and preserving the old magic unnamed para-
5278
		# meter for BC; ...
5279
		if ( $imageIsFramed ) { # Framed image
5280
			if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5281
				# No caption or alt text, add the filename as the alt text so
5282
				# that screen readers at least get some description of the image
5283
				$params['frame']['alt'] = $title->getText();
5284
			}
5285
			# Do not set $params['frame']['title'] because tooltips don't make sense
5286
			# for framed images
5287
		} else { # Inline image
5288
			if ( !isset( $params['frame']['alt'] ) ) {
5289
				# No alt text, use the "caption" for the alt text
5290
				if ( $caption !== '' ) {
5291
					$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5292
				} else {
5293
					# No caption, fall back to using the filename for the
5294
					# alt text
5295
					$params['frame']['alt'] = $title->getText();
5296
				}
5297
			}
5298
			# Use the "caption" for the tooltip text
5299
			$params['frame']['title'] = $this->stripAltText( $caption, $holders );
5300
		}
5301
5302
		Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
5303
5304
		# Linker does the rest
5305
		$time = isset( $options['time'] ) ? $options['time'] : false;
5306
		$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5307
			$time, $descQuery, $this->mOptions->getThumbSize() );
5308
5309
		# Give the handler a chance to modify the parser object
5310
		if ( $handler ) {
5311
			$handler->parserTransformHook( $this, $file );
5312
		}
5313
5314
		return $ret;
5315
	}
5316
5317
	/**
	 * Reduce caption wikitext to plain text usable in an alt or title attribute.
	 *
	 * @param string $caption Caption text, possibly containing link holders
	 *   and strip markers
	 * @param LinkHolderArray|bool $holders Holder array used to expand link
	 *   placeholders; when falsy, replaceLinkHoldersText() is used instead
	 * @return mixed|string Caption with placeholders expanded and tags removed
	 */
	protected function stripAltText( $caption, $holders ) {
		# Strip bad stuff out of the title (tooltip). A caller-supplied holder
		# array takes precedence over replaceLinkHoldersText(), because when we
		# are called from replaceInternalLinks2(), mLinkHolders won't be
		# up-to-date.
		$plain = $holders
			? $holders->replaceText( $caption )
			: $this->replaceLinkHoldersText( $caption );

		# Thumbnail attributes must not carry placeholders that would later be
		# expanded to HTML, so expand them right away and then drop the tags.
		$plain = $this->mStripState->unstripBoth( $plain );

		return Sanitizer::stripAllTags( $plain );
	}
5340
5341
	/**
	 * Flag the output object as dynamic content that shouldn't be cached,
	 * by setting its cache expiry to zero.
	 *
	 * @throws MWException If there is no output object, i.e. no parse is in progress
	 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
	 */
	public function disableCache() {
		wfDebug( "Parser output marked as uncacheable.\n" );

		$output = $this->mOutput;
		if ( $output ) {
			$output->updateCacheExpiry( 0 ); // new style, for consistency
			return;
		}

		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}
5354
5355
	/**
	 * Sanitizer callback that expands items found in HTML attribute values,
	 * so that they can be safely tested and escaped.
	 *
	 * The text is run through replaceVariables() and then has its strip
	 * markers expanded. $text is taken by reference and is updated in place
	 * as well as returned.
	 *
	 * @param string $text
	 * @param bool|PPFrame $frame
	 * @return string
	 */
	public function attributeStripCallback( &$text, $frame = false ) {
		$text = $this->replaceVariables( $text, $frame );
		return $text = $this->mStripState->unstripBoth( $text );
	}
5368
5369
	/**
	 * Accessor: the names of all registered tag hooks.
	 *
	 * @return array Keys of the transparent tag hooks, then the regular tag
	 *   hooks, then the function tag hooks, merged into one list
	 */
	public function getTags() {
		$transparent = array_keys( $this->mTransparentTagHooks );
		$regular = array_keys( $this->mTagHooks );
		$functional = array_keys( $this->mFunctionTagHooks );

		return array_merge( $transparent, $regular, $functional );
	}
5381
5382
	/**
	 * Replace transparent tags in $text with the values given by the callbacks.
	 *
	 * Transparent tag hooks are like regular XML-style tag hooks, except they
	 * operate late in the transformation sequence, on HTML instead of wikitext.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public function replaceTransparentTags( $text ) {
		$found = [];
		$hookNames = array_keys( $this->mTransparentTagHooks );
		$text = self::extractTagsAndParams( $hookNames, $text, $found );

		# Map each marker left in $text to the text that should replace it
		$substitutions = [];
		foreach ( $found as $marker => $info ) {
			list( $element, $content, $params, $tag ) = $info;
			$hookKey = strtolower( $element );

			# A tag without a registered hook is put back unchanged
			$substitutions[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
				? call_user_func_array(
					$this->mTransparentTagHooks[$hookKey],
					[ $content, $params, $this ]
				)
				: $tag;
		}

		return strtr( $text, $substitutions );
	}
5413
5414
	/**
	 * Split wikitext into sections, then either extract one section's text
	 * or substitute new text for it.
	 *
	 * External callers should use the getSection and replaceSection methods.
	 *
	 * @param string $text Page wikitext
	 * @param string|number $sectionId A section identifier string of the form:
	 *   "<flag1> - <flag2> - ... - <section number>"
	 *
	 * Currently the only recognised flag is "T", which means the target section number
	 * was derived during a template inclusion parse, in other words this is a template
	 * section edit link. If no flags are given, it was an ordinary section edit link.
	 * This flag is required to avoid a section numbering mismatch when a section is
	 * enclosed by "<includeonly>" (bug 6563).
	 *
	 * The section number 0 pulls the text before the first heading; other numbers will
	 * pull the given section along with its lower-level subsections. If the section is
	 * not found, $mode=get will return $newText, and $mode=replace will return $text.
	 *
	 * Section 0 is always considered to exist, even if it only contains the empty
	 * string. If $text is the empty string and section 0 is replaced, $newText is
	 * returned.
	 *
	 * @param string $mode One of "get" or "replace"
	 * @param string $newText Replacement text for section data.
	 * @return string For "get", the extracted section text.
	 *   for "replace", the whole page with the section replaced.
	 */
	private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
		global $wgTitle; # not generally used but removes an ugly failure mode

		# Keep the lock object in a local so it lives until this method returns
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
		$outText = '';
		$frame = $this->getPreprocessor()->newFrame();

		$isGet = ( $mode === 'get' );
		$isReplace = ( $mode === 'replace' );
		# What to hand back when the requested section does not exist
		$notFound = $isGet ? $newText : $text;

		# Process section extraction flags: everything before the last "-" is
		# a flag, the final piece is the section number
		$flags = 0;
		$pieces = explode( '-', $sectionId );
		$sectionIndex = array_pop( $pieces );
		if ( in_array( 'T', $pieces, true ) ) {
			$flags |= self::PTD_FOR_INCLUSION;
		}

		# Check for empty input
		if ( strval( $text ) === '' ) {
			# Only sections 0 and T-0 exist in an empty document
			if ( $sectionIndex == 0 ) {
				return $isGet ? '' : $newText;
			}
			return $notFound;
		}

		# Preprocess the text
		$root = $this->preprocessToDom( $text, $flags );

		# <h> nodes indicate section breaks
		# They can only occur at the top level, so we can find them by iterating the root's children
		$node = $root->getFirstChild();

		# Find the target section
		if ( $sectionIndex == 0 ) {
			# Section zero doesn't nest, level=big
			$targetLevel = 1000;
		} else {
			for ( ; $node; $node = $node->getNextSibling() ) {
				if ( $node->getName() === 'h' ) {
					$bits = $node->splitHeading();
					if ( $bits['i'] == $sectionIndex ) {
						$targetLevel = $bits['level'];
						break;
					}
				}
				# In replace mode, everything ahead of the target is copied through
				if ( $isReplace ) {
					$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
				}
			}
		}

		if ( !$node ) {
			# Not found
			return $notFound;
		}

		# Find the end of the section, including nested sections
		do {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				# A different heading at the same or a higher level ends the section
				if ( $bits['i'] != $sectionIndex && $bits['level'] <= $targetLevel ) {
					break;
				}
			}
			if ( $isGet ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
			$node = $node->getNextSibling();
		} while ( $node );

		# Write out the remainder (in replace mode only)
		if ( $isReplace ) {
			# Output the replacement text
			# Add two newlines on -- trailing whitespace in $newText is conventionally
			# stripped by the editor, so we need both newlines to restore the paragraph gap
			# Only add trailing whitespace if there is newText
			if ( $newText != "" ) {
				$outText .= $newText . "\n\n";
			}

			for ( ; $node; $node = $node->getNextSibling() ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}

		# Re-insert stripped tags. $outText starts as '' and is only ever
		# appended to, so it is always a string at this point.
		return rtrim( $this->mStripState->unstripBoth( $outText ) );
	}
5553
5554
	/**
	 * Return the text of the section identified by $sectionId.
	 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
	 * the first section before any such heading (section 0).
	 *
	 * If a section contains subsections, these are also returned.
	 *
	 * @param string $text Text to look in
	 * @param string|number $sectionId Section identifier as a number or string
	 * (e.g. 0, 1 or 'T-1').
	 * @param string $defaultText Default to return if section is not found
	 *
	 * @return string Text of the requested section
	 */
	public function getSection( $text, $sectionId, $defaultText = '' ) {
		# Delegates to extractSections() in "get" mode
		$mode = 'get';
		return $this->extractSections( $text, $sectionId, $mode, $defaultText );
	}
5571
5572
	/**
	 * Return $oldText with the content of the section identified by
	 * $sectionId replaced by $newText. If the target section does not
	 * exist, $oldText is returned unchanged.
	 *
	 * @param string $oldText Former text of the article
	 * @param string|number $sectionId Section identifier as a number or string
	 * (e.g. 0, 1 or 'T-1').
	 * @param string $newText Replacing text
	 *
	 * @return string Modified text
	 */
	public function replaceSection( $oldText, $sectionId, $newText ) {
		# Delegates to extractSections() in "replace" mode
		$mode = 'replace';
		return $this->extractSections( $oldText, $sectionId, $mode, $newText );
	}
5587
5588
	/**
	 * Accessor for the ID of the revision being parsed.
	 *
	 * @return int|null
	 */
	public function getRevisionId() {
		$revisionId = $this->mRevisionId;
		return $revisionId;
	}
5596
5597
	/**
	 * Get the revision object for $this->mRevisionId, looking it up on first
	 * use and caching it in mRevisionObject.
	 *
	 * @return Revision|null Either a Revision object or null
	 * @since 1.23 (public since 1.23)
	 */
	public function getRevisionObject() {
		if ( $this->mRevisionObject !== null ) {
			# Already looked up
			return $this->mRevisionObject;
		}
		if ( $this->mRevisionId === null ) {
			return null;
		}

		$fetchCurrent = $this->mOptions->getCurrentRevisionCallback();
		$rev = call_user_func( $fetchCurrent, $this->getTitle(), $this );

		# If the parse is for a new revision, then the callback should have
		# already been set to force the object and should match mRevisionId.
		# If not, try to fetch by mRevisionId for sanity.
		if ( $rev && $rev->getId() != $this->mRevisionId ) {
			$rev = Revision::newFromId( $this->mRevisionId );
		}

		return $this->mRevisionObject = $rev;
	}
5626
5627
	/**
	 * Get the timestamp associated with the current revision, adjusted for
	 * the default server-local timestamp. The value is computed once and
	 * then kept in mRevisionTimestamp.
	 *
	 * @return string
	 */
	public function getRevisionTimestamp() {
		if ( $this->mRevisionTimestamp !== null ) {
			return $this->mRevisionTimestamp;
		}

		global $wgContLang;

		# Fall back to the current time when there is no revision object
		$rev = $this->getRevisionObject();
		if ( $rev ) {
			$rawTimestamp = $rev->getTimestamp();
		} else {
			$rawTimestamp = wfTimestampNow();
		}

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $rawTimestamp, '' );

		return $this->mRevisionTimestamp;
	}
5649
5650
	/**
	 * Get the name of the user that edited the last revision.
	 *
	 * The name is cached in mRevisionUser. When there is no revision object
	 * and the parse is neither in 'wiki' output mode nor a preview,
	 * mRevisionUser is left untouched and returned as it is.
	 *
	 * @return string User name
	 */
	public function getRevisionUser() {
		if ( $this->mRevisionUser !== null ) {
			return $this->mRevisionUser;
		}

		$rev = $this->getRevisionObject();
		if ( $rev ) {
			$this->mRevisionUser = $rev->getUserText();
		} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
			# if this template is subst: the revision id will be blank,
			# so just use the current user's name
			$this->mRevisionUser = $this->getUser()->getName();
		}

		return $this->mRevisionUser;
	}
5669
5670
	/**
	 * Get the size of the revision, cached in mRevisionSize after first use.
	 *
	 * @return int|null Revision size
	 */
	public function getRevisionSize() {
		if ( $this->mRevisionSize !== null ) {
			return $this->mRevisionSize;
		}

		# if this variable is subst: the revision id will be blank,
		# so just use the parser input size, because the own substitution
		# will change the size.
		$rev = $this->getRevisionObject();
		$this->mRevisionSize = $rev ? $rev->getSize() : $this->mInputSize;

		return $this->mRevisionSize;
	}
5690
5691
	/**
	 * Mutator for $mDefaultSort. The value is also recorded on the output
	 * object as the 'defaultsort' property.
	 *
	 * @param string $sort New value
	 */
	public function setDefaultSort( $sort ) {
		$this->mDefaultSort = $sort;

		$output = $this->mOutput;
		$output->setProperty( 'defaultsort', $sort );
	}
5700
5701
	/**
	 * Accessor for $mDefaultSort
	 * Returns the empty string when none is set (i.e. the stored value is
	 * exactly false).
	 *
	 * This value is treated as a prefix, so the
	 * empty string is equivalent to sorting by
	 * page name.
	 *
	 * @return string
	 */
	public function getDefaultSort() {
		return $this->mDefaultSort === false ? '' : $this->mDefaultSort;
	}
5718
5719
	/**
	 * Raw accessor for $mDefaultSort
	 * Unlike getDefaultSort(), the stored value is returned as-is, so the
	 * result is false if none is set.
	 *
	 * @return string|bool
	 */
	public function getCustomDefaultSort() {
		$customSort = $this->mDefaultSort;
		return $customSort;
	}
5728
5729
	/**
	 * Try to guess the section anchor name based on a wikitext fragment
	 * presumably extracted from a heading, for example "Header" from
	 * "== Header ==".
	 *
	 * @param string $text
	 *
	 * @return string Anchor, starting with '#'
	 */
	public function guessSectionNameFromWikiText( $text ) {
		# Strip out wikitext links(they break the anchor)
		$stripped = $this->stripSectionName( $text );
		$normalized = Sanitizer::normalizeSectionNameWhitespace( $stripped );

		return '#' . Sanitizer::escapeId( $normalized, 'noninitial' );
	}
5744
5745
	/**
	 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
	 * instead.  For use in redirects, since IE6 interprets Redirect: headers
	 * as something other than UTF-8 (apparently?), resulting in breakage.
	 *
	 * @param string $text The section name
	 * @return string An anchor, starting with '#'
	 */
	public function guessLegacySectionNameFromWikiText( $text ) {
		# Strip out wikitext links(they break the anchor)
		$stripped = $this->stripSectionName( $text );
		$normalized = Sanitizer::normalizeSectionNameWhitespace( $stripped );

		return '#' . Sanitizer::escapeId( $normalized, [ 'noninitial', 'legacy' ] );
	}
5759
5760
	/**
	 * Strips a text string of wikitext for use in a section anchor
	 *
	 * Accepts a text string and then removes all wikitext from the
	 * string and leaves only the resultant text (i.e. the result of
	 * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
	 * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
	 * to create valid section anchors by mimicking the output of the
	 * parser when headings are parsed.
	 *
	 * @param string $text Text string to be stripped of wikitext
	 * for use in a Section anchor
	 * @return string Filtered text string
	 */
	public function stripSectionName( $text ) {
		# Pattern => replacement pairs, applied one after another in this order
		$rewrites = [
			# Strip internal link markup: piped links keep their label...
			'/\[\[:?([^[|]+)\|([^[]+)\]\]/' => '$2',
			# ...and unpiped links keep their target
			'/\[\[:?([^[]+)\|?\]\]/' => '$1',
			# Strip external link markup
			# @todo FIXME: Not tolerant to blank link text
			# I.E. [https://www.mediawiki.org] will render as [1] or something depending
			# on how many empty links there are on the page - need to figure that out.
			'/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/' => '$2',
		];
		foreach ( $rewrites as $pattern => $replacement ) {
			$text = preg_replace( $pattern, $replacement, $text );
		}

		# Parse wikitext quotes (italics & bold)
		$quoted = $this->doQuotes( $text );

		# Strip HTML tags
		return StringUtils::delimiterReplace( '<', '>', '', $quoted );
	}
5792
5793
	/**
	 * strip/replaceVariables/unstrip for preprocessor regression testing
	 *
	 * Starts a parse for the given title and options, expands variables,
	 * expands strip markers and passes the result through
	 * Sanitizer::removeHTMLtags().
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @param int $outputType
	 *
	 * @return string
	 */
	public function testSrvus( $text, Title $title, ParserOptions $options,
		$outputType = self::OT_HTML
	) {
		# Keep the lock object in a local so it lives until this method returns
		$lockGuard = $this->lock();
		$this->startParse( $title, $options, $outputType, true );

		$expanded = $this->replaceVariables( $text );
		$unstripped = $this->mStripState->unstripBoth( $expanded );

		return Sanitizer::removeHTMLtags( $unstripped );
	}
5814
5815
	/**
	 * Run preSaveTransform() on the text, acting as the user taken from
	 * the given parser options.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @return string
	 */
	public function testPst( $text, Title $title, ParserOptions $options ) {
		$user = $options->getUser();
		return $this->preSaveTransform( $text, $title, $user, $options );
	}
5824
5825
	/**
	 * Run testSrvus() with the output type set to OT_PREPROCESS.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @return string
	 */
	public function testPreprocess( $text, Title $title, ParserOptions $options ) {
		$outputType = self::OT_PREPROCESS;
		return $this->testSrvus( $text, $title, $options, $outputType );
	}
5834
5835
	/**
	 * Call a callback function on all regions of the given text that are not
	 * inside strip markers, and replace those regions with the return value
	 * of the callback. For example, with input:
	 *
	 *  aaa<MARKER>bbb
	 *
	 * This will call the callback function twice, with 'aaa' and 'bbb'. Those
	 * two strings will be replaced with the value returned by the callback in
	 * each case. The markers themselves are copied through untouched.
	 *
	 * @param string $s
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function markerSkipCallback( $s, $callback ) {
		$out = '';
		$length = strlen( $s );
		$suffixLength = strlen( self::MARKER_SUFFIX );

		for ( $pos = 0; $pos < $length; ) {
			$markerStart = strpos( $s, self::MARKER_PREFIX, $pos );
			if ( $markerStart === false ) {
				# No more markers: the rest of the string is plain text
				$out .= call_user_func( $callback, substr( $s, $pos ) );
				break;
			}

			# Plain text in front of the marker (may be the empty string)
			$out .= call_user_func( $callback, substr( $s, $pos, $markerStart - $pos ) );

			$markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
			if ( $markerEnd === false ) {
				# Unterminated marker: copy the remainder verbatim
				$out .= substr( $s, $markerStart );
				break;
			}

			# Copy the whole marker, suffix included, and resume right after it
			$pos = $markerEnd + $suffixLength;
			$out .= substr( $s, $markerStart, $pos - $markerStart );
		}

		return $out;
	}
5874
5875
	/**
	 * Remove any strip markers found in the given text, by delegating to
	 * the strip state's killMarkers().
	 *
	 * @param string $text Input string
	 * @return string
	 */
	public function killMarkers( $text ) {
		$stripState = $this->mStripState;
		return $stripState->killMarkers( $text );
	}
5884
5885
	/**
	 * Save the parser state required to convert the given half-parsed text to
	 * HTML. "Half-parsed" in this context means the output of
	 * recursiveTagParse() or internalParse(). This output has strip markers
	 * from replaceVariables (extensionSubstitution() etc.), and link
	 * placeholders from replaceLinkHolders().
	 *
	 * Returns an array which can be serialized and stored persistently. This
	 * array can later be loaded into another parser instance with
	 * unserializeHalfParsedText(). The text can then be safely incorporated into
	 * the return value of a parser hook.
	 *
	 * @param string $text
	 *
	 * @return array With keys 'text', 'version', 'stripState' and 'linkHolders'
	 */
	public function serializeHalfParsedText( $text ) {
		# Only the parts of the strip state and link holders that $text uses
		$stripSubState = $this->mStripState->getSubState( $text );
		$linkSubArray = $this->mLinkHolders->getSubArray( $text );

		return [
			'text' => $text,
			'version' => self::HALF_PARSED_VERSION,
			'stripState' => $stripSubState,
			'linkHolders' => $linkSubArray
		];
	}
5910
5911
	/**
	 * Load the parser state given in the $data array, which is assumed to
	 * have been generated by serializeHalfParsedText(). The text contents is
	 * extracted from the array, and its markers are transformed into markers
	 * appropriate for the current Parser instance. This transformed text is
	 * returned, and can be safely included in the return value of a parser
	 * hook.
	 *
	 * If the $data array has been stored persistently, the caller should first
	 * check whether it is still valid, by calling isValidHalfParsedText().
	 *
	 * @param array $data Serialized data
	 * @throws MWException If $data fails the isValidHalfParsedText() check
	 * @return string
	 */
	public function unserializeHalfParsedText( $data ) {
		# Reuse the public validity check rather than repeating the version
		# comparison here, so the compatibility rule lives in one place.
		if ( !$this->isValidHalfParsedText( $data ) ) {
			throw new MWException( __METHOD__ . ': invalid version' );
		}

		# First, extract the strip state.
		$texts = [ $data['text'] ];
		$texts = $this->mStripState->merge( $data['stripState'], $texts );

		# Now renumber links
		$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

		# Should be good to go.
		return $texts[0];
	}
5941
5942
	/**
	 * Returns true if the given array, presumed to be generated by
	 * serializeHalfParsedText(), is compatible with the current version of the
	 * parser, i.e. it has a 'version' entry equal to HALF_PARSED_VERSION.
	 *
	 * @param array $data
	 *
	 * @return bool
	 */
	public function isValidHalfParsedText( $data ) {
		if ( !isset( $data['version'] ) ) {
			return false;
		}

		return $data['version'] == self::HALF_PARSED_VERSION;
	}
5954
5955
	/**
	 * Parse a width param of imagelink like 300px or 200x300px
	 *
	 * @param string $value
	 *
	 * @return array Empty when $value is '' or matches neither form;
	 *   [ 'width' => int, 'height' => int ] for the "WxH" form;
	 *   [ 'width' => int ] for the width-only form
	 * @since 1.20
	 */
	public function parseWidthParam( $value ) {
		if ( $value === '' ) {
			return [];
		}

		# (bug 13500) In both cases (width/height and width only),
		# permit trailing "px" for backward compatibility.
		$dims = [];
		if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $dims ) ) {
			# Either number may be missing, in which case intval() gives 0
			return [
				'width' => intval( $dims[1] ),
				'height' => intval( $dims[2] ),
			];
		}

		if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
			return [ 'width' => intval( $value ) ];
		}

		return [];
	}
5982
5983
	/**
	 * Lock the current instance of the parser.
	 *
	 * This is meant to stop someone from calling the parser
	 * recursively and messing up all the strip state.
	 *
	 * @throws MWException If parser is in a parse
	 * @return ScopedCallback The lock will be released once the return value goes out of scope.
	 */
	protected function lock() {
		if ( $this->mInParse ) {
			$message = "Parser state cleared while parsing. "
				. "Did you call Parser::parse recursively?";
			throw new MWException( $message );
		}
		$this->mInParse = true;

		# The callback handed to ScopedCallback clears the flag again
		return new ScopedCallback( function() {
			$this->mInParse = false;
		} );
	}
6005
6006
	/**
	 * Strip outer <p></p> tag from the HTML source of a single paragraph.
	 *
	 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
	 * or if there is more than one <p/> tag in the input HTML.
	 *
	 * @param string $html
	 * @return string
	 * @since 1.24
	 */
	public static function stripOuterParagraph( $html ) {
		$m = [];
		$isWrapped = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m );

		# A closing tag inside the captured content means the input held
		# more than one paragraph, so it is left alone
		if ( $isWrapped && strpos( $m[1], '</p>' ) === false ) {
			return $m[1];
		}

		return $html;
	}
6026
6027
	/**
	 * Return this parser if it is not doing anything, otherwise
	 * get a fresh parser. You can use this method by doing
	 * $myParser = $wgParser->getFreshParser(), or more simply
	 * $wgParser->getFreshParser()->parse( ... );
	 * if you're unsure if $wgParser is safe to use.
	 *
	 * @since 1.24
	 * @return Parser A parser object that is not parsing anything
	 */
	public function getFreshParser() {
		global $wgParserConf;

		if ( !$this->mInParse ) {
			return $this;
		}

		# Busy: build a new instance of the class named in $wgParserConf
		$parserClass = $wgParserConf['class'];
		return new $parserClass( $wgParserConf );
	}
6045
6046
	/**
	 * Sets up the PHP implementation of OOUI for use in this request
	 * and instructs OutputPage to enable OOUI for itself.
	 *
	 * @since 1.26
	 */
	public function enableOOUI() {
		OutputPage::setupOOUI();

		$output = $this->mOutput;
		$output->setEnableOOUI( true );
	}
6056
}
6057