Completed
Branch master (4b8315)
by
unknown
17:52
created

includes/parser/Parser.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

Code
1
<?php
2
/**
3
 * PHP parser that converts wiki markup to HTML.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License along
16
 * with this program; if not, write to the Free Software Foundation, Inc.,
17
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
 * http://www.gnu.org/copyleft/gpl.html
19
 *
20
 * @file
21
 * @ingroup Parser
22
 */
23
use MediaWiki\Linker\LinkRenderer;
24
use MediaWiki\MediaWikiServices;
25
use Wikimedia\ScopedCallback;
26
27
/**
28
 * @defgroup Parser Parser
29
 */
30
31
/**
32
 * PHP Parser - Processes wiki markup (which uses a more user-friendly
33
 * syntax, such as "[[link]]" for making links), and provides a one-way
34
 * transformation of that wiki markup into (X)HTML output / markup
35
 * (which in turn the browser understands, and can display).
36
 *
37
 * There are seven main entry points into the Parser class:
38
 *
39
 * - Parser::parse()
40
 *     produces HTML output
41
 * - Parser::preSaveTransform()
42
 *     produces altered wiki markup
43
 * - Parser::preprocess()
44
 *     removes HTML comments and expands templates
45
 * - Parser::cleanSig() and Parser::cleanSigInSig()
46
 *     cleans a signature before saving it to preferences
47
 * - Parser::getSection()
48
 *     return the content of a section from an article for section editing
49
 * - Parser::replaceSection()
50
 *     replaces a section by number inside an article
51
 * - Parser::getPreloadText()
52
 *     removes <noinclude> sections and <includeonly> tags
53
 *
54
 * Globals used:
55
 *    object: $wgContLang
56
 *
57
 * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
58
 *
59
 * @par Settings:
60
 * $wgNamespacesWithSubpages
61
 *
62
 * @par Settings only within ParserOptions:
63
 * $wgAllowExternalImages
64
 * $wgAllowSpecialInclusion
65
 * $wgInterwikiMagic
66
 * $wgMaxArticleSize
67
 *
68
 * @ingroup Parser
69
 */
70
class Parser {
71
	/**
72
	 * Update this version number when the ParserOutput format
73
	 * changes in an incompatible way, so the parser cache
74
	 * can automatically discard old data.
75
	 */
76
	const VERSION = '1.6.4';
77
78
	/**
79
	 * Update this version number when the output of serialiseHalfParsedText()
80
	 * changes in an incompatible way
81
	 */
82
	const HALF_PARSED_VERSION = 2;
83
84
	# Flags for Parser::setFunctionHook
85
	const SFH_NO_HASH = 1;
86
	const SFH_OBJECT_ARGS = 2;
87
88
	# Constants needed for external link processing
89
	# Everything except bracket, space, or control characters
90
	# \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
91
	# as well as U+3000 IDEOGRAPHIC SPACE (bug 19052)
92
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
93
	# Simplified expression to match an IPv4 or IPv6 address, or
94
	# at least one character of a host name (embeds EXT_LINK_URL_CLASS)
95
	const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
96
	# RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR)
97
	// @codingStandardsIgnoreStart Generic.Files.LineLength
98
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+)
99
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu';
100
	// @codingStandardsIgnoreEnd
101
102
	# Regular expression for a non-newline space
103
	const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
104
105
	# Flags for preprocessToDom
106
	const PTD_FOR_INCLUSION = 1;
107
108
	# Allowed values for $this->mOutputType
109
	# Parameter to startExternalParse().
110
	const OT_HTML = 1; # like parse()
111
	const OT_WIKI = 2; # like preSaveTransform()
112
	const OT_PREPROCESS = 3; # like preprocess()
113
	const OT_MSG = 3;
114
	const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged.
115
116
	/**
117
	 * @var string Prefix and suffix for temporary replacement strings
118
	 * for the multipass parser.
119
	 *
120
	 * \x7f should never appear in input as it's disallowed in XML.
121
	 * Using it at the front also gives us a little extra robustness
122
	 * since it shouldn't match when butted up against identifier-like
123
	 * string constructs.
124
	 *
125
	 * Must not consist of all title characters, or else it will change
126
	 * the behavior of <nowiki> in a link.
127
	 *
128
	 * Must have a character that needs escaping in attributes, otherwise
129
	 * someone could put a strip marker in an attribute, to get around
130
	 * escaping quote marks, and break out of the attribute. Thus we add
131
	 * `'".
132
	 */
133
	const MARKER_SUFFIX = "-QINU`\"'\x7f";
134
	const MARKER_PREFIX = "\x7f'\"`UNIQ-";
135
136
	# Markers used for wrapping the table of contents
137
	const TOC_START = '<mw:toc>';
138
	const TOC_END = '</mw:toc>';
139
140
	# Persistent:
141
	public $mTagHooks = [];
142
	public $mTransparentTagHooks = [];
143
	public $mFunctionHooks = [];
144
	public $mFunctionSynonyms = [ 0 => [], 1 => [] ];
145
	public $mFunctionTagHooks = [];
146
	public $mStripList = [];
147
	public $mDefaultStripList = [];
148
	public $mVarCache = [];
149
	public $mImageParams = [];
150
	public $mImageParamsMagicArray = [];
151
	public $mMarkerIndex = 0;
152
	public $mFirstCall = true;
153
154
	# Initialised by initialiseVariables()
155
156
	/**
157
	 * @var MagicWordArray
158
	 */
159
	public $mVariables;
160
161
	/**
162
	 * @var MagicWordArray
163
	 */
164
	public $mSubstWords;
165
	# Initialised in constructor
166
	public $mConf, $mExtLinkBracketedRegex, $mUrlProtocols;
167
168
	# Initialized in getPreprocessor()
169
	/** @var Preprocessor */
170
	public $mPreprocessor;
171
172
	# Cleared with clearState():
173
	/**
174
	 * @var ParserOutput
175
	 */
176
	public $mOutput;
177
	public $mAutonumber;
178
179
	/**
180
	 * @var StripState
181
	 */
182
	public $mStripState;
183
184
	public $mIncludeCount;
185
	/**
186
	 * @var LinkHolderArray
187
	 */
188
	public $mLinkHolders;
189
190
	public $mLinkID;
191
	public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth;
192
	public $mDefaultSort;
193
	public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores;
194
	public $mExpensiveFunctionCount; # number of expensive parser function calls
195
	public $mShowToc, $mForceTocPosition;
196
197
	/**
198
	 * @var User
199
	 */
200
	public $mUser; # User object; only used when doing pre-save transform
201
202
	# Temporary
203
	# These are variables reset at least once per parse regardless of $clearState
204
205
	/**
206
	 * @var ParserOptions
207
	 */
208
	public $mOptions;
209
210
	/**
211
	 * @var Title
212
	 */
213
	public $mTitle;        # Title context, used for self-link rendering and similar things
214
	public $mOutputType;   # Output type, one of the OT_xxx constants
215
	public $ot;            # Shortcut alias, see setOutputType()
216
	public $mRevisionObject; # The revision object of the specified revision ID
217
	public $mRevisionId;   # ID to display in {{REVISIONID}} tags
218
	public $mRevisionTimestamp; # The timestamp of the specified revision ID
219
	public $mRevisionUser; # User to display in {{REVISIONUSER}} tag
220
	public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable
221
	public $mRevIdForTs;   # The revision ID which was used to fetch the timestamp
222
	public $mInputSize = false; # For {{PAGESIZE}} on current page.
223
224
	/**
225
	 * @var string Deprecated accessor for the strip marker prefix.
226
	 * @deprecated since 1.26; use Parser::MARKER_PREFIX instead.
227
	 **/
228
	public $mUniqPrefix = Parser::MARKER_PREFIX;
229
230
	/**
231
	 * @var array Array with the language name of each language link (i.e. the
232
	 * interwiki prefix) in the key, value arbitrary. Used to avoid sending
233
	 * duplicate language links to the ParserOutput.
234
	 */
235
	public $mLangLinkLanguages;
236
237
	/**
238
	 * @var MapCacheLRU|null
239
	 * @since 1.24
240
	 *
241
	 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
242
	 */
243
	public $currentRevisionCache;
244
245
	/**
246
	 * @var bool Recursive call protection.
247
	 * This variable should be treated as if it were private.
248
	 */
249
	public $mInParse = false;
250
251
	/** @var SectionProfiler */
252
	protected $mProfiler;
253
254
	/**
255
	 * @var LinkRenderer
256
	 */
257
	protected $mLinkRenderer;
258
259
	/**
260
	 * @param array $conf
261
	 */
262
	public function __construct( $conf = [] ) {
		// Store the configuration and precompute the bracketed external link regex.
		$this->mConf = $conf;
		$this->mUrlProtocols = wfUrlProtocols();
		$this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
			self::EXT_LINK_ADDR .
			self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';

		// Pick the preprocessor implementation. Preprocessor_Hash is the fallback;
		// an explicit 'preprocessorClass' entry in $conf always wins.
		$preprocessorClass = 'Preprocessor_Hash';
		if ( isset( $conf['preprocessorClass'] ) ) {
			$preprocessorClass = $conf['preprocessorClass'];
		} elseif ( !defined( 'HPHP_VERSION' ) ) {
			# Under HipHop, Preprocessor_Hash is much faster than Preprocessor_DOM,
			# so the extension checks are only made on other runtimes.
			if ( extension_loaded( 'domxml' ) ) {
				# PECL extension that conflicts with the core DOM extension (bug 13770)
				wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
			} elseif ( extension_loaded( 'dom' ) ) {
				$preprocessorClass = 'Preprocessor_DOM';
			}
		}
		$this->mPreprocessorClass = $preprocessorClass;

		wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
	}
284
285
	/**
286
	 * Reduce memory usage to reduce the impact of circular references
287
	 */
288
	public function __destruct() {
		// Drop the link holder array first, then every remaining property.
		if ( isset( $this->mLinkHolders ) ) {
			unset( $this->mLinkHolders );
		}
		foreach ( $this as $property => $unused ) {
			unset( $this->$property );
		}
	}
296
297
	/**
298
	 * Allow extensions to clean up when the parser is cloned
299
	 */
300
	public function __clone() {
		// A clone always starts outside of a parse.
		$this->mInParse = false;

		// Bug 56226: taking a reference to an object field turns the field
		// itself into a reference, and cloning copies such a field as a
		// reference, so the clone would keep sharing it with the original.
		// Old hooks pass these fields by reference, so for each one take a
		// plain copy and rebind the field to that copy.
		foreach ( [ 'mStripState', 'mVarCache' ] as $field ) {
			$copy = $this->$field;
			$this->$field =& $copy;
			unset( $copy );
		}

		Hooks::run( 'ParserCloned', [ $this ] );
	}
319
320
	/**
321
	 * Do various kinds of initialisation on the first call of the parser
322
	 */
323
	public function firstCallInit() {
		// Only the first call does any work; later calls return immediately.
		if ( !$this->mFirstCall ) {
			return;
		}
		$this->mFirstCall = false;

		CoreParserFunctions::register( $this );
		CoreTagHooks::register( $this );
		$this->initialiseVariables();

		// Hook handlers take the parser by reference. Taking a reference to
		// $this itself is not reliable on PHP 7.1+, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
	}
335
336
	/**
337
	 * Clear Parser state
338
	 *
339
	 * @private
340
	 */
341
	public function clearState() {
		// Make sure one-time initialisation has happened before resetting state.
		if ( $this->mFirstCall ) {
			$this->firstCallInit();
		}
		$this->mOutput = new ParserOutput;
		$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
		$this->mAutonumber = 0;
		$this->mIncludeCount = [];
		$this->mLinkHolders = new LinkHolderArray( $this );
		$this->mLinkID = 0;
		$this->mRevisionObject = $this->mRevisionTimestamp =
			$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
		$this->mVarCache = [];
		$this->mUser = null;
		$this->mLangLinkLanguages = [];
		$this->currentRevisionCache = null;

		$this->mStripState = new StripState;

		# Clear these on every parse, bug 4549
		$this->mTplRedirCache = $this->mTplDomCache = [];

		$this->mShowToc = true;
		$this->mForceTocPosition = false;
		$this->mIncludeSizes = [
			'post-expand' => 0,
			'arg' => 0,
		];
		$this->mPPNodeCount = 0;
		$this->mGeneratedPPNodeCount = 0;
		$this->mHighestExpansionDepth = 0;
		$this->mDefaultSort = false;
		$this->mHeadings = [];
		$this->mDoubleUnderscores = [];
		$this->mExpensiveFunctionCount = 0;

		# Fix cloning: a preprocessor that still points at another parser
		# instance is discarded so getPreprocessor() builds a fresh one.
		if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
			$this->mPreprocessor = null;
		}

		$this->mProfiler = new SectionProfiler();

		// Hook handlers take the parser by reference. Taking a reference to
		// $this itself is not reliable on PHP 7.1+, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserClearState', [ &$parser ] );
	}
386
387
	/**
388
	 * Convert wikitext to HTML
389
	 * Do not call this function recursively.
390
	 *
391
	 * @param string $text Text we want to parse
392
	 * @param Title $title
393
	 * @param ParserOptions $options
394
	 * @param bool $linestart
395
	 * @param bool $clearState
396
	 * @param int $revid Number to pass in {{REVISIONID}}
397
	 * @return ParserOutput A ParserOutput
398
	 */
399
	public function parse(
		$text, Title $title, ParserOptions $options,
		$linestart = true, $clearState = true, $revid = null
	) {
		/**
		 * First pass--just handle <nowiki> sections, pass the rest off
		 * to internalParse() which does all the real work.
		 */

		global $wgShowHostnames;

		if ( $clearState ) {
			// We use U+007F DELETE to construct strip markers, so we have to make
			// sure that this character does not occur in the input text.
			$text = strtr( $text, "\x7f", "?" );
			// NOTE(review): the return value is kept only for its lifetime;
			// presumably it releases the parse lock when it goes out of scope.
			$magicScopeVariable = $this->lock();
		}

		$this->startParse( $title, $options, self::OT_HTML, $clearState );

		$this->currentRevisionCache = null;
		$this->mInputSize = strlen( $text );
		if ( $this->mOptions->getEnableLimitReport() ) {
			$this->mOutput->resetParseStartTime();
		}

		// Remember the revision state so it can be restored before returning.
		$oldRevisionId = $this->mRevisionId;
		$oldRevisionObject = $this->mRevisionObject;
		$oldRevisionTimestamp = $this->mRevisionTimestamp;
		$oldRevisionUser = $this->mRevisionUser;
		$oldRevisionSize = $this->mRevisionSize;
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
			$this->mRevisionObject = null;
			$this->mRevisionTimestamp = null;
			$this->mRevisionUser = null;
			$this->mRevisionSize = null;
		}

		// Hook handlers take the parser by reference. Taking a reference to
		// $this itself is not reliable on PHP 7.1+, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
		# No more strip!
		Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
		$text = $this->internalParse( $text );
		Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

		$text = $this->internalParseHalfParsed( $text, true, $linestart );

		/**
		 * A converted title will be provided in the output object if title and
		 * content conversion are enabled, the article text does not contain
		 * a conversion-suppressing double-underscore tag, and no
		 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
		 * automatic link conversion.
		 */
		if ( !( $options->getDisableTitleConversion()
			|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
			|| isset( $this->mDoubleUnderscores['notitleconvert'] )
			|| $this->mOutput->getDisplayTitle() !== false )
		) {
			$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
			if ( $convruletitle ) {
				$this->mOutput->setTitleText( $convruletitle );
			} else {
				$titleText = $this->getConverterLanguage()->convertTitle( $title );
				$this->mOutput->setTitleText( $titleText );
			}
		}

		# Done parsing! Compute runtime adaptive expiry if set
		$this->mOutput->finalizeAdaptiveCacheExpiry();

		# Warn if too many heavyweight parser functions were used
		if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
			$this->limitationWarn( 'expensive-parserfunction',
				$this->mExpensiveFunctionCount,
				$this->mOptions->getExpensiveParserFunctionLimit()
			);
		}

		# Information on include size limits, for the benefit of users who try to skirt them
		if ( $this->mOptions->getEnableLimitReport() ) {
			$max = $this->mOptions->getMaxIncludeSize();

			$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
			if ( $cpuTime !== null ) {
				$this->mOutput->setLimitReportData( 'limitreport-cputime',
					sprintf( "%.3f", $cpuTime )
				);
			}

			$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
			$this->mOutput->setLimitReportData( 'limitreport-walltime',
				sprintf( "%.3f", $wallTime )
			);

			$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
				[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
				[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
				[ $this->mIncludeSizes['post-expand'], $max ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
				[ $this->mIncludeSizes['arg'], $max ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
				[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
			);
			$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
				[ $this->mExpensiveFunctionCount, $this->mOptions->getExpensiveParserFunctionLimit() ]
			);
			Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

			$limitReport = "NewPP limit report\n";
			if ( $wgShowHostnames ) {
				$limitReport .= 'Parsed by ' . wfHostname() . "\n";
			}
			$limitReport .= 'Cached time: ' . $this->mOutput->getCacheTime() . "\n";
			$limitReport .= 'Cache expiry: ' . $this->mOutput->getCacheExpiry() . "\n";
			$limitReport .= 'Dynamic content: ' .
				( $this->mOutput->hasDynamicContent() ? 'true' : 'false' ) .
				"\n";

			foreach ( $this->mOutput->getLimitReportData() as $key => $value ) {
				// A handler returning false takes over formatting of this entry.
				if ( Hooks::run( 'ParserLimitReportFormat',
					[ $key, &$value, &$limitReport, false, false ]
				) ) {
					$keyMsg = wfMessage( $key )->inLanguage( 'en' )->useDatabase( false );
					$valueMsg = wfMessage( [ "$key-value-text", "$key-value" ] )
						->inLanguage( 'en' )->useDatabase( false );
					if ( !$valueMsg->exists() ) {
						$valueMsg = new RawMessage( '$1' );
					}
					if ( !$keyMsg->isDisabled() && !$valueMsg->isDisabled() ) {
						$valueMsg->params( $value );
						$limitReport .= "{$keyMsg->text()}: {$valueMsg->text()}\n";
					}
				}
			}
			// Since we're not really outputting HTML, decode the entities and
			// then re-encode the things that need hiding inside HTML comments.
			$limitReport = htmlspecialchars_decode( $limitReport );
			Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );

			// Sanitize for comment. Note '‐' in the replacement is U+2010,
			// which looks much like the problematic '-'.
			$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
			$text .= "\n<!-- \n$limitReport-->\n";

			// Add on template profiling data
			$dataByFunc = $this->mProfiler->getFunctionStats();
			uasort( $dataByFunc, function ( $a, $b ) {
				// Sort callbacks must return an integer (<0, 0, >0), not a
				// boolean. Order by 'real' time, largest first.
				if ( $a['real'] == $b['real'] ) {
					return 0;
				}
				return ( $a['real'] < $b['real'] ) ? 1 : -1;
			} );
			$profileReport = "Transclusion expansion time report (%,ms,calls,template)\n";
			// Only the ten most expensive entries are reported.
			foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
				$profileReport .= sprintf( "%6.2f%% %8.3f %6d - %s\n",
					$item['%real'], $item['real'], $item['calls'],
					htmlspecialchars( $item['name'] ) );
			}
			$text .= "\n<!-- \n$profileReport-->\n";

			if ( $this->mGeneratedPPNodeCount > $this->mOptions->getMaxGeneratedPPNodeCount() / 10 ) {
				wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
					$this->mTitle->getPrefixedDBkey() );
			}
		}
		$this->mOutput->setText( $text );

		// Restore the revision state saved at the start of the parse.
		$this->mRevisionId = $oldRevisionId;
		$this->mRevisionObject = $oldRevisionObject;
		$this->mRevisionTimestamp = $oldRevisionTimestamp;
		$this->mRevisionUser = $oldRevisionUser;
		$this->mRevisionSize = $oldRevisionSize;
		$this->mInputSize = false;
		$this->currentRevisionCache = null;

		return $this->mOutput;
	}
580
581
	/**
582
	 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
583
	 * can be called from an extension tag hook.
584
	 *
585
	 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
586
	 * instead, which means that lists and links have not been fully parsed yet,
587
	 * and strip markers are still present.
588
	 *
589
	 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
590
	 *
591
	 * Use this function if you're a parser tag hook and you want to parse
592
	 * wikitext before or after applying additional transformations, and you
593
	 * intend to *return the result as hook output*, which will cause it to go
594
	 * through the rest of parsing process automatically.
595
	 *
596
	 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
597
	 * $text are not expanded
598
	 *
599
	 * @param string $text Text extension wants to have parsed
600
	 * @param bool|PPFrame $frame The frame to use for expanding any template variables
601
	 * @return string UNSAFE half-parsed HTML
602
	 */
603
	public function recursiveTagParse( $text, $frame = false ) {
		// Hook handlers take the parser by reference. Taking a reference to
		// $this itself is not reliable on PHP 7.1+, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
		Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
		// The result is only half-parsed: it is handed back without going
		// through internalParseHalfParsed().
		$text = $this->internalParse( $text, false, $frame );
		return $text;
	}
609
610
	/**
611
	 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
612
	 * point can be called from an extension tag hook.
613
	 *
614
	 * The output of this function is fully-parsed HTML that is safe for output.
615
	 * If you're a parser tag hook, you might want to use recursiveTagParse()
616
	 * instead.
617
	 *
618
	 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
619
	 * $text are not expanded
620
	 *
621
	 * @since 1.25
622
	 *
623
	 * @param string $text Text extension wants to have parsed
624
	 * @param bool|PPFrame $frame The frame to use for expanding any template variables
625
	 * @return string Fully parsed HTML
626
	 */
627
	public function recursiveTagParseFully( $text, $frame = false ) {
		// Half-parse first, then run the second stage on the result.
		$halfParsed = $this->recursiveTagParse( $text, $frame );
		return $this->internalParseHalfParsed( $halfParsed, false );
	}
632
633
	/**
634
	 * Expand templates and variables in the text, producing valid, static wikitext.
635
	 * Also removes comments.
636
	 * Do not call this function recursively.
637
	 * @param string $text
638
	 * @param Title $title
639
	 * @param ParserOptions $options
640
	 * @param int|null $revid
641
	 * @param bool|PPFrame $frame
642
	 * @return mixed|string
643
	 */
644
	public function preprocess( $text, Title $title = null,
		ParserOptions $options, $revid = null, $frame = false
	) {
		// NOTE(review): the return value is kept only for its lifetime;
		// presumably it releases the parse lock when it goes out of scope.
		$magicScopeVariable = $this->lock();
		$this->startParse( $title, $options, self::OT_PREPROCESS, true );
		if ( $revid !== null ) {
			$this->mRevisionId = $revid;
		}
		// Hook handlers take the parser by reference. Taking a reference to
		// $this itself is not reliable on PHP 7.1+, so pass a local alias.
		$parser = $this;
		Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
		Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
		$text = $this->replaceVariables( $text, $frame );
		$text = $this->mStripState->unstripBoth( $text );
		return $text;
	}
658
659
	/**
660
	 * Recursive parser entry point that can be called from an extension tag
661
	 * hook.
662
	 *
663
	 * @param string $text Text to be expanded
664
	 * @param bool|PPFrame $frame The frame to use for expanding any template variables
665
	 * @return string
666
	 * @since 1.19
667
	 */
668
	public function recursivePreprocess( $text, $frame = false ) {
		// Expand variables/templates, then put stripped content back.
		$expanded = $this->replaceVariables( $text, $frame );
		return $this->mStripState->unstripBoth( $expanded );
	}
673
674
	/**
675
	 * Process the wikitext for the "?preload=" feature. (bug 5210)
676
	 *
677
	 * "<noinclude>", "<includeonly>" etc. are parsed as for template
678
	 * transclusion, comments, templates, arguments, tags hooks and parser
679
	 * functions are untouched.
680
	 *
681
	 * @param string $text
682
	 * @param Title $title
683
	 * @param ParserOptions $options
684
	 * @param array $params
685
	 * @return string
686
	 */
687
	public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
		// Substitute $params into the text via a raw message.
		$rawMessage = new RawMessage( $text );
		$text = $rawMessage->params( $params )->plain();

		# Parser (re)initialisation
		$magicScopeVariable = $this->lock();
		$this->startParse( $title, $options, self::OT_PLAIN, true );

		// Build the DOM as for inclusion, then expand it with arguments and
		// templates disabled.
		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$frame = $this->getPreprocessor()->newFrame();
		$expanded = $frame->expand( $dom, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES );

		return $this->mStripState->unstripBoth( $expanded );
	}
701
702
	/**
703
	 * Get a random string
704
	 *
705
	 * @return string
706
	 * @deprecated since 1.26; use wfRandomString() instead.
707
	 */
708
	public static function getRandomString() {
		// Deprecated shim: record the deprecated call, then delegate.
		wfDeprecated( __METHOD__, '1.26' );
		$random = wfRandomString( 16 );
		return $random;
	}
712
713
	/**
714
	 * Set the current user.
715
	 * Should only be used when doing pre-save transform.
716
	 *
717
	 * @param User|null $user User object or null (to reset)
718
	 */
719
	public function setUser( $user ) {
		// Stored as-is; getUser() prefers this value when it is not null.
		$this->mUser = $user;
	}
722
723
	/**
724
	 * Accessor for mUniqPrefix.
725
	 *
726
	 * @return string
727
	 * @deprecated since 1.26; use Parser::MARKER_PREFIX instead.
728
	 */
729
	public function uniqPrefix() {
		// Deprecated shim: record the deprecated call, then return the constant.
		wfDeprecated( __METHOD__, '1.26' );
		$prefix = self::MARKER_PREFIX;
		return $prefix;
	}
733
734
	/**
735
	 * Set the context title
736
	 *
737
	 * @param Title $t
738
	 */
739
	public function setTitle( $t ) {
		// Fall back to a placeholder title when none (or a falsy value) is given.
		$title = $t ?: Title::newFromText( 'NO TITLE' );

		# Strip the fragment to avoid various odd effects
		$this->mTitle = $title->hasFragment()
			? $title->createFragmentTarget( '' )
			: $title;
	}
751
752
	/**
753
	 * Accessor for the Title object
754
	 *
755
	 * @return Title
756
	 */
757
	public function getTitle() {
		// Plain read of the current title context.
		$title = $this->mTitle;
		return $title;
	}
760
761
	/**
762
	 * Accessor/mutator for the Title object
763
	 *
764
	 * @param Title $x Title object or null to just get the current one
765
	 * @return Title
766
	 */
767
	public function Title( $x = null ) {
		// Combined getter/setter; the set-or-return logic lives in wfSetVar().
		$result = wfSetVar( $this->mTitle, $x );
		return $result;
	}
770
771
	/**
772
	 * Set the output type
773
	 *
774
	 * @param int $ot New value
775
	 */
776
	public function setOutputType( $ot ) {
		$this->mOutputType = $ot;

		# Shortcut alias: one boolean per output type, keyed by short name
		$typesByAlias = [
			'html' => self::OT_HTML,
			'wiki' => self::OT_WIKI,
			'pre' => self::OT_PREPROCESS,
			'plain' => self::OT_PLAIN,
		];
		$flags = [];
		foreach ( $typesByAlias as $alias => $type ) {
			$flags[$alias] = ( $ot == $type );
		}
		$this->ot = $flags;
	}
786
787
	/**
788
	 * Accessor/mutator for the output type
789
	 *
790
	 * @param int|null $x New value or null to just get the current one
791
	 * @return int
792
	 */
793
	public function OutputType( $x = null ) {
		// Combined getter/setter; the set-or-return logic lives in wfSetVar().
		$result = wfSetVar( $this->mOutputType, $x );
		return $result;
	}
796
797
	/**
798
	 * Get the ParserOutput object
799
	 *
800
	 * @return ParserOutput
801
	 */
802
	public function getOutput() {
		// Plain read of the current output object.
		$output = $this->mOutput;
		return $output;
	}
805
806
	/**
807
	 * Get the ParserOptions object
808
	 *
809
	 * @return ParserOptions
810
	 */
811
	public function getOptions() {
		// Plain read of the current options object.
		$options = $this->mOptions;
		return $options;
	}
814
815
	/**
816
	 * Accessor/mutator for the ParserOptions object
817
	 *
818
	 * @param ParserOptions $x New value or null to just get the current one
819
	 * @return ParserOptions Current ParserOptions object
820
	 */
821
	public function Options( $x = null ) {
		// Combined getter/setter; the set-or-return logic lives in wfSetVar().
		$result = wfSetVar( $this->mOptions, $x );
		return $result;
	}
824
825
	/**
826
	 * @return int
827
	 */
828
	public function nextLinkID() {
		// Hand out the current ID, then advance the counter.
		$id = $this->mLinkID;
		$this->mLinkID++;
		return $id;
	}
831
832
	/**
833
	 * @param int $id
834
	 */
835
	public function setLinkID( $id ) {
		// Overwrites the counter that nextLinkID() hands out from.
		$this->mLinkID = $id;
	}
838
839
	/**
840
	 * Get a language object for use in parser functions such as {{FORMATNUM:}}
841
	 * @return Language
842
	 */
843
	public function getFunctionLang() {
		// Same language as the parse target.
		$language = $this->getTargetLanguage();
		return $language;
	}
846
847
	/**
848
	 * Get the target language for the content being parsed. This is usually the
849
	 * language that the content is in.
850
	 *
851
	 * @since 1.19
852
	 *
853
	 * @throws MWException
854
	 * @return Language
855
	 */
856
	public function getTargetLanguage() {
		// 1. An explicit target language in the options wins.
		$target = $this->mOptions->getTargetLanguage();
		if ( $target !== null ) {
			return $target;
		}

		// 2. Interface messages use the user language.
		if ( $this->mOptions->getInterfaceMessage() ) {
			return $this->mOptions->getUserLangObj();
		}

		// 3. Otherwise the page language of the title, which must be set.
		if ( $this->mTitle === null ) {
			throw new MWException( __METHOD__ . ': $this->mTitle is null' );
		}

		return $this->mTitle->getPageLanguage();
	}
869
870
	/**
871
	 * Get the language object for language conversion
872
	 * @return Language|null
873
	 */
874
	public function getConverterLanguage() {
		// Same language as the parse target.
		$language = $this->getTargetLanguage();
		return $language;
	}
877
878
	/**
879
	 * Get a User object either from $this->mUser, if set, or from the
880
	 * ParserOptions object otherwise
881
	 *
882
	 * @return User
883
	 */
884
	public function getUser() {
		// An explicitly set user takes precedence over the one in the options.
		return ( $this->mUser !== null )
			? $this->mUser
			: $this->mOptions->getUser();
	}
890
891
	/**
892
	 * Get a preprocessor object
893
	 *
894
	 * @return Preprocessor
895
	 */
896
	public function getPreprocessor() {
		// Reuse the existing instance when there is one.
		if ( isset( $this->mPreprocessor ) ) {
			return $this->mPreprocessor;
		}

		// Otherwise instantiate the class chosen in the constructor.
		$preprocessorClass = $this->mPreprocessorClass;
		$this->mPreprocessor = new $preprocessorClass( $this );

		return $this->mPreprocessor;
	}
903
904
	/**
905
	 * Get a LinkRenderer instance to make links with
906
	 *
907
	 * @since 1.28
908
	 * @return LinkRenderer
909
	 */
910
	public function getLinkRenderer() {
		// Reuse the renderer created by an earlier call.
		if ( $this->mLinkRenderer ) {
			return $this->mLinkRenderer;
		}

		// First call: create a renderer and apply the stub threshold from
		// the current parser options.
		$renderer = MediaWikiServices::getInstance()
			->getLinkRendererFactory()->create();
		$this->mLinkRenderer = $renderer;
		$renderer->setStubThreshold(
			$this->getOptions()->getStubThreshold()
		);

		return $this->mLinkRenderer;
	}
921
922
	/**
	 * Replaces all occurrences of HTML-style comments and the given tags
	 * in the text with a unique marker and returns the new text. The output
	 * parameter $matches will be an associative array filled with data in
	 * the form:
	 *
	 * @code
	 *   'UNIQ-xxxxx' => [
	 *     'element',
	 *     'tag content',
	 *     [ 'param' => 'x' ],
	 *     '<element param="x">tag content</element>' ]
	 * @endcode
	 *
	 * Element names are matched literally (and case-insensitively): they are
	 * escaped with preg_quote() before being placed into the search patterns.
	 * An element whose end tag is missing runs to the end of the text.
	 *
	 * @param array $elements List of element names. Comments are always extracted.
	 * @param string $text Source text string.
	 * @param array $matches Out parameter, Array: extracted tags
	 * @param string|null $uniq_prefix
	 * @return string Stripped text
	 * @since 1.26 The uniq_prefix argument is deprecated.
	 */
	public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
		if ( $uniq_prefix !== null ) {
			wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
		}
		# Counter shared by all calls so that markers stay unique
		static $n = 1;
		$stripped = '';
		$matches = [];

		# Escape the names so that regex metacharacters in a tag name are
		# matched literally instead of corrupting the pattern
		$quotedElements = [];
		foreach ( $elements as $name ) {
			$quotedElements[] = preg_quote( $name, '/' );
		}
		$taglist = implode( '|', $quotedElements );
		$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

		while ( $text != '' ) {
			$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
			$stripped .= $p[0];
			if ( count( $p ) < 5 ) {
				# Nothing more to extract
				break;
			}
			if ( count( $p ) > 5 ) {
				# comment
				$element = $p[4];
				$attributes = '';
				$close = '';
				$inside = $p[5];
			} else {
				# tag
				$element = $p[1];
				$attributes = $p[2];
				$close = $p[3];
				$inside = $p[4];
			}

			$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
			$stripped .= $marker;

			if ( $close === '/>' ) {
				# Empty element tag, <tag />
				$content = null;
				$text = $inside;
				$tail = null;
			} else {
				if ( $element === '!--' ) {
					$end = '/(-->)/';
				} else {
					# $element is the name as written in the text, so it needs
					# the same escaping as the names in the start pattern
					$quotedElement = preg_quote( $element, '/' );
					$end = "/(<\\/$quotedElement\\s*>)/i";
				}
				$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
				$content = $q[0];
				if ( count( $q ) < 3 ) {
					# No end tag -- let it run out to the end of the text.
					$tail = '';
					$text = '';
				} else {
					$tail = $q[1];
					$text = $q[2];
				}
			}

			$matches[$marker] = [ $element,
				$content,
				Sanitizer::decodeTagAttributes( $attributes ),
				"<$element$attributes$close$content$tail" ];
		}
		return $stripped;
	}
1007
1008
	/**
	 * Get the list of strippable XML-like elements held in $this->mStripList.
	 *
	 * @return array
	 */
	public function getStripList() {
		$stripList = $this->mStripList;

		return $stripList;
	}
1016
1017
	/**
	 * Add an item to the strip state.
	 *
	 * Returns the unique marker which must be inserted into the stripped text.
	 * The marker will be replaced with the original text in unstrip().
	 *
	 * @param string $text Text to stash in the strip state
	 *
	 * @return string The marker standing in for $text
	 */
	public function insertStripItem( $text ) {
		// Take the current index for this marker and advance the counter
		$index = $this->mMarkerIndex++;
		$marker = self::MARKER_PREFIX . '-item-' . $index . '-' . self::MARKER_SUFFIX;

		$this->mStripState->addGeneral( $marker, $text );

		return $marker;
	}
1032
1033
	/**
	 * Parse the wiki syntax used to render tables.
	 *
	 * The text is processed line by line. The state of each open table is kept
	 * on a set of parallel stacks (one entry per nesting level), so tables may
	 * be nested. Tables that are still open at the end of the text are closed,
	 * using the tag of the cell that is actually open (td, th or caption).
	 *
	 * @private
	 * @param string $text
	 * @return string
	 */
	public function doTableStuff( $text ) {

		$lines = StringUtils::explode( "\n", $text );
		$out = '';
		$td_history = []; # Is currently a td tag open?
		$last_tag_history = []; # Save history of last tag activated (td, th or caption)
		$tr_history = []; # Is currently a tr tag open?
		$tr_attributes = []; # history of tr attributes
		$has_opened_tr = []; # Did this table open a <tr> element?
		$indent_level = 0; # indent level of the table

		foreach ( $lines as $outLine ) {
			$line = trim( $outLine );

			if ( $line === '' ) { # empty line, go to next line
				$out .= $outLine . "\n";
				continue;
			}

			$first_character = $line[0];
			$first_two = substr( $line, 0, 2 );
			$matches = [];

			if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
				# First check if we are starting a new table
				$indent_level = strlen( $matches[1] );

				$attributes = $this->mStripState->unstripBoth( $matches[2] );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

				$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
				array_push( $td_history, false );
				array_push( $last_tag_history, '' );
				array_push( $tr_history, false );
				array_push( $tr_attributes, '' );
				array_push( $has_opened_tr, false );
			} elseif ( count( $td_history ) == 0 ) {
				# Not inside a table: don't do any of the following
				$out .= $outLine . "\n";
				continue;
			} elseif ( $first_two === '|}' ) {
				# We are ending a table
				$line = '</table>' . substr( $line, 2 );
				$last_tag = array_pop( $last_tag_history );

				if ( !array_pop( $has_opened_tr ) ) {
					# A table without any row gets one empty row
					$line = "<tr><td></td></tr>{$line}";
				}

				if ( array_pop( $tr_history ) ) {
					$line = "</tr>{$line}";
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}
				array_pop( $tr_attributes );
				$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
			} elseif ( $first_two === '|-' ) {
				# Now we have a table row
				$line = preg_replace( '#^\|-+#', '', $line );

				# Whats after the tag is now only attributes
				$attributes = $this->mStripState->unstripBoth( $line );
				$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
				# Remember the attributes; the <tr> itself is emitted with the first cell
				array_pop( $tr_attributes );
				array_push( $tr_attributes, $attributes );

				$line = '';
				$last_tag = array_pop( $last_tag_history );
				array_pop( $has_opened_tr );
				array_push( $has_opened_tr, true );

				if ( array_pop( $tr_history ) ) {
					$line = '</tr>';
				}

				if ( array_pop( $td_history ) ) {
					$line = "</{$last_tag}>{$line}";
				}

				$outLine = $line;
				array_push( $tr_history, false );
				array_push( $td_history, false );
				array_push( $last_tag_history, '' );
			} elseif ( $first_character === '|'
				|| $first_character === '!'
				|| $first_two === '|+'
			) {
				# This might be cell elements, td, th or captions
				if ( $first_two === '|+' ) {
					$first_character = '+';
					$line = substr( $line, 2 );
				} else {
					$line = substr( $line, 1 );
				}

				// Implies both are valid for table headings.
				if ( $first_character === '!' ) {
					$line = StringUtils::replaceMarkup( '!!', '||', $line );
				}

				# Split up multiple cells on the same line.
				# FIXME : This can result in improper nesting of tags processed
				# by earlier parser steps.
				$cells = explode( '||', $line );

				$outLine = '';

				# Loop through each table cell
				foreach ( $cells as $cell ) {
					$previous = '';
					if ( $first_character !== '+' ) {
						# Cells (but not captions) need an open row
						$tr_after = array_pop( $tr_attributes );
						if ( !array_pop( $tr_history ) ) {
							$previous = "<tr{$tr_after}>\n";
						}
						array_push( $tr_history, true );
						array_push( $tr_attributes, '' );
						array_pop( $has_opened_tr );
						array_push( $has_opened_tr, true );
					}

					$last_tag = array_pop( $last_tag_history );

					# Close the previous cell of this table, if any
					if ( array_pop( $td_history ) ) {
						$previous = "</{$last_tag}>\n{$previous}";
					}

					if ( $first_character === '|' ) {
						$last_tag = 'td';
					} elseif ( $first_character === '!' ) {
						$last_tag = 'th';
					} elseif ( $first_character === '+' ) {
						$last_tag = 'caption';
					} else {
						$last_tag = '';
					}

					array_push( $last_tag_history, $last_tag );

					# A cell could contain both parameters and data
					$cell_data = explode( '|', $cell, 2 );

					# Bug 553: Note that a '|' inside an invalid link should not
					# be mistaken as delimiting cell parameters
					if ( strpos( $cell_data[0], '[[' ) !== false ) {
						$cell = "{$previous}<{$last_tag}>{$cell}";
					} elseif ( count( $cell_data ) == 1 ) {
						$cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
					} else {
						$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
						$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
						$cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
					}

					$outLine .= $cell;
					array_push( $td_history, true );
				}
			}
			$out .= $outLine . "\n";
		}

		# Closing open td/th/caption, tr && table
		while ( count( $td_history ) > 0 ) {
			# $last_tag_history is pushed and popped in step with $td_history,
			# so its top entry names the cell that is still open
			$last_tag = array_pop( $last_tag_history );
			if ( array_pop( $td_history ) ) {
				$out .= "</{$last_tag}>\n";
			}
			if ( array_pop( $tr_history ) ) {
				$out .= "</tr>\n";
			}
			if ( !array_pop( $has_opened_tr ) ) {
				$out .= "<tr><td></td></tr>\n";
			}

			$out .= "</table>\n";
		}

		# Remove trailing line-ending (b/c)
		if ( substr( $out, -1 ) === "\n" ) {
			$out = substr( $out, 0, -1 );
		}

		# special case: don't return empty table
		if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
			$out = '';
		}

		return $out;
	}
1230
1231
	/**
	 * Helper function for parse() that transforms wiki markup into half-parsed
	 * HTML. Only called for $mOutputType == self::OT_HTML.
	 *
	 * The 'ParserBeforeInternalParse' hook may abort processing by returning
	 * false, in which case the (possibly hook-modified) text is returned as is.
	 *
	 * @private
	 *
	 * @param string $text The text to parse
	 * @param bool $isMain Whether this is being called from the main parse() function
	 * @param PPFrame|bool $frame A pre-processor frame
	 *
	 * @return string
	 */
	public function internalParse( $text, $isMain = true, $frame = false ) {

		$origText = $text;

		// Avoid PHP 7.1 warning from passing $this by reference: hook handlers
		// get a reference to this alias instead
		$parser = $this;

		# Hook to suspend the parser in this state
		if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
			return $text;
		}

		# if $frame is provided, then use $frame for replacing any variables
		if ( $frame ) {
			# use frame depth to infer how include/noinclude tags should be handled
			# depth=0 means this is the top-level document; otherwise it's an included document
			if ( !$frame->depth ) {
				$flag = 0;
			} else {
				$flag = Parser::PTD_FOR_INCLUSION;
			}
			$dom = $this->preprocessToDom( $text, $flag );
			$text = $frame->expand( $dom );
		} else {
			# if $frame is not provided, then use old-style replaceVariables
			$text = $this->replaceVariables( $text );
		}

		Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
		$text = Sanitizer::removeHTMLtags(
			$text,
			[ $this, 'attributeStripCallback' ],
			false,
			array_keys( $this->mTransparentTagHooks ),
			[],
			[ $this, 'addTrackingCategory' ]
		);
		Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

		# Tables need to come after variable replacement for things to work
		# properly; putting them before other transformations should keep
		# exciting things like link expansions from showing up in surprising
		# places.
		$text = $this->doTableStuff( $text );

		# Four or more dashes at the start of a line become a horizontal rule
		$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

		$text = $this->doDoubleUnderscore( $text );

		$text = $this->doHeadings( $text );
		$text = $this->replaceInternalLinks( $text );
		$text = $this->doAllQuotes( $text );
		$text = $this->replaceExternalLinks( $text );

		# replaceInternalLinks may sometimes leave behind
		# absolute URLs, which have to be masked to hide them from replaceExternalLinks
		$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

		$text = $this->doMagicLinks( $text );
		$text = $this->formatHeadings( $text, $origText, $isMain );

		return $text;
	}
1303
1304
	/**
	 * Helper function for parse() that transforms half-parsed HTML into fully
	 * parsed HTML.
	 *
	 * The 'ParserAfterUnstrip', 'ParserBeforeTidy' and 'ParserAfterTidy' hooks
	 * are only run when $isMain is true.
	 *
	 * @param string $text
	 * @param bool $isMain
	 * @param bool $linestart
	 * @return string
	 */
	private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
		$text = $this->mStripState->unstripGeneral( $text );

		// Avoid PHP 7.1 warning from passing $this by reference: hook handlers
		// get a reference to this alias instead
		$parser = $this;

		if ( $isMain ) {
			Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
		}

		# Clean up special characters, only run once, next-to-last before doBlockLevels
		$fixtags = [
			# french spaces, last one Guillemet-left
			# only if there is something before the space
			'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
			# french spaces, Guillemet-right
			'/(\\302\\253) /' => '\\1&#160;',
			'/&#160;(!\s*important)/' => ' \\1', # Beware of CSS magic word !important, bug #11874.
		];
		$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

		$text = $this->doBlockLevels( $text, $linestart );

		$this->replaceLinkHolders( $text );

		/**
		 * The input doesn't get language converted if
		 * a) It's disabled
		 * b) Content isn't converted
		 * c) It's a conversion table
		 * d) it is an interface message (which is in the user language)
		 */
		if ( !( $this->mOptions->getDisableContentConversion()
			|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
		) {
			if ( !$this->mOptions->getInterfaceMessage() ) {
				# The position of the convert() call should not be changed. it
				# assumes that the links are all replaced and the only thing left
				# is the <nowiki> mark.
				$text = $this->getConverterLanguage()->convert( $text );
			}
		}

		$text = $this->mStripState->unstripNoWiki( $text );

		if ( $isMain ) {
			Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
		}

		$text = $this->replaceTransparentTags( $text );
		$text = $this->mStripState->unstripGeneral( $text );

		$text = Sanitizer::normalizeCharReferences( $text );

		if ( MWTidy::isEnabled() ) {
			if ( $this->mOptions->getTidy() ) {
				$text = MWTidy::tidy( $text );
			}
		} else {
			# attempt to sanitize at least some nesting problems
			# (bug #2702 and quite a few others)
			$tidyregs = [
				# ''Something [http://www.cool.com cool''] -->
				# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
				'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
				# fix up an anchor inside another anchor, only
				# at least for a single single nested link (bug 3695)
				'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
				# fix div inside inline elements- doBlockLevels won't wrap a line which
				# contains a div, so fix it up here; replace
				# div with escaped text
				'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
				# remove empty italic or bold tag pairs, some
				# introduced by rules above
				'/<([bi])><\/\\1>/' => '',
			];

			$text = preg_replace(
				array_keys( $tidyregs ),
				array_values( $tidyregs ),
				$text );
		}

		if ( $isMain ) {
			Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
		}

		return $text;
	}
1402
1403
	/**
	 * Replace special strings like "ISBN xxx" and "RFC xxx" with
	 * magic external links, and turn free-standing URLs into links.
	 *
	 * Existing anchors and anything inside an HTML tag are matched first so
	 * that they are skipped; every match is handed to magicLinkCallback().
	 *
	 * @private
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public function doMagicLinks( $text ) {
		$protocols = wfUrlProtocolsWithoutProtRel();
		$urlChar = self::EXT_LINK_URL_CLASS;
		$addr = self::EXT_LINK_ADDR;
		$space = self::SPACE_NOT_NL; #  non-newline space
		$spdash = "(?:-|$space)"; # a dash or a non-newline space
		$spaces = "$space++"; # possessive match of 1 or more spaces

		$pattern = '!(?:                            # Start cases
				(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
				(<.*?>) |                    # m[2]: Skip stuff inside
				                             #       HTML elements' . "
				(\b(?i:$protocols)($addr$urlChar*)) | # m[3]: Free external links
				                             # m[4]: Post-protocol path
				\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
					([0-9]+)\b |
				\bISBN $spaces (             # m[6]: ISBN, capture number
					(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
					(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
					[0-9Xx]                  #  check digit
				)\b
			)!xu";

		return preg_replace_callback( $pattern, [ $this, 'magicLinkCallback' ], $text );
	}
1438
1439
	/**
	 * Callback for doMagicLinks(): turn one match into its replacement.
	 *
	 * Anchors and HTML elements are returned untouched, free URLs go through
	 * makeFreeExternalLink(), and RFC/PMID/ISBN references become links when
	 * the corresponding parser option is enabled.
	 *
	 * @throws MWException If an RFC/PMID match starts with neither keyword
	 * @param array $m Match groups as numbered in the doMagicLinks() pattern
	 * @return string HTML, or the unchanged matched text
	 */
	public function magicLinkCallback( $m ) {
		$matched = $m[0];

		# Skip anchor
		if ( isset( $m[1] ) && $m[1] !== '' ) {
			return $matched;
		}

		# Skip HTML element
		if ( isset( $m[2] ) && $m[2] !== '' ) {
			return $matched;
		}

		# Free external link
		if ( isset( $m[3] ) && $m[3] !== '' ) {
			return $this->makeFreeExternalLink( $matched, strlen( $m[4] ) );
		}

		# RFC or PMID
		if ( isset( $m[5] ) && $m[5] !== '' ) {
			if ( substr( $matched, 0, 3 ) === 'RFC' ) {
				$enabled = $this->mOptions->getMagicRFCLinks();
				$keyword = 'RFC';
				$urlmsg = 'rfcurl';
				$cssClass = 'mw-magiclink-rfc';
				$trackingCat = 'magiclink-tracking-rfc';
			} elseif ( substr( $matched, 0, 4 ) === 'PMID' ) {
				$enabled = $this->mOptions->getMagicPMIDLinks();
				$keyword = 'PMID';
				$urlmsg = 'pubmedurl';
				$cssClass = 'mw-magiclink-pmid';
				$trackingCat = 'magiclink-tracking-pmid';
			} else {
				throw new MWException( __METHOD__ . ': unrecognised match type "' .
					substr( $matched, 0, 20 ) . '"' );
			}

			if ( !$enabled ) {
				# This kind of magic link is switched off, leave the text alone
				return $matched;
			}

			$id = $m[5];
			$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
			$this->addTrackingCategory( $trackingCat );
			return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
		}

		# ISBN
		if ( isset( $m[6] ) && $m[6] !== '' && $this->mOptions->getMagicISBNLinks() ) {
			$space = self::SPACE_NOT_NL; #  non-newline space
			# Normalise every non-newline space to a plain space for display
			$isbn = preg_replace( "/$space/", ' ', $m[6] );
			# The Booksources subpage takes the bare number
			$num = strtr( $isbn, [
				'-' => '',
				' ' => '',
				'x' => 'X',
			] );
			$this->addTrackingCategory( 'magiclink-tracking-isbn' );
			return $this->getLinkRenderer()->makeKnownLink(
				SpecialPage::getTitleFor( 'Booksources', $num ),
				"ISBN $isbn",
				[
					'class' => 'internal mw-magiclink-isbn',
					'title' => false // suppress title attribute
				]
			);
		}

		return $matched;
	}
1506
1507
	/**
	 * Make a free external link, given a user-supplied URL.
	 *
	 * Anything from the first escaped '<', '>' or non-breaking space onwards,
	 * and any trailing punctuation, is split off the URL and appended after
	 * the link as plain text. If nothing but the protocol would remain, the
	 * input is returned without being linked.
	 *
	 * @param string $url
	 * @param int $numPostProto
	 *   The number of characters after the protocol.
	 * @return string HTML
	 * @private
	 */
	public function makeFreeExternalLink( $url, $numPostProto ) {
		$trail = '';

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		# Make &nbsp; terminate a URL as well (bug T84937)
		$entity = [];
		$found = preg_match(
			'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
			$url,
			$entity,
			PREG_OFFSET_CAPTURE
		);
		if ( $found ) {
			$cut = $entity[0][1];
			$trail = substr( $url, $cut );
			$url = substr( $url, 0, $cut );
		}

		# Move trailing punctuation to $trail
		# NOTE(review): inside single quotes the '\' is a literal backslash, so
		# backslash is part of this strspn() mask too -- confirm this is intended
		$trailChars = ',;\.:!?';
		# If there is no left bracket, then consider right brackets fair game too
		if ( strpos( $url, '(' ) === false ) {
			$trailChars .= ')';
		}

		$reversed = strrev( $url );
		$trailLen = strspn( $reversed, $trailChars );
		# Don't break a trailing HTML entity by moving the ; into $trail
		# This is in hot code, so use substr_compare to avoid having to
		# create a new string object for the comparison.
		# More optimization: instead of running preg_match with a $
		# anchor, which can be slow, do the match on the reversed
		# string starting at the desired offset.
		# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		if ( $trailLen
			&& substr_compare( $url, ";", -$trailLen, 1 ) === 0
			&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $reversed, $entity, 0, $trailLen )
		) {
			$trailLen--;
		}
		if ( $trailLen ) {
			$trail = substr( $url, -$trailLen ) . $trail;
			$url = substr( $url, 0, -$trailLen );
		}

		# Verify that we still have a real URL after trail removal, and
		# not just lone protocol
		if ( strlen( $trail ) >= $numPostProto ) {
			return $url . $trail;
		}

		$url = Sanitizer::cleanUrl( $url );

		# Is this an external image?
		$image = $this->maybeMakeExternalImage( $url );
		if ( $image !== false ) {
			return $image . $trail;
		}

		# Not an image, make a link
		$caption = $this->getConverterLanguage()->markNoConversion( $url, true );
		$attribs = $this->getExternalLinkAttribs( $url );
		$link = Linker::makeExternalLink( $url, $caption, true, 'free', $attribs, $this->mTitle );

		# Register it in the output object...
		# Replace unnecessary URL escape codes with their equivalent characters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );

		return $link . $trail;
	}
1583
1584
	/**
	 * Parse headings and return html.
	 *
	 * A line wrapped in one to six '=' signs on each side becomes the matching
	 * <h1>..<h6> element. The deepest level is tried first so that longer runs
	 * of '=' are not consumed by a shallower level.
	 *
	 * @private
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public function doHeadings( $text ) {
		foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
			$marks = str_repeat( '=', $level );
			$pattern = "/^{$marks}(.+){$marks}\\s*$/m";
			$replacement = "<h{$level}>\\1</h{$level}>";
			$text = preg_replace( $pattern, $replacement, $text );
		}

		return $text;
	}
1600
1601
	/**
	 * Replace single quotes with HTML markup, one line at a time.
	 * @private
	 *
	 * @param string $text
	 *
	 * @return string The altered text
	 */
	public function doAllQuotes( $text ) {
		$result = '';
		foreach ( StringUtils::explode( "\n", $text ) as $line ) {
			$result .= $this->doQuotes( $line ) . "\n";
		}

		// Drop the newline that was appended after the final line
		return substr( $result, 0, -1 );
	}
1618
1619
	/**
	 * Helper function for doAllQuotes(): convert the apostrophe mark-up of a
	 * single line ('' for italics, ''' for bold, ''''' for both) into <i> and
	 * <b> elements. Tags left open at the end of the line are closed.
	 *
	 * @param string $text One line of text
	 *
	 * @return string
	 */
	public function doQuotes( $text ) {
		// Even indexes hold plain text, odd indexes hold runs of apostrophes
		$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		$countarr = count( $arr );
		if ( $countarr == 1 ) {
			// No mark-up on this line
			return $text;
		}

		// First, do some preliminary work. This may shift some apostrophes from
		// being mark-up to being text. It also counts the number of occurrences
		// of bold and italics mark-ups.
		$numbold = 0;
		$numitalics = 0;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			$thislen = strlen( $arr[$i] );
			// If there are ever four apostrophes, assume the first is supposed to
			// be text, and the remaining three constitute mark-up for bold text.
			// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
			if ( $thislen == 4 ) {
				$arr[$i - 1] .= "'";
				$arr[$i] = "'''";
				$thislen = 3;
			} elseif ( $thislen > 5 ) {
				// If there are more than 5 apostrophes in a row, assume they're all
				// text except for the last 5.
				// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
				$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
				$arr[$i] = "'''''";
				$thislen = 5;
			}
			// Count the number of occurrences of bold and italics mark-ups.
			if ( $thislen == 2 ) {
				$numitalics++;
			} elseif ( $thislen == 3 ) {
				$numbold++;
			} elseif ( $thislen == 5 ) {
				$numitalics++;
				$numbold++;
			}
		}

		// If there is an odd number of both bold and italics, it is likely
		// that one of the bold ones was meant to be an apostrophe followed
		// by italics. Which one we cannot know for certain, but it is more
		// likely to be one that has a single-letter word before it.
		if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
			$firstsingleletterword = -1;
			$firstmultiletterword = -1;
			$firstspace = -1;
			for ( $i = 1; $i < $countarr; $i += 2 ) {
				if ( strlen( $arr[$i] ) == 3 ) {
					// Last and second-to-last character of the preceding text
					$x1 = substr( $arr[$i - 1], -1 );
					$x2 = substr( $arr[$i - 1], -2, 1 );
					if ( $x1 === ' ' ) {
						if ( $firstspace == -1 ) {
							$firstspace = $i;
						}
					} elseif ( $x2 === ' ' ) {
						$firstsingleletterword = $i;
						// if $firstsingleletterword is set, we don't
						// look at the other options, so we can bail early.
						break;
					} else {
						if ( $firstmultiletterword == -1 ) {
							$firstmultiletterword = $i;
						}
					}
				}
			}

			// If there is a single-letter word, use it!
			if ( $firstsingleletterword > -1 ) {
				$arr[$firstsingleletterword] = "''";
				$arr[$firstsingleletterword - 1] .= "'";
			} elseif ( $firstmultiletterword > -1 ) {
				// If not, but there's a multi-letter word, use that one.
				$arr[$firstmultiletterword] = "''";
				$arr[$firstmultiletterword - 1] .= "'";
			} elseif ( $firstspace > -1 ) {
				// ... otherwise use the first one that has neither.
				// (notice that it is possible for all three to be -1 if, for example,
				// there is only one pentuple-apostrophe in the line)
				$arr[$firstspace] = "''";
				$arr[$firstspace - 1] .= "'";
			}
		}

		// Now let's actually convert our apostrophic mush to HTML!
		// $state names the open tags in the order they were opened ('', 'i',
		// 'b', 'bi', 'ib'), or is 'both' after a ''''' whose tag order is not
		// yet known; text seen in that state is held back in $buffer.
		$output = '';
		$buffer = '';
		$state = '';
		foreach ( $arr as $i => $r ) {
			if ( ( $i % 2 ) == 0 ) {
				if ( $state === 'both' ) {
					$buffer .= $r;
				} else {
					$output .= $r;
				}
				continue;
			}

			$thislen = strlen( $r );
			if ( $thislen == 2 ) {
				if ( $state === 'i' ) {
					$output .= '</i>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i>';
					$state = 'b';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i><b>';
					$state = 'b';
				} elseif ( $state === 'both' ) {
					$output .= '<b><i>' . $buffer . '</i>';
					$state = 'b';
				} else { // $state can be 'b' or ''
					$output .= '<i>';
					$state .= 'i';
				}
			} elseif ( $thislen == 3 ) {
				if ( $state === 'b' ) {
					$output .= '</b>';
					$state = '';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b><i>';
					$state = 'i';
				} elseif ( $state === 'ib' ) {
					$output .= '</b>';
					$state = 'i';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b>';
					$state = 'i';
				} else { // $state can be 'i' or ''
					$output .= '<b>';
					$state .= 'b';
				}
			} elseif ( $thislen == 5 ) {
				if ( $state === 'b' ) {
					$output .= '</b><i>';
					$state = 'i';
				} elseif ( $state === 'i' ) {
					$output .= '</i><b>';
					$state = 'b';
				} elseif ( $state === 'bi' ) {
					$output .= '</i></b>';
					$state = '';
				} elseif ( $state === 'ib' ) {
					$output .= '</b></i>';
					$state = '';
				} elseif ( $state === 'both' ) {
					$output .= '<i><b>' . $buffer . '</b></i>';
					$state = '';
				} else { // ($state == '')
					$buffer = '';
					$state = 'both';
				}
			}
		}
		// Now close all remaining tags.  Notice that the order is important.
		if ( $state === 'b' || $state === 'ib' ) {
			$output .= '</b>';
		}
		if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
			$output .= '</i>';
		}
		if ( $state === 'bi' ) {
			$output .= '</b>';
		}
		// There might be lonely ''''', so make sure we have a buffer.
		// Compare against '' rather than testing truthiness, otherwise a
		// buffered "0" would be thrown away.
		if ( $state === 'both' && $buffer !== '' ) {
			$output .= '<b><i>' . $buffer . '</i></b>';
		}
		return $output;
	}
1800
1801
	/**
	 * Replace external links (REL)
	 *
	 * Turns bracketed external links such as [http://domain.tld/ text] into
	 * HTML links and registers every linked URL in the output object.
	 *
	 * Note: this is all very hackish and the order of execution matters a lot.
	 * Make sure to run tests/parser/parserTests.php if you change this code.
	 *
	 * @private
	 *
	 * @param string $text
	 *
	 * @throws MWException
	 * @return string
	 */
	public function replaceExternalLinks( $text ) {
		$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
		if ( $pieces === false ) {
			throw new MWException( "PCRE needs to be compiled with "
				. "--enable-unicode-properties in order for MediaWiki to function" );
		}

		# The first piece is the text in front of the first link
		$html = array_shift( $pieces );

		# The remaining pieces are consumed four at a time:
		# URL, protocol (unused here), link text, trailing text
		$total = count( $pieces );
		for ( $pos = 0; $pos < $total; $pos += 4 ) {
			$url = $pieces[$pos];
			$label = $pieces[$pos + 2];
			$trail = $pieces[$pos + 3];

			# The characters '<' and '>' (which were escaped by
			# removeHTMLtags()) should not be included in
			# URLs, per RFC 2396.
			$entity = [];
			if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
				$cut = $entity[0][1];
				$label = substr( $url, $cut ) . ' ' . $label;
				$url = substr( $url, 0, $cut );
			}

			# If the link text is an image URL, replace it with an <img> tag
			# This happened by accident in the original parser, but some people used it extensively
			$img = $this->maybeMakeExternalImage( $label );
			if ( $img !== false ) {
				$label = $img;
			}

			$dtrail = '';

			# Set linktype for CSS - if URL==text, link is essentially free
			$linktype = 'text';
			if ( $label === $url ) {
				$linktype = 'free';
			}

			if ( $label == '' ) {
				# No link text, e.g. [http://domain.tld/some.link]
				# Autonumber
				$number = $this->getTargetLanguage()->formatNum( ++$this->mAutonumber );
				$label = '[' . $number . ']';
				$linktype = 'autonumber';
			} else {
				# Have link text, e.g. [http://domain.tld/some.link text]s
				# Check for trail
				list( $dtrail, $trail ) = Linker::splitTrail( $trail );
			}

			$label = $this->getConverterLanguage()->markNoConversion( $label );

			$url = Sanitizer::cleanUrl( $url );

			# Use the encoded URL
			# This means that users can paste URLs directly into the text
			# Funny characters like ö aren't valid in URLs anyway
			# This was changed in August 2004
			$attribs = $this->getExternalLinkAttribs( $url );
			$link = Linker::makeExternalLink( $url, $label, false, $linktype, $attribs, $this->mTitle );
			$html .= $link . $dtrail . $trail;

			# Register link in the output object.
			# Replace unnecessary URL escape codes with the referenced character
			# This prevents spammers from hiding links from the filters
			$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
		}

		return $html;
	}
1883
1884
	/**
1885
	 * Get the rel attribute for a particular external link.
1886
	 *
1887
	 * @since 1.21
1888
	 * @param string|bool $url Optional URL, to extract the domain from for rel =>
1889
	 *   nofollow if appropriate
1890
	 * @param Title $title Optional Title, for wgNoFollowNsExceptions lookups
1891
	 * @return string|null Rel attribute for $url
1892
	 */
1893
	public static function getExternalLinkRel( $url = false, $title = null ) {
		global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

		# Without a title there is no namespace to look up in the exception list
		$ns = $title ? $title->getNamespace() : false;

		# nofollow is switched off globally
		if ( !$wgNoFollowLinks ) {
			return null;
		}
		# The namespace is exempt from nofollow
		if ( in_array( $ns, $wgNoFollowNsExceptions ) ) {
			return null;
		}
		# The URL's domain is exempt from nofollow
		if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
			return null;
		}

		return 'nofollow';
	}
1903
1904
	/**
1905
	 * Get an associative array of additional HTML attributes appropriate for a
1906
	 * particular external link.  This currently may include rel => nofollow
1907
	 * (depending on configuration, namespace, and the URL's domain) and/or a
1908
	 * target attribute (depending on configuration).
1909
	 *
1910
	 * @param string $url URL to extract the domain from for rel =>
1911
	 *   nofollow if appropriate
1912
	 * @return array Associative array of HTML attributes
1913
	 */
1914
	public function getExternalLinkAttribs( $url ) {
		# Builds the 'rel' (always set, possibly null) and 'target' (only when
		# configured) attributes for an external link to $url.
		$attribs = [];
		# null when nofollow does not apply to this URL/title
		$rel = self::getExternalLinkRel( $url, $this->mTitle );

		$target = $this->mOptions->getExternalLinkTarget();
		if ( $target ) {
			$attribs['target'] = $target;
			if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
				// T133507. New windows can navigate parent cross-origin.
				// Including noreferrer due to lacking browser
				// support of noopener. Eventually noreferrer should be removed.
				# Only insert a separator when there is an existing rel value.
				# getExternalLinkRel() returns null (not '') when nofollow does
				# not apply, so the null case must be excluded too; otherwise
				# the attribute would start with a stray space.
				if ( $rel !== null && $rel !== '' ) {
					$rel .= ' ';
				}
				$rel .= 'noreferrer noopener';
			}
		}
		$attribs['rel'] = $rel;
		return $attribs;
	}
1934
1935
	/**
1936
	 * Replace unusual escape codes in a URL with their equivalent characters
1937
	 *
1938
	 * @deprecated since 1.24, use normalizeLinkUrl
1939
	 * @param string $url
1940
	 * @return string
1941
	 */
1942
	public static function replaceUnusualEscapes( $url ) {
		# Deprecated alias: record the deprecation, then delegate to the replacement
		wfDeprecated( __METHOD__, '1.24' );
		$normalized = self::normalizeLinkUrl( $url );
		return $normalized;
	}
1946
1947
	/**
1948
	 * Replace unusual escape codes in a URL with their equivalent characters
1949
	 *
1950
	 * This generally follows the syntax defined in RFC 3986, with special
1951
	 * consideration for HTTP query strings.
1952
	 *
1953
	 * @param string $url
1954
	 * @return string
1955
	 */
1956
	public static function normalizeLinkUrl( $url ) {
		# First, make sure unsafe characters are encoded
		$url = preg_replace_callback(
			'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/',
			function ( $hit ) {
				return rawurlencode( $hit[0] );
			},
			$url
		);

		# Trailing parts are peeled off from the right, in this order. Each entry
		# maps the delimiter that starts the part to the characters that must stay
		# percent-encoded inside it:
		#   '#' => fragment part - 'fragment'
		#   '?' => query part - 'query' minus &=+;
		$trailingParts = [
			'#' => '"#%<>[\]^`{|}',
			'?' => '"#%<>[\]^`{|}&=+;',
		];

		$tail = '';
		$limit = strlen( $url );
		foreach ( $trailingParts as $delimiter => $unsafe ) {
			$from = strpos( $url, $delimiter );
			# Ignore a delimiter that lies inside a part that was already peeled off
			if ( $from !== false && $from < $limit ) {
				$part = substr( $url, $from, $limit - $from );
				$tail = self::normalizeUrlComponent( $part, $unsafe ) . $tail;
				$limit = $from;
			}
		}

		# Scheme and path part - 'pchar'
		# (we assume no userinfo or encoded colons in the host)
		$head = self::normalizeUrlComponent( substr( $url, 0, $limit ), '"#%<>[\]^`{|}/?' );

		return $head . $tail;
	}
1991
1992
	private static function normalizeUrlComponent( $component, $unsafe ) {
		# Visit every %XX escape in the component. An escape is decoded only when it
		# stands for a printable ASCII character (codes 33..126) that is not listed
		# in $unsafe; every other escape is kept, with its hex digits uppercased.
		return preg_replace_callback(
			'/%[0-9A-Fa-f]{2}/',
			function ( $hit ) use ( $unsafe ) {
				$escape = $hit[0];
				$decoded = urldecode( $escape );
				$code = ord( $decoded );
				$keepEscaped = $code <= 32
					|| $code >= 127
					|| strpos( $unsafe, $decoded ) !== false;
				return $keepEscaped ? strtoupper( $escape ) : $decoded;
			},
			$component
		);
	}
2006
2007
	/**
	 * Make an image if it's allowed, either through the global
	 * option, through the exception, or through the on-wiki whitelist.
	 *
	 * @param string $url
	 *
	 * @return string|bool Result of Linker::makeExternalImage(), or false if
	 *   no image was made for this URL
	 */
2015
	private function maybeMakeExternalImage( $url ) {
		# The "images from" option is either a single prefix or an array of
		# prefixes; the URL qualifies when it starts with any of them.
		$allowedPrefixes = $this->mOptions->getAllowExternalImagesFrom();
		$prefixMatches = false;
		if ( !empty( $allowedPrefixes ) ) {
			$candidates = is_array( $allowedPrefixes ) ? $allowedPrefixes : [ $allowedPrefixes ];
			foreach ( $candidates as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatches = true;
					break;
				}
			}
		}

		$html = false;

		# Allowed globally or via a matching prefix: only the image regex is left to check
		if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatches )
			&& preg_match( self::EXT_IMAGE_REGEX, $url )
		) {
			# Image found
			$html = Linker::makeExternalImage( $url );
		}

		# Otherwise fall back to the on-wiki whitelist, if that feature is enabled
		if ( !$html && $this->mOptions->getEnableImageWhitelist()
			&& preg_match( self::EXT_IMAGE_REGEX, $url )
		) {
			$entries = explode(
				"\n",
				wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
			);

			foreach ( $entries as $entry ) {
				# Ignore blank entries and comment lines
				if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
					continue;
				}
				# Each entry is a regex fragment: escape the delimiter and match case-insensitively
				$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
				if ( preg_match( $pattern, $url ) ) {
					# Image matches a whitelist entry
					$html = Linker::makeExternalImage( $url );
					break;
				}
			}
		}

		return $html;
	}
2064
2065
	/**
2066
	 * Process [[ ]] wikilinks
2067
	 *
2068
	 * @param string $s
2069
	 *
2070
	 * @return string Processed text
2071
	 *
2072
	 * @private
2073
	 */
2074
	public function replaceInternalLinks( $s ) {
		# replaceInternalLinks2() rewrites $s by reference and hands back the link
		# holders it created; those are folded into this parser's holder array.
		$parserHolders = $this->mLinkHolders;
		$parserHolders->merge( $this->replaceInternalLinks2( $s ) );

		return $s;
	}
2078
2079
	/**
	 * Process [[ ]] wikilinks (RIL)
	 * @param string &$s Wikitext to process; rewritten in place
	 * @throws MWException
	 * @return LinkHolderArray
	 *
	 * @private
	 */
2087
	public function replaceInternalLinks2( &$s ) {
		# Walks $s chunk by chunk, splitting on "[[", and rebuilds it in place
		# (by reference). Each chunk is either copied back verbatim (invalid or
		# disallowed links) or replaced by link/image output or a holder from
		# LinkHolderArray::makeHolder(). The LinkHolderArray created here is returned.
2088
		global $wgExtraInterlanguageLinkPrefixes;
2089
2090
		# The regexes below are built on the first call and kept in static locals
		static $tc = false, $e1, $e1_img;
2091
		# the % is needed to support urlencoded titles as well
2092
		if ( !$tc ) {
2093
			$tc = Title::legalChars() . '#%';
2094
			# Match a link having the form [[namespace:link|alternate]]trail
2095
			$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2096
			# Match cases where there is no "]]", which might still be images
2097
			$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2098
		}
2099
2100
		$holders = new LinkHolderArray( $this );
2101
2102
		# split the entire text string on occurrences of [[
2103
		$a = StringUtils::explode( '[[', ' ' . $s );
2104
		# get the first element (all text up to first [[), and remove the space we added
2105
		$s = $a->current();
2106
		$a->next();
2107
		$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2108
		$s = substr( $s, 1 );
2109
2110
		$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2111
		$e2 = null;
2112
		if ( $useLinkPrefixExtension ) {
2113
			# Match the end of a line for a word that's not followed by whitespace,
2114
			# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2115
			global $wgContLang;
2116
			$charset = $wgContLang->linkPrefixCharset();
2117
			$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2118
		}
2119
2120
		if ( is_null( $this->mTitle ) ) {
2121
			throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
2122
		}
2123
		$nottalk = !$this->mTitle->isTalkPage();
2124
2125 View Code Duplication
		# With link prefixes enabled, $prefix is recomputed per link inside the loop
		# and $first_prefix carries the prefix found in the text before the first [[.
		# Without them, $prefix stays '' and $first_prefix is never read.
		if ( $useLinkPrefixExtension ) {
2126
			$m = [];
2127
			if ( preg_match( $e2, $s, $m ) ) {
2128
				$first_prefix = $m[2];
2129
			} else {
2130
				$first_prefix = false;
2131
			}
2132
		} else {
2133
			$prefix = '';
2134
		}
2135
2136
		$useSubpages = $this->areSubpagesAllowed();
2137
2138
		// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
2139
		# Loop for each link
2140
		for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2141
			// @codingStandardsIgnoreEnd
2142
2143
			# Check for excessive memory usage
2144
			if ( $holders->isBig() ) {
2145
				# Too big
2146
				# Do the existence check, replace the link holders and clear the array
2147
				$holders->replace( $s );
2148
				$holders->clear();
2149
			}
2150
2151
			if ( $useLinkPrefixExtension ) {
2152 View Code Duplication
				# Move the prefix word (if any) off the end of the output built so far
				if ( preg_match( $e2, $s, $m ) ) {
2153
					$prefix = $m[2];
2154
					$s = $m[1];
2155
				} else {
2156
					$prefix = '';
2157
				}
2158
				# first link
2159
				if ( $first_prefix ) {
2160
					$prefix = $first_prefix;
2161
					$first_prefix = false;
2162
				}
2163
			}
2164
2165
			$might_be_img = false;
2166
2167
			if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2168
				$text = $m[2];
2169
				# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2170
				# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks the match,
2171
				# the real problem is with the $e1 regex
2172
				# See bug 1300.
2173
				# Still some problems for cases where the ] is meant to be outside punctuation,
2174
				# and no image is in sight. See bug 2095.
2175
				if ( $text !== ''
2176
					&& substr( $m[3], 0, 1 ) === ']'
2177
					&& strpos( $text, '[' ) !== false
2178
				) {
2179
					$text .= ']'; # so that replaceExternalLinks($text) works later
2180
					$m[3] = substr( $m[3], 1 );
2181
				}
2182
				# fix up urlencoded title texts
2183 View Code Duplication
				if ( strpos( $m[1], '%' ) !== false ) {
2184
					# Should anchors '#' also be rejected?
2185
					$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2186
				}
2187
				$trail = $m[3];
2188
			} elseif ( preg_match( $e1_img, $line, $m ) ) {
2189
				# Invalid, but might be an image with a link in its caption
2190
				$might_be_img = true;
2191
				$text = $m[2];
2192 View Code Duplication
				if ( strpos( $m[1], '%' ) !== false ) {
2193
					$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2194
				}
2195
				$trail = "";
2196
			} else { # Invalid form; output directly
2197
				$s .= $prefix . '[[' . $line;
2198
				continue;
2199
			}
2200
2201
			$origLink = $m[1];
2202
2203
			# Don't allow internal links to pages containing
2204
			# PROTO: where PROTO is a valid URL protocol; these
2205
			# should be external links.
2206
			if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
2207
				$s .= $prefix . '[[' . $line;
2208
				continue;
2209
			}
2210
2211
			# Make subpage if necessary
2212
			if ( $useSubpages ) {
2213
				$link = $this->maybeDoSubpageLink( $origLink, $text );
2214
			} else {
2215
				$link = $origLink;
2216
			}
2217
2218
			# $noforce is false when the target starts with ':'; such links skip the
			# interlanguage/file/category handling further down
			$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2219
			if ( !$noforce ) {
2220
				# Strip off leading ':'
2221
				$link = substr( $link, 1 );
2222
			}
2223
2224
			# Resolve the target to a Title; if that fails the chunk is emitted verbatim
			$unstrip = $this->mStripState->unstripNoWiki( $link );
2225
			$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
2226
			if ( $nt === null ) {
2227
				$s .= $prefix . '[[' . $line;
2228
				continue;
2229
			}
2230
2231
			$ns = $nt->getNamespace();
2232
			$iw = $nt->getInterwiki();
2233
2234
			if ( $might_be_img ) { # if this is actually an invalid link
2235
				if ( $ns == NS_FILE && $noforce ) { # but might be an image
2236
					$found = false;
2237
					# Note: this inner loop advances the same iterator as the outer loop,
					# so the chunks it consumes are not visited again
					while ( true ) {
2238
						# look at the next 'line' to see if we can close it there
2239
						$a->next();
2240
						$next_line = $a->current();
2241
						if ( $next_line === false || $next_line === null ) {
2242
							break;
2243
						}
2244
						$m = explode( ']]', $next_line, 3 );
2245
						if ( count( $m ) == 3 ) {
2246
							# the first ]] closes the inner link, the second the image
2247
							$found = true;
2248
							$text .= "[[{$m[0]}]]{$m[1]}";
2249
							$trail = $m[2];
2250
							break;
2251
						} elseif ( count( $m ) == 2 ) {
2252
							# if there's exactly one ]] that's fine, we'll keep looking
2253
							$text .= "[[{$m[0]}]]{$m[1]}";
2254
						} else {
2255
							# if $next_line is invalid too, we need look no further
2256
							$text .= '[[' . $next_line;
2257
							break;
2258
						}
2259
					}
2260
					if ( !$found ) {
2261
						# we couldn't find the end of this imageLink, so output it raw
2262
						# but don't ignore what might be perfectly normal links in the text we've examined
2263
						$holders->merge( $this->replaceInternalLinks2( $text ) );
2264
						$s .= "{$prefix}[[$link|$text";
2265
						# note: no $trail, because without an end, there *is* no trail
2266
						continue;
2267
					}
2268
				} else { # it's not an image, so output it raw
2269
					$s .= "{$prefix}[[$link|$text";
2270
					# note: no $trail, because without an end, there *is* no trail
2271
					continue;
2272
				}
2273
			}
2274
2275
			$wasblank = ( $text == '' );
2276
			if ( $wasblank ) {
2277
				$text = $link;
2278
			} else {
2279
				# Bug 4598 madness. Handle the quotes only if they come from the alternate part
2280
				# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2281
				# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2282
				#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2283
				$text = $this->doQuotes( $text );
2284
			}
2285
2286
			# Link not escaped by : , create the various objects
2287
			if ( $noforce && !$nt->wasLocalInterwiki() ) {
2288
				# Interwikis
2289
				if (
2290
					$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2291
						Language::fetchLanguageName( $iw, null, 'mw' ) ||
2292
						in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
2293
					)
2294
				) {
2295
					# Bug 24502: filter duplicates
2296
					if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
2297
						$this->mLangLinkLanguages[$iw] = true;
2298
						$this->mOutput->addLanguageLink( $nt->getFullText() );
2299
					}
2300
2301
					$s = rtrim( $s . $prefix );
2302
					$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
2303
					continue;
2304
				}
2305
2306
				if ( $ns == NS_FILE ) {
2307
					if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
2308
						if ( $wasblank ) {
2309
							# if no parameters were passed, $text
2310
							# becomes something like "File:Foo.png",
2311
							# which we don't want to pass on to the
2312
							# image generator
2313
							$text = '';
2314
						} else {
2315
							# recursively parse links inside the image caption
2316
							# actually, this will parse them in any other parameters, too,
2317
							# but it might be hard to fix that, and it doesn't matter ATM
2318
							$text = $this->replaceExternalLinks( $text );
2319
							$holders->merge( $this->replaceInternalLinks2( $text ) );
2320
						}
2321
						# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
2322
						$s .= $prefix . $this->armorLinks(
2323
							$this->makeImage( $nt, $text, $holders ) ) . $trail;
2324
						continue;
2325
					}
2326
				} elseif ( $ns == NS_CATEGORY ) {
2327
					$s = rtrim( $s . "\n" ); # bug 87
2328
2329
					if ( $wasblank ) {
2330
						$sortkey = $this->getDefaultSort();
2331
					} else {
2332
						$sortkey = $text;
2333
					}
2334
					$sortkey = Sanitizer::decodeCharReferences( $sortkey );
2335
					$sortkey = str_replace( "\n", '', $sortkey );
2336
					$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
2337
					$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );
2338
2339
					/**
2340
					 * Strip the whitespace Category links produce, see bug 87
2341
					 */
2342
					$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;
2343
2344
					continue;
2345
				}
2346
			}
2347
2348
			# Self-link checking. For some languages, variants of the title are checked in
2349
			# LinkHolderArray::doVariants() to allow batching the existence checks necessary
2350
			# for linking to a different variant.
2351
			if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
2352
				$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
2353
				continue;
2354
			}
2355
2356
			# NS_MEDIA is a pseudo-namespace for linking directly to a file
2357
			# @todo FIXME: Should do batch file existence checks, see comment below
2358
			if ( $ns == NS_MEDIA ) {
2359
				# Give extensions a chance to select the file revision for us
2360
				$options = [];
2361
				$descQuery = false;
2362
				Hooks::run( 'BeforeParserFetchFileAndTitle',
2363
					[ $this, $nt, &$options, &$descQuery ] );
2364
				# Fetch and register the file (file title may be different via hooks)
2365
				list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
2366
				# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
2367
				$s .= $prefix . $this->armorLinks(
2368
					Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2369
				continue;
2370
			}
2371
2372
			# Some titles, such as valid special pages or files in foreign repos, should
2373
			# be shown as bluelinks even though they're not included in the page table
2374
			# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2375
			# batch file existence checks for NS_FILE and NS_MEDIA
2376
			if ( $iw == '' && $nt->isAlwaysKnown() ) {
2377
				$this->mOutput->addLink( $nt );
2378
				$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2379
			} else {
2380
				# Links will be added to the output link list after checking
2381
				$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
2382
			}
2383
		}
2384
		# NOTE(review): whatever makeHolder() appended to $s is presumably resolved
		# later through LinkHolderArray::replace() by the caller - confirm
		return $holders;
2385
	}
2386
2387
	/**
2388
	 * Render a forced-blue link inline; protect against double expansion of
2389
	 * URLs if we're in a mode that prepends full URL prefixes to internal links.
2390
	 * Since this little disaster has to split off the trail text to avoid
2391
	 * breaking URLs in the following text without breaking trails on the
2392
	 * wiki links, it's been made into a horrible function.
2393
	 *
2394
	 * @param Title $nt
2395
	 * @param string $text
2396
	 * @param string $trail
2397
	 * @param string $prefix
2398
	 * @return string HTML-wikitext mix oh yuck
2399
	 */
2400
	protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
		# Split the trail: the first part goes inside the link text, the rest
		# stays outside after the link
		$trailParts = Linker::splitTrail( $trail );
		$inside = $trailParts[0];
		$trail = $trailParts[1];

		# No explicit link text: fall back to the escaped prefixed title
		if ( $text == '' ) {
			$text = htmlspecialchars( $nt->getPrefixedText() );
		}

		# The label is wrapped in HtmlArmor and handed to the link renderer
		$label = new HtmlArmor( $prefix . $text . $inside );
		$html = $this->getLinkRenderer()->makeKnownLink( $nt, $label );

		# Armor the URLs in the generated link; the outside trail is appended unarmored
		return $this->armorLinks( $html ) . $trail;
	}
2413
2414
	/**
2415
	 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
2416
	 * going to go through further parsing steps before inline URL expansion.
2417
	 *
2418
	 * Not needed quite as much as it used to be since free links are a bit
2419
	 * more sensible these days. But bracketed links are still an issue.
2420
	 *
2421
	 * @param string $text More-or-less HTML
2422
	 * @return string Less-or-more HTML with NOPARSE bits
2423
	 */
2424
	public function armorLinks( $text ) {
		# Any URL protocol at a word boundary (matched case-insensitively) gets the
		# marker prefix plus "NOPARSE" inserted in front of it
		$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
		$armored = self::MARKER_PREFIX . "NOPARSE$1";

		return preg_replace( $protocolPattern, $armored, $text );
	}
2428
2429
	/**
2430
	 * Return true if subpage links should be expanded on this page.
2431
	 * @return bool
2432
	 */
2433
	public function areSubpagesAllowed() {
		# Some namespaces don't allow subpages; ask about the current title's namespace
		$namespace = $this->mTitle->getNamespace();
		return MWNamespace::hasSubpages( $namespace );
	}
2437
2438
	/**
2439
	 * Handle link to subpage if necessary
2440
	 *
2441
	 * @param string $target The source of the link
2442
	 * @param string &$text The link text, modified as necessary
2443
	 * @return string The full name of the link
2444
	 * @private
2445
	 */
2446
	public function maybeDoSubpageLink( $target, &$text ) {
		# Delegates to Linker, resolving $target against the current title;
		# $text is handed on as-is (it is a by-reference parameter here)
		$fullName = Linker::normalizeSubpageLink( $this->mTitle, $target, $text );
		return $fullName;
	}
2449
2450
	/**
2451
	 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
2452
	 *
2453
	 * @param string $text
2454
	 * @param bool $linestart Whether or not this is at the start of a line.
2455
	 * @private
2456
	 * @return string The lists rendered as HTML
2457
	 */
2458
	public function doBlockLevels( $text, $linestart ) {
		# Thin wrapper: the block-level pass is implemented in BlockLevelPass
		$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );
		return $rendered;
	}
2461
2462
	/**
2463
	 * Return value of a magic variable (like PAGENAME)
2464
	 *
2465
	 * @private
2466
	 *
2467
	 * @param int $index
2468
	 * @param bool|PPFrame $frame
2469
	 *
2470
	 * @throws MWException
2471
	 * @return string
2472
	 */
2473
	public function getVariableValue( $index, $frame = false ) {
2474
		global $wgContLang, $wgSitename, $wgServer, $wgServerName;
2475
		global $wgArticlePath, $wgScriptPath, $wgStylePath;
2476
2477
		if ( is_null( $this->mTitle ) ) {
2478
			// If no title set, bad things are going to happen
2479
			// later. Title should always be set since this
2480
			// should only be called in the middle of a parse
2481
			// operation (but the unit-tests do funky stuff)
2482
			throw new MWException( __METHOD__ . ' Should only be '
2483
				. ' called while parsing (no title set)' );
2484
		}
2485
2486
		/**
2487
		 * Some of these require message or data lookups and can be
2488
		 * expensive to check many times.
2489
		 */
2490
		if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$this, &$this->mVarCache ] ) ) {
2491
			if ( isset( $this->mVarCache[$index] ) ) {
2492
				return $this->mVarCache[$index];
2493
			}
2494
		}
2495
2496
		$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
2497
		Hooks::run( 'ParserGetVariableValueTs', [ &$this, &$ts ] );
2498
2499
		$pageLang = $this->getFunctionLang();
2500
2501
		switch ( $index ) {
2502
			case '!':
2503
				$value = '|';
2504
				break;
2505
			case 'currentmonth':
2506
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
2507
				break;
2508
			case 'currentmonth1':
2509
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2510
				break;
2511
			case 'currentmonthname':
2512
				$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2513
				break;
2514
			case 'currentmonthnamegen':
2515
				$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2516
				break;
2517
			case 'currentmonthabbrev':
2518
				$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
2519
				break;
2520
			case 'currentday':
2521
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
2522
				break;
2523
			case 'currentday2':
2524
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
2525
				break;
2526
			case 'localmonth':
2527
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
2528
				break;
2529
			case 'localmonth1':
2530
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2531
				break;
2532
			case 'localmonthname':
2533
				$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2534
				break;
2535
			case 'localmonthnamegen':
2536
				$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2537
				break;
2538
			case 'localmonthabbrev':
2539
				$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
2540
				break;
2541
			case 'localday':
2542
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
2543
				break;
2544
			case 'localday2':
2545
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
2546
				break;
2547
			case 'pagename':
2548
				$value = wfEscapeWikiText( $this->mTitle->getText() );
2549
				break;
2550
			case 'pagenamee':
2551
				$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
2552
				break;
2553
			case 'fullpagename':
2554
				$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
2555
				break;
2556
			case 'fullpagenamee':
2557
				$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
2558
				break;
2559
			case 'subpagename':
2560
				$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
2561
				break;
2562
			case 'subpagenamee':
2563
				$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
2564
				break;
2565
			case 'rootpagename':
2566
				$value = wfEscapeWikiText( $this->mTitle->getRootText() );
2567
				break;
2568 View Code Duplication
			case 'rootpagenamee':
2569
				$value = wfEscapeWikiText( wfUrlencode( str_replace(
2570
					' ',
2571
					'_',
2572
					$this->mTitle->getRootText()
2573
				) ) );
2574
				break;
2575
			case 'basepagename':
2576
				$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
2577
				break;
2578 View Code Duplication
			case 'basepagenamee':
2579
				$value = wfEscapeWikiText( wfUrlencode( str_replace(
2580
					' ',
2581
					'_',
2582
					$this->mTitle->getBaseText()
2583
				) ) );
2584
				break;
2585 View Code Duplication
			case 'talkpagename':
2586
				if ( $this->mTitle->canTalk() ) {
2587
					$talkPage = $this->mTitle->getTalkPage();
2588
					$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
2589
				} else {
2590
					$value = '';
2591
				}
2592
				break;
2593 View Code Duplication
			case 'talkpagenamee':
2594
				if ( $this->mTitle->canTalk() ) {
2595
					$talkPage = $this->mTitle->getTalkPage();
2596
					$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
2597
				} else {
2598
					$value = '';
2599
				}
2600
				break;
2601
			case 'subjectpagename':
2602
				$subjPage = $this->mTitle->getSubjectPage();
2603
				$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
2604
				break;
2605
			case 'subjectpagenamee':
2606
				$subjPage = $this->mTitle->getSubjectPage();
2607
				$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
2608
				break;
2609
			case 'pageid': // requested in bug 23427
2610
				$pageid = $this->getTitle()->getArticleID();
2611
				if ( $pageid == 0 ) {
2612
					# 0 means the page doesn't exist in the database,
2613
					# which means the user is previewing a new page.
2614
					# The vary-revision flag must be set, because the magic word
2615
					# will have a different value once the page is saved.
2616
					$this->mOutput->setFlag( 'vary-revision' );
2617
					wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
2618
				}
2619
				$value = $pageid ? $pageid : null;
2620
				break;
2621
			case 'revisionid':
2622
				# Let the edit saving system know we should parse the page
2623
				# *after* a revision ID has been assigned.
2624
				$this->mOutput->setFlag( 'vary-revision-id' );
2625
				wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
2626
				$value = $this->mRevisionId;
2627
				if ( !$value && $this->mOptions->getSpeculativeRevIdCallback() ) {
2628
					$value = call_user_func( $this->mOptions->getSpeculativeRevIdCallback() );
2629
					$this->mOutput->setSpeculativeRevIdUsed( $value );
2630
				}
2631
				break;
2632 View Code Duplication
			case 'revisionday':
2633
				# Let the edit saving system know we should parse the page
2634
				# *after* a revision ID has been assigned. This is for null edits.
2635
				$this->mOutput->setFlag( 'vary-revision' );
2636
				wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
2637
				$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
2638
				break;
2639 View Code Duplication
			case 'revisionday2':
2640
				# Let the edit saving system know we should parse the page
2641
				# *after* a revision ID has been assigned. This is for null edits.
2642
				$this->mOutput->setFlag( 'vary-revision' );
2643
				wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
2644
				$value = substr( $this->getRevisionTimestamp(), 6, 2 );
2645
				break;
2646 View Code Duplication
			case 'revisionmonth':
2647
				# Let the edit saving system know we should parse the page
2648
				# *after* a revision ID has been assigned. This is for null edits.
2649
				$this->mOutput->setFlag( 'vary-revision' );
2650
				wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
2651
				$value = substr( $this->getRevisionTimestamp(), 4, 2 );
2652
				break;
2653 View Code Duplication
			case 'revisionmonth1':
2654
				# Let the edit saving system know we should parse the page
2655
				# *after* a revision ID has been assigned. This is for null edits.
2656
				$this->mOutput->setFlag( 'vary-revision' );
2657
				wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
2658
				$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
2659
				break;
2660 View Code Duplication
			case 'revisionyear':
2661
				# Let the edit saving system know we should parse the page
2662
				# *after* a revision ID has been assigned. This is for null edits.
2663
				$this->mOutput->setFlag( 'vary-revision' );
2664
				wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
2665
				$value = substr( $this->getRevisionTimestamp(), 0, 4 );
2666
				break;
2667
			case 'revisiontimestamp':
2668
				# Let the edit saving system know we should parse the page
2669
				# *after* a revision ID has been assigned. This is for null edits.
2670
				$this->mOutput->setFlag( 'vary-revision' );
2671
				wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
2672
				$value = $this->getRevisionTimestamp();
2673
				break;
2674
			case 'revisionuser':
2675
				# Let the edit saving system know we should parse the page
2676
				# *after* a revision ID has been assigned for null edits.
2677
				$this->mOutput->setFlag( 'vary-user' );
2678
				wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
2679
				$value = $this->getRevisionUser();
2680
				break;
2681
			case 'revisionsize':
2682
				$value = $this->getRevisionSize();
2683
				break;
2684
			case 'namespace':
2685
				$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2686
				break;
2687
			case 'namespacee':
2688
				$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
2689
				break;
2690
			case 'namespacenumber':
2691
				$value = $this->mTitle->getNamespace();
2692
				break;
2693
			case 'talkspace':
2694
				$value = $this->mTitle->canTalk()
2695
					? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
2696
					: '';
2697
				break;
2698
			case 'talkspacee':
2699
				$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
2700
				break;
2701
			case 'subjectspace':
2702
				$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
2703
				break;
2704
			case 'subjectspacee':
2705
				$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
2706
				break;
2707
			case 'currentdayname':
2708
				$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
2709
				break;
2710
			case 'currentyear':
2711
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
2712
				break;
2713
			case 'currenttime':
2714
				$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
2715
				break;
2716
			case 'currenthour':
2717
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
2718
				break;
2719
			case 'currentweek':
2720
				# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
2721
				# int to remove the padding
2722
				$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
2723
				break;
2724
			case 'currentdow':
2725
				$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
2726
				break;
2727
			case 'localdayname':
2728
				$value = $pageLang->getWeekdayName(
2729
					(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
2730
				);
2731
				break;
2732
			case 'localyear':
2733
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
2734
				break;
2735
			case 'localtime':
2736
				$value = $pageLang->time(
2737
					MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
2738
					false,
2739
					false
2740
				);
2741
				break;
2742
			case 'localhour':
2743
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
2744
				break;
2745
			case 'localweek':
2746
				# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
2747
				# int to remove the padding
2748
				$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
2749
				break;
2750
			case 'localdow':
2751
				$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
2752
				break;
2753
			case 'numberofarticles':
2754
				$value = $pageLang->formatNum( SiteStats::articles() );
2755
				break;
2756
			case 'numberoffiles':
2757
				$value = $pageLang->formatNum( SiteStats::images() );
2758
				break;
2759
			case 'numberofusers':
2760
				$value = $pageLang->formatNum( SiteStats::users() );
2761
				break;
2762
			case 'numberofactiveusers':
2763
				$value = $pageLang->formatNum( SiteStats::activeUsers() );
2764
				break;
2765
			case 'numberofpages':
2766
				$value = $pageLang->formatNum( SiteStats::pages() );
2767
				break;
2768
			case 'numberofadmins':
2769
				$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
2770
				break;
2771
			case 'numberofedits':
2772
				$value = $pageLang->formatNum( SiteStats::edits() );
2773
				break;
2774
			case 'currenttimestamp':
2775
				$value = wfTimestamp( TS_MW, $ts );
2776
				break;
2777
			case 'localtimestamp':
2778
				$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
2779
				break;
2780
			case 'currentversion':
2781
				$value = SpecialVersion::getVersion();
2782
				break;
2783
			case 'articlepath':
2784
				return $wgArticlePath;
2785
			case 'sitename':
2786
				return $wgSitename;
2787
			case 'server':
2788
				return $wgServer;
2789
			case 'servername':
2790
				return $wgServerName;
2791
			case 'scriptpath':
2792
				return $wgScriptPath;
2793
			case 'stylepath':
2794
				return $wgStylePath;
2795
			case 'directionmark':
2796
				return $pageLang->getDirMark();
2797
			case 'contentlanguage':
2798
				global $wgLanguageCode;
2799
				return $wgLanguageCode;
2800
			case 'cascadingsources':
2801
				$value = CoreParserFunctions::cascadingsources( $this );
2802
				break;
2803
			default:
2804
				$ret = null;
2805
				Hooks::run(
2806
					'ParserGetVariableValueSwitch',
2807
					[ &$this, &$this->mVarCache, &$index, &$ret, &$frame ]
2808
				);
2809
2810
				return $ret;
2811
		}
2812
2813
		if ( $index ) {
2814
			$this->mVarCache[$index] = $value;
2815
		}
2816
2817
		return $value;
2818
	}
2819
2820
	/**
2821
	 * initialise the magic variables (like CURRENTMONTHNAME) and substitution modifiers
2822
	 *
2823
	 * @private
2824
	 */
2825
	public function initialiseVariables() {
		# Collect both ID lists first, then wrap each one in a MagicWordArray
		$idsOfVariables = MagicWord::getVariableIDs();
		$idsOfSubstWords = MagicWord::getSubstIDs();

		# Matcher for magic variables such as CURRENTMONTHNAME
		$this->mVariables = new MagicWordArray( $idsOfVariables );
		# Matcher for the substitution modifiers
		$this->mSubstWords = new MagicWordArray( $idsOfSubstWords );
	}
2832
2833
	/**
2834
	 * Preprocess some wikitext and return the document tree.
2835
	 * This is the ghost of replace_variables().
2836
	 *
2837
	 * @param string $text The text to parse
2838
	 * @param int $flags Bitwise combination of:
2839
	 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if the text is being
2840
	 *     included. Default is to assume a direct page view.
2841
	 *
2842
	 * The generated DOM tree must depend only on the input text and the flags.
2843
	 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
2844
	 *
2845
	 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
2846
	 * change in the DOM tree for a given text, must be passed through the section identifier
2847
	 * in the section edit link and thus back to extractSections().
2848
	 *
2849
	 * The output of this function is currently only cached in process memory, but a persistent
2850
	 * cache may be implemented at a later date which takes further advantage of these strict
2851
	 * dependency requirements.
2852
	 *
2853
	 * @return PPNode
2854
	 */
2855
	public function preprocessToDom( $text, $flags = 0 ) {
		# Straight delegation to the active preprocessor; no state is kept here
		return $this->getPreprocessor()->preprocessToObj( $text, $flags );
	}
2859
2860
	/**
2861
	 * Return a three-element array: leading whitespace, string contents, trailing whitespace
2862
	 *
2863
	 * @param string $s
2864
	 *
2865
	 * @return array
2866
	 */
2867
	public static function splitWhitespace( $s ) {
		# Leading whitespace is whatever ltrim() removed from the front
		$withoutLeading = ltrim( $s );
		$leading = substr( $s, 0, strlen( $s ) - strlen( $withoutLeading ) );

		# Trailing whitespace is whatever rtrim() removes from the remainder
		$core = rtrim( $withoutLeading );
		$trailingLength = strlen( $withoutLeading ) - strlen( $core );
		$trailing = $trailingLength > 0
			? substr( $withoutLeading, -$trailingLength )
			: '';

		return [ $leading, $core, $trailing ];
	}
2879
2880
	/**
2881
	 * Replace magic variables, templates, and template arguments
2882
	 * with the appropriate text. Templates are substituted recursively,
2883
	 * taking care to avoid infinite loops.
2884
	 *
2885
	 * Note that the substitution depends on value of $mOutputType:
2886
	 *  self::OT_WIKI: only {{subst:}} templates
2887
	 *  self::OT_PREPROCESS: templates but not extension tags
2888
	 *  self::OT_HTML: all templates and extension tags
2889
	 *
2890
	 * @param string $text The text to transform
2891
	 * @param bool|PPFrame $frame Object describing the arguments passed to the
2892
	 *   template. Arguments may also be provided as an associative array, as
2893
	 *   was the usual case before MW1.12. Providing arguments this way may be
2894
	 *   useful for extensions wishing to perform variable replacement
2895
	 *   explicitly.
2896
	 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
2897
	 *   double-brace expansion.
2898
	 * @return string
2899
	 */
2900
	public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
		# Nothing to do for empty input; oversized input is passed through untouched
		$length = strlen( $text );
		if ( $length < 1 ) {
			return $text;
		}
		if ( $length > $this->mOptions->getMaxIncludeSize() ) {
			return $text;
		}

		# Normalise $frame to a PPFrame: false means "fresh frame", anything else
		# that is not already a PPFrame is treated as a plain argument array
		if ( !( $frame instanceof PPFrame ) ) {
			if ( $frame === false ) {
				$frame = $this->getPreprocessor()->newFrame();
			} else {
				wfDebug( __METHOD__ . " called using plain parameters instead of "
					. "a PPFrame instance. Creating custom frame.\n" );
				$frame = $this->getPreprocessor()->newCustomFrame( $frame );
			}
		}

		# Build the DOM, then expand it (arguments only, if requested)
		$tree = $this->preprocessToDom( $text );
		$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

		return $frame->expand( $tree, $expandFlags );
	}
2921
2922
	/**
2923
	 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
2924
	 *
2925
	 * @param array $args
2926
	 *
2927
	 * @return array
2928
	 */
2929
	public static function createAssocArgs( $args ) {
		# Result map: positional arguments get 1-based integer keys, "name=value"
		# arguments are keyed by their trimmed name.
		$assocArgs = [];
		$index = 1;
		foreach ( $args as $arg ) {
			$eqpos = strpos( $arg, '=' );
			if ( $eqpos === false ) {
				# No "=": positional argument, stored verbatim
				$assocArgs[$index++] = $arg;
				continue;
			}

			# Named argument: split on the first "=" only. trim() always returns a
			# string (a failed substr() is coerced to ''), so no extra false-checks
			# are needed on either half.
			$name = trim( substr( $arg, 0, $eqpos ) );
			$assocArgs[$name] = trim( substr( $arg, $eqpos + 1 ) );
		}

		return $assocArgs;
	}
2950
2951
	/**
2952
	 * Warn the user when a parser limitation is reached
2953
	 * Will warn the user at most once per limitation type
2954
	 *
2955
	 * The results are shown during preview and run through the Parser (See EditPage.php)
2956
	 *
2957
	 * @param string $limitationType Should be one of:
2958
	 *   'expensive-parserfunction' (corresponding messages:
2959
	 *       'expensive-parserfunction-warning',
2960
	 *       'expensive-parserfunction-category')
2961
	 *   'post-expand-template-argument' (corresponding messages:
2962
	 *       'post-expand-template-argument-warning',
2963
	 *       'post-expand-template-argument-category')
2964
	 *   'post-expand-template-inclusion' (corresponding messages:
2965
	 *       'post-expand-template-inclusion-warning',
2966
	 *       'post-expand-template-inclusion-category')
2967
	 *   'node-count-exceeded' (corresponding messages:
2968
	 *       'node-count-exceeded-warning',
2969
	 *       'node-count-exceeded-category')
2970
	 *   'expansion-depth-exceeded' (corresponding messages:
2971
	 *       'expansion-depth-exceeded-warning',
2972
	 *       'expansion-depth-exceeded-category')
2973
	 * @param string|int|null $current Current value
2974
	 * @param string|int|null $max Maximum allowed, when an explicit limit has been
2975
	 *	 exceeded, provide the values (optional)
2976
	 */
2977
	public function limitationWarn( $limitationType, $current = '', $max = '' ) {
		# Passing $current and $max is harmless even when the message ignores them.
		# The message is deliberately not rendered in the user language
		# ( ->inLanguage( $this->mOptions->getUserLangObj() ) ): it is only shown
		# during preview, and doing so would split the parser cache unnecessarily.
		$message = wfMessage( "$limitationType-warning" )->numParams( $current, $max );
		$this->mOutput->addWarning( $message->text() );

		$this->addTrackingCategory( "$limitationType-category" );
	}
2986
2987
	/**
2988
	 * Return the text of a template, after recursively
2989
	 * replacing any variables or templates within the template.
2990
	 *
2991
	 * @param array $piece The parts of the template
2992
	 *   $piece['title']: the title, i.e. the part before the |
2993
	 *   $piece['parts']: the parameter array
2994
	 *   $piece['lineStart']: whether the brace was at the start of a line
2995
	 * @param PPFrame $frame The current frame, contains template arguments
2996
	 * @throws Exception
2997
	 * @return string The text of the template
2998
	 */
2999
	public function braceSubstitution( $piece, $frame ) {

		// Flags

		// $text has been filled
		$found = false;
		// wiki markup in $text should be escaped
		$nowiki = false;
		// $text is HTML, armour it against wikitext transformation
		$isHTML = false;
		// Force interwiki transclusion to be done in raw mode not rendered
		$forceRawInterwiki = false;
		// $text is a DOM node needing expansion in a child frame
		$isChildObj = false;
		// $text is a DOM node needing expansion in the current frame
		$isLocalObj = false;

		# Title object, where $text came from
		$title = false;

		# $part1 is the bit before the first |, and must contain only title characters.
		# Various prefixes will be stripped from it later.
		$titleWithSpaces = $frame->expand( $piece['title'] );
		$part1 = trim( $titleWithSpaces );
		$titleText = false;

		# Original title text preserved for various purposes
		$originalTitle = $part1;

		# $args is a list of argument nodes, starting from index 0, not including $part1
		# @todo FIXME: If piece['parts'] is null then the call to getLength()
		# below won't work b/c this $args isn't an object
		$args = ( null == $piece['parts'] ) ? [] : $piece['parts'];

		$profileSection = null; // profile templates

		# SUBST
		if ( !$found ) {
			$substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );

			# Possibilities for substMatch: "subst", "safesubst" or FALSE
			# Decide whether to expand template or keep wikitext as-is.
			if ( $this->ot['wiki'] ) {
				if ( $substMatch === false ) {
					$literal = true;  # literal when in PST with no prefix
				} else {
					$literal = false; # expand when in PST with subst: or safesubst:
				}
			} else {
				if ( $substMatch == 'subst' ) {
					$literal = true;  # literal when not in PST with plain subst:
				} else {
					$literal = false; # expand when not in PST with safesubst: or no prefix
				}
			}
			if ( $literal ) {
				$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
				$isLocalObj = true;
				$found = true;
			}
		}

		# Variables
		if ( !$found && $args->getLength() == 0 ) {
			$id = $this->mVariables->matchStartToEnd( $part1 );
			if ( $id !== false ) {
				$text = $this->getVariableValue( $id, $frame );
				if ( MagicWord::getCacheTTL( $id ) > -1 ) {
					$this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) );
				}
				$found = true;
			}
		}

		# MSG, MSGNW and RAW
		if ( !$found ) {
			# Check for MSGNW:
			$mwMsgnw = MagicWord::get( 'msgnw' );
			if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
				$nowiki = true;
			} else {
				# Remove obsolete MSG:
				$mwMsg = MagicWord::get( 'msg' );
				$mwMsg->matchStartAndRemove( $part1 );
			}

			# Check for RAW:
			$mwRaw = MagicWord::get( 'raw' );
			if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
				$forceRawInterwiki = true;
			}
		}

		# Parser functions
		if ( !$found ) {
			$colonPos = strpos( $part1, ':' );
			if ( $colonPos !== false ) {
				$func = substr( $part1, 0, $colonPos );
				$funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
				$argsLength = $args->getLength();
				for ( $i = 0; $i < $argsLength; $i++ ) {
					$funcArgs[] = $args->item( $i );
				}
				# Exceptions thrown by the parser function propagate to the caller
				# unchanged; no catch-and-rethrow wrapper is needed for that.
				$result = $this->callParserFunction( $frame, $func, $funcArgs );

				# The interface for parser functions allows for extracting
				# flags into the local scope. Extract any forwarded flags
				# here.
				# NOTE(review): extract() can overwrite any local variable named by a
				# key of $result, not just the documented flags.
				extract( $result );
			}
		}

		# Finish mangling title and then check for loops.
		# Set $title to a Title object and $titleText to the PDBK
		if ( !$found ) {
			$ns = NS_TEMPLATE;
			# Split the title into page and subpage
			$subpage = '';
			$relative = $this->maybeDoSubpageLink( $part1, $subpage );
			if ( $part1 !== $relative ) {
				$part1 = $relative;
				$ns = $this->mTitle->getNamespace();
			}
			$title = Title::newFromText( $part1, $ns );
			if ( $title ) {
				$titleText = $title->getPrefixedText();
				# Check for language variants if the template is not found
				if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) {
					$this->getConverterLanguage()->findVariantLink( $part1, $title, true );
				}
				# Do recursion depth check
				$limit = $this->mOptions->getMaxTemplateDepth();
				if ( $frame->depth >= $limit ) {
					$found = true;
					$text = '<span class="error">'
						. wfMessage( 'parser-template-recursion-depth-warning' )
							->numParams( $limit )->inContentLanguage()->text()
						. '</span>';
				}
			}
		}

		# Load from database
		if ( !$found && $title ) {
			$profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
			if ( !$title->isExternal() ) {
				if ( $title->isSpecialPage()
					&& $this->mOptions->getAllowSpecialInclusion()
					&& $this->ot['html']
				) {
					$specialPage = SpecialPageFactory::getPage( $title->getDBkey() );
					// Pass the template arguments as URL parameters.
					// "uselang" will have no effect since the Language object
					// is forced to the one defined in ParserOptions.
					$pageArgs = [];
					$argsLength = $args->getLength();
					for ( $i = 0; $i < $argsLength; $i++ ) {
						$bits = $args->item( $i )->splitArg();
						if ( strval( $bits['index'] ) === '' ) {
							$name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
							$value = trim( $frame->expand( $bits['value'] ) );
							$pageArgs[$name] = $value;
						}
					}

					// Create a new context to execute the special page
					$context = new RequestContext;
					$context->setTitle( $title );
					$context->setRequest( new FauxRequest( $pageArgs ) );
					if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
						$context->setUser( $this->getUser() );
					} else {
						// If this page is cached, then we better not be per user.
						$context->setUser( User::newFromName( '127.0.0.1', false ) );
					}
					$context->setLanguage( $this->mOptions->getUserLangObj() );
					$ret = SpecialPageFactory::capturePath(
						$title, $context, $this->getLinkRenderer() );
					if ( $ret ) {
						$text = $context->getOutput()->getHTML();
						$this->mOutput->addOutputPageMetadata( $context->getOutput() );
						$found = true;
						$isHTML = true;
						if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
							$this->mOutput->updateRuntimeAdaptiveExpiry(
								$specialPage->maxIncludeCacheTime()
							);
						}
					}
				} elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) {
					$found = false; # access denied
					wfDebug( __METHOD__ . ": template inclusion denied for " .
						$title->getPrefixedDBkey() . "\n" );
				} else {
					list( $text, $title ) = $this->getTemplateDom( $title );
					if ( $text !== false ) {
						$found = true;
						$isChildObj = true;
					}
				}

				# If the title is valid but undisplayable, make a link to it
				if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
					$text = "[[:$titleText]]";
					$found = true;
				}
			} elseif ( $title->isTrans() ) {
				# Interwiki transclusion
				if ( $this->ot['html'] && !$forceRawInterwiki ) {
					$text = $this->interwikiTransclude( $title, 'render' );
					$isHTML = true;
				} else {
					$text = $this->interwikiTransclude( $title, 'raw' );
					# Preprocess it like a template
					$text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
					$isChildObj = true;
				}
				$found = true;
			}

			# Do infinite loop check
			# This has to be done after redirect resolution to avoid infinite loops via redirects
			if ( !$frame->loopCheck( $title ) ) {
				$found = true;
				$text = '<span class="error">'
					. wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
					. '</span>';
				wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" );
			}
		}

		# If we haven't found text to substitute by now, we're done
		# Recover the source wikitext and return it
		if ( !$found ) {
			$text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
			if ( $profileSection ) {
				$this->mProfiler->scopedProfileOut( $profileSection );
			}
			return [ 'object' => $text ];
		}

		# Expand DOM-style return values in a child frame
		if ( $isChildObj ) {
			# Clean up argument array
			$newFrame = $frame->newChild( $args, $title );

			if ( $nowiki ) {
				$text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
			} elseif ( $titleText !== false && $newFrame->isEmpty() ) {
				# Expansion is eligible for the empty-frame cache
				$text = $newFrame->cachedExpand( $titleText, $text );
			} else {
				# Uncached expansion
				$text = $newFrame->expand( $text );
			}
		}
		if ( $isLocalObj && $nowiki ) {
			$text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
			$isLocalObj = false;
		}

		if ( $profileSection ) {
			$this->mProfiler->scopedProfileOut( $profileSection );
		}

		# Replace raw HTML by a placeholder
		if ( $isHTML ) {
			$text = $this->insertStripItem( $text );
		} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
			# Escape nowiki-style return values
			$text = wfEscapeWikiText( $text );
		} elseif ( is_string( $text )
			&& !$piece['lineStart']
			&& preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
		) {
			# Bug 529: if the template begins with a table or block-level
			# element, it should be treated as beginning a new line.
			# This behavior is somewhat controversial.
			$text = "\n" . $text;
		}

		if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
			# Error, oversize inclusion
			if ( $titleText !== false ) {
				# Make a working, properly escaped link if possible (bug 23588)
				$text = "[[:$titleText]]";
			} else {
				# This will probably not be a working link, but at least it may
				# provide some hint of where the problem is.
				# preg_replace() returns the new string rather than modifying its
				# subject, so the result must be assigned back; otherwise a title
				# that already starts with ":" yields "[[::...]]".
				$originalTitle = preg_replace( '/^:/', '', $originalTitle );
				$text = "[[:$originalTitle]]";
			}
			$text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
				. 'post-expand include size too large -->' );
			$this->limitationWarn( 'post-expand-template-inclusion' );
		}

		if ( $isLocalObj ) {
			$ret = [ 'object' => $text ];
		} else {
			$ret = [ 'text' => $text ];
		}

		return $ret;
	}
3308
3309
	/**
3310
	 * Call a parser function and return an array with text and flags.
3311
	 *
3312
	 * The returned array will always contain a boolean 'found', indicating
3313
	 * whether the parser function was found or not. It may also contain the
3314
	 * following:
3315
	 *  text: string|object, resulting wikitext or PP DOM object
3316
	 *  isHTML: bool, $text is HTML, armour it against wikitext transformation
3317
	 *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
3318
	 *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3319
	 *  nowiki: bool, wiki markup in $text should be escaped
3320
	 *
3321
	 * @since 1.21
3322
	 * @param PPFrame $frame The current frame, contains template arguments
3323
	 * @param string $function Function name
3324
	 * @param array $args Arguments to the function
3325
	 * @throws MWException
3326
	 * @return array
3327
	 */
3328
	public function callParserFunction( $frame, $function, array $args = [] ) {
		global $wgContLang;

		# Resolve the function name to its hook ID.
		# Case sensitive functions
		if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
			$function = $this->mFunctionSynonyms[1][$function];
		} else {
			# Case insensitive functions
			$function = $wgContLang->lc( $function );
			if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
				$function = $this->mFunctionSynonyms[0][$function];
			} else {
				return [ 'found' => false ];
			}
		}

		list( $callback, $flags ) = $this->mFunctionHooks[$function];

		# Workaround for PHP bug 35229 and similar
		if ( !is_callable( $callback ) ) {
			throw new MWException( "Tag hook for $function is not callable\n" );
		}

		# Hooks receive the parser as their first argument, by reference.
		# Taking a reference to $this directly is problematic on PHP 7.1+,
		# so reference a local alias of the same object instead.
		$parser = $this;
		$allArgs = [ &$parser ];
		if ( $flags & self::SFH_OBJECT_ARGS ) {
			# Convert arguments to PPNodes and collect for appending to $allArgs
			$funcArgs = [];
			foreach ( $args as $k => $v ) {
				if ( $v instanceof PPNode || $k === 0 ) {
					$funcArgs[] = $v;
				} else {
					$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
				}
			}

			# Add a frame parameter, and pass the arguments as an array
			$allArgs[] = $frame;
			$allArgs[] = $funcArgs;
		} else {
			# Convert arguments to plain text and append to $allArgs
			foreach ( $args as $k => $v ) {
				if ( $v instanceof PPNode ) {
					$allArgs[] = trim( $frame->expand( $v ) );
				} elseif ( is_int( $k ) && $k >= 0 ) {
					$allArgs[] = trim( $v );
				} else {
					$allArgs[] = trim( "$k=$v" );
				}
			}
		}

		$result = call_user_func_array( $callback, $allArgs );

		# The interface for function hooks allows them to return a wikitext
		# string or an array containing the string and any flags. This mungs
		# things around to match what this method should return.
		if ( !is_array( $result ) ) {
			$result = [
				'found' => true,
				'text' => $result,
			];
		} else {
			if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
				$result['text'] = $result[0];
			}
			unset( $result[0] );
			# Array union: only adds 'found' when the hook did not set it itself
			$result += [
				'found' => true,
			];
		}

		# By default the returned text is not preprocessed again; a hook opts in
		# by returning 'noparse' => false, optionally with 'preprocessFlags'.
		$noparse = true;
		$preprocessFlags = 0;
		if ( isset( $result['noparse'] ) ) {
			$noparse = $result['noparse'];
		}
		if ( isset( $result['preprocessFlags'] ) ) {
			$preprocessFlags = $result['preprocessFlags'];
		}

		if ( !$noparse ) {
			$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
			$result['isChildObj'] = true;
		}

		return $result;
	}
3415
3416
	/**
3417
	 * Get the semi-parsed DOM representation of a template with a given title,
3418
	 * and its redirect destination title. Cached.
3419
	 *
3420
	 * @param Title $title
3421
	 *
3422
	 * @return array
3423
	 */
3424
	public function getTemplateDom( $title ) {
		# Remember the requested title so a redirect can be recorded against it
		$cacheTitle = $title;
		$titleText = $title->getPrefixedDBkey();

		# Follow a previously recorded redirect before consulting the DOM cache
		if ( isset( $this->mTplRedirCache[$titleText] ) ) {
			list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
			$title = Title::makeTitle( $ns, $dbk );
			$titleText = $title->getPrefixedDBkey();
		}
		if ( isset( $this->mTplDomCache[$titleText] ) ) {
			return [ $this->mTplDomCache[$titleText], $title ];
		}

		# Cache miss, go to the database
		list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

		if ( $text === false ) {
			# Cache the failure too, under the title that was looked up
			$this->mTplDomCache[$titleText] = false;
			return [ false, $title ];
		}

		$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
		$this->mTplDomCache[$titleText] = $dom;

		# The fetch resolved to a different title: record the redirect as
		# ( namespace, DB key ) under the originally requested title
		if ( !$title->equals( $cacheTitle ) ) {
			$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
				[ $title->getNamespace(), $title->getDBkey() ];
		}

		return [ $dom, $title ];
	}
3455
3456
	/**
3457
	 * Fetch the current revision of a given title. Note that the revision
3458
	 * (and even the title) may not exist in the database, so everything
3459
	 * contributing to the output of the parser should use this method
3460
	 * where possible, rather than getting the revisions themselves. This
3461
	 * method also caches its results, so using it benefits performance.
3462
	 *
3463
	 * @since 1.24
3464
	 * @param Title $title
3465
	 * @return Revision
3466
	 */
3467
	public function fetchCurrentRevisionOfTitle( $title ) {
		$key = $title->getPrefixedDBkey();

		# Lazily create the per-parser LRU cache
		if ( !$this->currentRevisionCache ) {
			$this->currentRevisionCache = new MapCacheLRU( 100 );
		}
		$cache = $this->currentRevisionCache;

		if ( $cache->has( $key ) ) {
			return $cache->get( $key );
		}

		// Defaults to Parser::statelessFetchRevision()
		$fetcher = $this->mOptions->getCurrentRevisionCallback();
		$cache->set( $key, call_user_func( $fetcher, $title, $this ) );

		return $cache->get( $key );
	}
3480
3481
	/**
3482
	 * Wrapper around Revision::newFromTitle to allow passing additional parameters
3483
	 * without passing them on to it.
3484
	 *
3485
	 * @since 1.24
3486
	 * @param Title $title
3487
	 * @param Parser|bool $parser
3488
	 * @return Revision|bool False if missing
3489
	 */
3490
	public static function statelessFetchRevision( Title $title, $parser = false ) {
		# $parser is accepted for callback compatibility but not used here
		$articleId = $title->getArticleID();
		$latestRevId = $title->getLatestRevID();

		$revision = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $articleId, $latestRevId );
		if ( !$revision ) {
			return $revision;
		}

		# Attach the title we were asked about to the loaded revision
		$revision->setTitle( $title );

		return $revision;
	}
3501
3502
	/**
3503
	 * Fetch the unparsed text of a template and register a reference to it.
3504
	 * @param Title $title
3505
	 * @return array ( string or false, Title )
3506
	 */
3507
	public function fetchTemplateAndTitle( $title ) {
		// Defaults to Parser::statelessFetchTemplate()
		$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

		// We use U+007F DELETE to distinguish strip markers from regular text,
		// so any occurrence in fetched wikitext is replaced.
		$text = $fetched['text'];
		if ( is_string( $text ) ) {
			$text = strtr( $text, "\x7f", "?" );
		}

		// Register every dependency reported by the callback
		if ( isset( $fetched['deps'] ) ) {
			foreach ( $fetched['deps'] as $dep ) {
				$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
				if ( $dep['title']->equals( $this->getTitle() ) ) {
					// If we transclude ourselves, the final result
					// will change based on the new version of the page
					$this->mOutput->setFlag( 'vary-revision' );
				}
			}
		}

		// Fall back to the requested title when the callback gave no final title
		if ( isset( $fetched['finalTitle'] ) ) {
			return [ $text, $fetched['finalTitle'] ];
		}

		return [ $text, $title ];
	}
3529
3530
	/**
3531
	 * Fetch the unparsed text of a template and register a reference to it.
3532
	 * @param Title $title
3533
	 * @return string|bool
3534
	 */
3535
	public function fetchTemplate( $title ) {
		# Only the text half of the ( text, title ) pair is wanted here
		list( $text ) = $this->fetchTemplateAndTitle( $title );

		return $text;
	}
3538
3539
	/**
3540
	 * Static function to get a template
3541
	 * Can be overridden via ParserOptions::setTemplateCallback().
3542
	 *
3543
	 * @param Title $title
3544
	 * @param bool|Parser $parser
3545
	 *
3546
	 * @return array
3547
	 */
3548
	public static function statelessFetchTemplate( $title, $parser = false ) {
		$text = $skip = false;
		$finalTitle = $title;
		# Every title consulted is recorded here as ( title, page_id, rev_id )
		$deps = [];

		# Loop to fetch the article, with up to 1 redirect
		// @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
		for ( $i = 0; $i < 2 && is_object( $title ); $i++ ) {
			// @codingStandardsIgnoreEnd
			# Give extensions a chance to select the revision instead
			$id = false; # Assume current
			Hooks::run( 'BeforeParserFetchTemplateAndtitle',
				[ $parser, $title, &$skip, &$id ] );

			if ( $skip ) {
				$text = false;
				$deps[] = [
					'title' => $title,
					'page_id' => $title->getArticleID(),
					'rev_id' => null
				];
				break;
			}
			# Get the revision
			if ( $id ) {
				$rev = Revision::newFromId( $id );
			} elseif ( $parser instanceof self ) {
				# $parser is documented as bool|Parser; only call methods on it
				# when it really is a parser object
				$rev = $parser->fetchCurrentRevisionOfTitle( $title );
			} else {
				$rev = Revision::newFromTitle( $title );
			}
			$rev_id = $rev ? $rev->getId() : 0;
			# If there is no current revision, there is no page
			if ( $id === false && !$rev ) {
				$linkCache = LinkCache::singleton();
				$linkCache->addBadLinkObj( $title );
			}

			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => $rev_id ];
			if ( $rev && !$title->equals( $rev->getTitle() ) ) {
				# We fetched a rev from a different title; register it too...
				$deps[] = [
					'title' => $rev->getTitle(),
					'page_id' => $rev->getPage(),
					'rev_id' => $rev_id ];
			}

			if ( $rev ) {
				$content = $rev->getContent();
				$text = $content ? $content->getWikitextForTransclusion() : null;

				if ( $text === false || $text === null ) {
					$text = false;
					break;
				}
			} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
				# No revision, but the title is in the MediaWiki namespace:
				# fall back to the interface message of that name
				global $wgContLang;
				$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
				if ( !$message->exists() ) {
					$text = false;
					break;
				}
				$content = $message->content();
				$text = $message->plain();
			} else {
				break;
			}
			if ( !$content ) {
				break;
			}
			# Redirect?
			# getRedirectTarget() supplies the title for the next pass; the loop
			# condition stops the loop when it is not an object
			$finalTitle = $title;
			$title = $content->getRedirectTarget();
		}
		return [
			'text' => $text,
			'finalTitle' => $finalTitle,
			'deps' => $deps ];
	}
3630
3631
	/**
	 * Fetch a file and register a reference to it.
	 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
	 *
	 * Delegates to fetchFileAndTitle() and returns only the first element of
	 * its result (the file); the title is discarded.
	 *
	 * @param Title $title
	 * @param array $options Array of options to RepoGroup::findFile
	 * @return File|bool
	 */
	public function fetchFile( $title, $options = [] ) {
		list( $fetchedFile ) = $this->fetchFileAndTitle( $title, $options );
		return $fetchedFile;
	}
3641
3642
	/**
	 * Fetch a file and its title and register a reference to it.
	 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
	 *
	 * The requested title is always registered on the parser output via
	 * addImage(). When the file that was found reports a different title,
	 * that title is registered as well and is the one returned.
	 *
	 * @param Title $title
	 * @param array $options Array of options to RepoGroup::findFile
	 * @return array ( File or false, Title of file )
	 */
	public function fetchFileAndTitle( $title, $options = [] ) {
		$file = $this->fetchFileNoRegister( $title, $options );

		# Timestamp and hash are only available when a file was found
		if ( $file ) {
			$timestamp = $file->getTimestamp();
			$hash = $file->getSha1();
		} else {
			$timestamp = false;
			$hash = false;
		}

		# Record the dependency under the title that was asked for
		$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

		$titleDiffers = $file && !$title->equals( $file->getTitle() );
		if ( $titleDiffers ) {
			# Switch to the title of the file actually found and record it too
			$title = $file->getTitle();
			$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
		}

		return [ $file, $title ];
	}
3663
3664
	/**
	 * Helper function for fetchFileAndTitle.
	 *
	 * Looks a file up without registering it as a dependency. Also useful if
	 * you need to fetch a file but not use it yet, for example to get the
	 * file's handler.
	 *
	 * Lookup strategy, in order of precedence:
	 *  - 'broken' set in $options: no lookup, false is returned
	 *  - 'sha1' set in $options: lookup by (sha1,timestamp)
	 *  - otherwise: lookup by (name,timestamp)
	 *
	 * @param Title $title
	 * @param array $options Array of options to RepoGroup::findFile
	 * @return File|bool
	 */
	protected function fetchFileNoRegister( $title, $options = [] ) {
		if ( isset( $options['broken'] ) ) {
			// broken thumbnail forced by hook
			return false;
		}

		if ( isset( $options['sha1'] ) ) {
			// get by (sha1,timestamp)
			return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
		}

		// get by (name,timestamp)
		return wfFindFile( $title, $options );
	}
3684
3685
	/**
	 * Transclude an interwiki link.
	 *
	 * Builds the full URL of $title with the given action and fetches its
	 * content through fetchScaryTemplateMaybeFromCache(). A content-language
	 * error message is returned instead when $wgEnableScaryTranscluding is
	 * off or when the URL is longer than 255 bytes.
	 *
	 * @param Title $title
	 * @param string $action Value for the 'action' query parameter of the URL
	 *
	 * @return string Fetched text, or an error message
	 */
	public function interwikiTransclude( $title, $action ) {
		global $wgEnableScaryTranscluding;

		if ( $wgEnableScaryTranscluding ) {
			$url = $title->getFullURL( [ 'action' => $action ] );

			if ( strlen( $url ) <= 255 ) {
				return $this->fetchScaryTemplateMaybeFromCache( $url );
			}
			$errorKey = 'scarytranscludetoolong';
		} else {
			$errorKey = 'scarytranscludedisabled';
		}

		return wfMessage( $errorKey )->inContentLanguage()->text();
	}
3707
3708
	/**
	 * Return the content behind a URL, using the 'transcache' table as cache.
	 *
	 * A cached row is used when its tc_time is not older than
	 * $wgTranscludeCacheExpiry seconds. Otherwise the URL is requested over
	 * HTTP; on success the content is written to 'transcache' and returned,
	 * on failure a content-language error message is returned and nothing
	 * is cached.
	 *
	 * @param string $url
	 * @return mixed|string Cached or freshly fetched content, or an error message
	 */
	public function fetchScaryTemplateMaybeFromCache( $url ) {
		global $wgTranscludeCacheExpiry;

		# Try the cache first
		$dbr = wfGetDB( DB_REPLICA );
		$oldestAllowed = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
		$conds = [ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $oldestAllowed ) ];
		$cachedRow = $dbr->selectRow( 'transcache', [ 'tc_time', 'tc_contents' ], $conds );
		if ( $cachedRow ) {
			return $cachedRow->tc_contents;
		}

		# Cache miss: fetch over HTTP
		$req = MWHttpRequest::factory( $url, [], __METHOD__ );
		$status = $req->execute(); // Status object
		if ( !$status->isOK() ) {
			if ( $req->getStatus() != 200 ) {
				// Though we failed to fetch the content, this status is useless.
				return wfMessage( 'scarytranscludefailed-httpstatus' )
					->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
			}
			return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
		}
		$text = $req->getContent();

		# Store the fresh copy, replacing any row with the same URL
		$dbw = wfGetDB( DB_MASTER );
		$cacheRow = [
			'tc_url' => $url,
			'tc_time' => $dbw->timestamp( time() ),
			'tc_contents' => $text
		];
		$dbw->replace( 'transcache', [ 'tc_url' ], $cacheRow );

		return $text;
	}
3742
3743
	/**
	 * Triple brace replacement -- used for template arguments
	 * @private
	 *
	 * Resolution order for {{{name|default}}}:
	 *  - the argument value from $frame, if it has one;
	 *  - otherwise the supplied default (first part), when the output type
	 *    is html or pre, or is wiki and the frame is a template;
	 *  - otherwise the original triple-brace construct is rebuilt unchanged.
	 *
	 * @param array $piece Array with the keys 'title' and 'parts'
	 * @param PPFrame $frame
	 *
	 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
	 */
	public function argSubstitution( $piece, $frame ) {
		$parts = $piece['parts'];
		$nameWithSpaces = $frame->expand( $piece['title'] );
		$text = $frame->getArgument( trim( $nameWithSpaces ) );

		# Remember this now: $text may get an error comment appended below
		$noArgument = ( $text === false );
		$object = false;

		if ( $noArgument && $parts->getLength() > 0 ) {
			$defaultUsable = $this->ot['html']
				|| $this->ot['pre']
				|| ( $this->ot['wiki'] && $frame->isTemplate() );
			if ( $defaultUsable ) {
				# No match in frame, use the supplied default
				$object = $parts->item( 0 )->getChildren();
			}
		}

		# Account for the size of the argument text against the include limit
		$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
		if ( !$withinLimit ) {
			$this->limitationWarn( 'post-expand-template-argument' );
		}

		if ( $noArgument && $object === false ) {
			# No match anywhere
			$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
		}

		if ( !$withinLimit ) {
			$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
		}

		return $object !== false
			? [ 'object' => $object ]
			: [ 'text' => $text ];
	}
3789
3790
	/**
	 * Return the text to be used for a given extension tag.
	 * This is the ghost of strip().
	 *
	 * In HTML output mode (or for function tag hooks in html/pre mode) the
	 * registered hook is called and its output is normally stored in the
	 * strip state under a freshly generated marker, which is returned in its
	 * place. In other modes the tag is rebuilt as text and stored the same way.
	 *
	 * A hook may return an array instead of a string: element 0 is then the
	 * output, and the key 'markerType' ('none', 'nowiki' or 'general')
	 * selects how the output is stored.
	 *
	 * @param array $params Associative array of parameters:
	 *     name       PPNode for the tag name
	 *     attr       PPNode for unparsed text where tag attributes are thought to be
	 *     attributes Optional associative array of parsed attributes
	 *     inner      Contents of extension element
	 *     noClose    Original text did not have a close tag
	 * @param PPFrame $frame
	 *
	 * @throws MWException If a registered hook is not callable, or the marker
	 *   type is not one of 'none', 'nowiki', 'general'
	 * @return string
	 */
	public function extensionSubstitution( $params, $frame ) {
		static $errorStr = '<span class="error">';
		static $errorLen = 20;

		$name = $frame->expand( $params['name'] );
		if ( substr( $name, 0, $errorLen ) === $errorStr ) {
			// Probably expansion depth or node count exceeded. Just punt the
			// error up.
			return $name;
		}

		$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
		if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
			// See above
			return $attrText;
		}

		// We can't safely check if the expansion for $content resulted in an
		// error, because the content could happen to be the error string
		// (T149622).
		$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );

		$marker = self::MARKER_PREFIX . "-$name-"
			. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

		$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
			( $this->ot['html'] || $this->ot['pre'] );
		if ( $isFunctionTag ) {
			$markerType = 'none';
		} else {
			$markerType = 'general';
		}
		if ( $this->ot['html'] || $isFunctionTag ) {
			$name = strtolower( $name );
			$attributes = Sanitizer::decodeTagAttributes( $attrText );
			if ( isset( $params['attributes'] ) ) {
				// Attributes parsed from the text take precedence (array union)
				$attributes = $attributes + $params['attributes'];
			}

			if ( isset( $this->mTagHooks[$name] ) ) {
				# Workaround for PHP bug 35229 and similar
				if ( !is_callable( $this->mTagHooks[$name] ) ) {
					throw new MWException( "Tag hook for $name is not callable\n" );
				}
				$output = call_user_func_array( $this->mTagHooks[$name],
					[ $content, $attributes, $this, $frame ] );
			} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
				list( $callback, ) = $this->mFunctionTagHooks[$name];
				if ( !is_callable( $callback ) ) {
					throw new MWException( "Tag hook for $name is not callable\n" );
				}

				// Taking a reference to $this is problematic on PHP 7.1+;
				// hand the callback a reference to a local alias instead.
				$parser = $this;
				$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
			} else {
				$output = '<span class="error">Invalid tag extension name: ' .
					htmlspecialchars( $name ) . '</span>';
			}

			if ( is_array( $output ) ) {
				# Hook returned flags along with its output. Only 'markerType'
				# is honoured; extract() would let the hook's array overwrite
				# any local variable of this method.
				$flags = $output;
				$output = $flags[0];
				if ( isset( $flags['markerType'] ) ) {
					$markerType = $flags['markerType'];
				}
			}
		} else {
			# Not generating HTML: rebuild the tag as text
			if ( is_null( $attrText ) ) {
				$attrText = '';
			}
			if ( isset( $params['attributes'] ) ) {
				foreach ( $params['attributes'] as $attrName => $attrValue ) {
					$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
						htmlspecialchars( $attrValue ) . '"';
				}
			}
			if ( $content === null ) {
				$output = "<$name$attrText/>";
			} else {
				# isset() treats a missing 'close' key like a null one and
				# avoids an undefined-index notice
				$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
				if ( substr( $close, 0, $errorLen ) === $errorStr ) {
					// See above
					return $close;
				}
				$output = "<$name$attrText>$content$close";
			}
		}

		if ( $markerType === 'none' ) {
			return $output;
		} elseif ( $markerType === 'nowiki' ) {
			$this->mStripState->addNoWiki( $marker, $output );
		} elseif ( $markerType === 'general' ) {
			$this->mStripState->addGeneral( $marker, $output );
		} else {
			throw new MWException( __METHOD__ . ': invalid marker type' );
		}
		return $marker;
	}
3903
3904
	/**
	 * Increment an include size counter
	 *
	 * The counter for $type is only updated when the new total stays within
	 * the maximum include size from the parser options; otherwise it is
	 * left untouched.
	 *
	 * @param string $type The type of expansion
	 * @param int $size The size of the text
	 * @return bool False if this inclusion would take it over the maximum, true otherwise
	 */
	public function incrementIncludeSize( $type, $size ) {
		$newTotal = $this->mIncludeSizes[$type] + $size;
		if ( $newTotal <= $this->mOptions->getMaxIncludeSize() ) {
			$this->mIncludeSizes[$type] = $newTotal;
			return true;
		}
		return false;
	}
3919
3920
	/**
	 * Increment the expensive function count
	 *
	 * The counter is incremented unconditionally, even when the limit has
	 * already been passed.
	 *
	 * @return bool False if the limit has been exceeded
	 */
	public function incrementExpensiveFunctionCount() {
		$limit = $this->mOptions->getExpensiveParserFunctionLimit();
		$this->mExpensiveFunctionCount++;
		return !( $this->mExpensiveFunctionCount > $limit );
	}
3929
3930
	/**
	 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
	 * Fills $this->mDoubleUnderscores, returns the modified text
	 *
	 * Besides stripping, the switches found are applied to the parser state
	 * and the parser output (TOC flags, gallery flag, index policy, tracking
	 * categories), and each one is stored as an empty-valued page property.
	 *
	 * @param string $text
	 *
	 * @return string Text with the double-underscore items removed; the
	 *   first __TOC__ is replaced by the placeholder <!--MWTOC-->
	 */
	public function doDoubleUnderscore( $text ) {
		# The position of __TOC__ needs to be recorded
		$tocWord = MagicWord::get( 'toc' );
		if ( $tocWord->match( $text ) ) {
			$this->mShowToc = true;
			$this->mForceTocPosition = true;

			# Set a placeholder for the first one (filled in with the TOC at
			# the end), then drop every remaining occurrence.
			$text = $tocWord->replace( '<!--MWTOC-->', $text, 1 );
			$text = $tocWord->replace( '', $text );
		}

		# Now match and remove the rest of them
		$switchWords = MagicWord::getDoubleUnderscoreArray();
		$this->mDoubleUnderscores = $switchWords->matchAndRemove( $text );

		if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
			$this->mOutput->mNoGallery = true;
		}

		# An explicit __TOC__ wins over __NOTOC__
		if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
			$this->mShowToc = false;
		}

		if ( isset( $this->mDoubleUnderscores['hiddencat'] ) ) {
			if ( $this->mTitle->getNamespace() == NS_CATEGORY ) {
				$this->addTrackingCategory( 'hidden-category-category' );
			}
		}

		# (bug 8068) Allow control over whether robots index a page.
		# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here!  This
		# is not desirable, the last one on the page should win.
		$indexPolicies = [
			'noindex' => 'noindex-category',
			'index' => 'index-category',
		];
		foreach ( $indexPolicies as $policy => $trackingCategory ) {
			if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
				$this->mOutput->setIndexPolicy( $policy );
				$this->addTrackingCategory( $trackingCategory );
			}
		}

		# Cache all double underscores in the database
		foreach ( array_keys( $this->mDoubleUnderscores ) as $switchName ) {
			$this->mOutput->setProperty( $switchName, '' );
		}

		return $text;
	}
3987
3988
	/**
	 * Add a tracking category to the parser output for the current title.
	 *
	 * @see ParserOutput::addTrackingCategory()
	 * @param string $msg Message key
	 * @return bool Whether the addition was successful
	 */
	public function addTrackingCategory( $msg ) {
		$wasAdded = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );
		return $wasAdded;
	}
3996
3997
	/**
	 * This function accomplishes several tasks:
	 * 1) Auto-number headings if that option is enabled
	 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
	 * 3) Add a Table of contents on the top for users who have enabled the option
	 * 4) Auto-anchor headings
	 *
	 * It loops through all headlines, collects the necessary data, then splits up the
	 * string and re-inserts the newly formatted headlines.
	 *
	 * Side effects on the parser output: edit-section tokens, new-section
	 * flags, TOC HTML, the 'mediawiki.toc' module and (when $isMain is true)
	 * the section list.
	 *
	 * @param string $text HTML text containing <h1>..<h6> headings
	 * @param string $origText Original, untouched wikitext
	 * @param bool $isMain Whether to store the section list on the output and
	 *   to prepend the TOC when its position is not forced
	 * @return string Text with formatted headlines and, where applicable, the TOC
	 * @private
	 */
	public function formatHeadings( $text, $origText, $isMain = true ) {
		global $wgMaxTocLevel, $wgExperimentalHtmlIds;

		# Inhibit editsection links if requested in the page
		if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
			$maybeShowEditLink = $showEditLink = false;
		} else {
			$maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
			$showEditLink = $this->mOptions->getEditSection();
		}
		if ( $showEditLink ) {
			$this->mOutput->setEditSectionTokens( true );
		}

		# Get all headlines for numbering them and adding funky stuff like [edit]
		# links - this is for later, but we need the number of headlines right now
		$matches = [];
		$numMatches = preg_match_all(
			'/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
			$text,
			$matches
		);

		# if there are fewer than 4 headlines in the article, do not show TOC
		# unless it's been explicitly enabled.
		$enoughToc = $this->mShowToc &&
			( ( $numMatches >= 4 ) || $this->mForceTocPosition );

		# Allow user to stipulate that a page should have a "new section"
		# link added via __NEWSECTIONLINK__
		if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
			$this->mOutput->setNewSection( true );
		}

		# Allow user to remove the "new section"
		# link via __NONEWSECTIONLINK__
		if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
			$this->mOutput->hideNewSection( true );
		}

		# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
		# override above conditions and always show TOC above first header
		if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
			$this->mShowToc = true;
			$enoughToc = true;
		}

		# headline counter
		$headlineCount = 0;
		$numVisible = 0;

		# Ugh .. the TOC should have neat indentation levels which can be
		# passed to the skin functions. These are determined here
		$toc = '';
		$full = '';
		$head = [];
		$sublevelCount = [];
		$levelCount = [];
		$level = 0;
		$prevlevel = 0;
		$toclevel = 0;
		$prevtoclevel = 0;
		$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
		$baseTitleText = $this->mTitle->getPrefixedDBkey();
		# The original wikitext is re-preprocessed in OT_WIKI mode to locate
		# each heading's offset; the previous output type is restored below.
		$oldType = $this->mOutputType;
		$this->setOutputType( self::OT_WIKI );
		$frame = $this->getPreprocessor()->newFrame();
		$root = $this->preprocessToDom( $origText );
		$node = $root->getFirstChild();
		$byteOffset = 0;
		$tocraw = [];
		# Anchors already handed out, keyed by lower-cased anchor
		$refers = [];

		$headlines = $numMatches !== false ? $matches[3] : [];

		foreach ( $headlines as $headline ) {
			$isTemplate = false;
			$titleText = false;
			$sectionIndex = false;
			$numbering = '';
			$markerMatches = [];
			if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
				$serial = $markerMatches[1];
				list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
				$isTemplate = ( $titleText != $baseTitleText );
				$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
			}

			if ( $toclevel ) {
				$prevlevel = $level;
			}
			$level = $matches[1][$headlineCount];

			if ( $level > $prevlevel ) {
				# Increase TOC level
				$toclevel++;
				$sublevelCount[$toclevel] = 0;
				if ( $toclevel < $wgMaxTocLevel ) {
					$prevtoclevel = $toclevel;
					$toc .= Linker::tocIndent();
					$numVisible++;
				}
			} elseif ( $level < $prevlevel && $toclevel > 1 ) {
				# Decrease TOC level, find level to jump to

				for ( $i = $toclevel; $i > 0; $i-- ) {
					if ( $levelCount[$i] == $level ) {
						# Found last matching level
						$toclevel = $i;
						break;
					} elseif ( $levelCount[$i] < $level ) {
						# Found first matching level below current level
						$toclevel = $i + 1;
						break;
					}
				}
				if ( $i == 0 ) {
					$toclevel = 1;
				}
				if ( $toclevel < $wgMaxTocLevel ) {
					if ( $prevtoclevel < $wgMaxTocLevel ) {
						# Unindent only if the previous toc level was shown :p
						$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
						$prevtoclevel = $toclevel;
					} else {
						$toc .= Linker::tocLineEnd();
					}
				}
			} else {
				# No change in level, end TOC line
				if ( $toclevel < $wgMaxTocLevel ) {
					$toc .= Linker::tocLineEnd();
				}
			}

			$levelCount[$toclevel] = $level;

			# count number of headlines for each level
			$sublevelCount[$toclevel]++;
			$dot = 0;
			for ( $i = 1; $i <= $toclevel; $i++ ) {
				if ( !empty( $sublevelCount[$i] ) ) {
					if ( $dot ) {
						$numbering .= '.';
					}
					$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
					$dot = 1;
				}
			}

			# The safe header is a version of the header text safe to use for links

			# Remove link placeholders by the link text.
			#     <!--LINK number-->
			# turns into
			#     link text with suffix
			# Do this before unstrip since link text can contain strip markers
			$safeHeadline = $this->replaceLinkHoldersText( $headline );

			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
			$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

			# Strip out HTML (first regex removes any tag not allowed)
			# Allowed tags are:
			# * <sup> and <sub> (bug 8393)
			# * <i> (bug 26375)
			# * <b> (r105284)
			# * <bdi> (bug 72884)
			# * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
			# * <s> and <strike> (T35715)
			# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
			# to allow setting directionality in toc items.
			$tocline = preg_replace(
				[
					'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
					'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
				],
				[ '', '<$1>' ],
				$safeHeadline
			);

			# Strip '<span></span>', which is the result from the above if
			# <span id="foo"></span> is used to produce an additional anchor
			# for a section.
			$tocline = str_replace( '<span></span>', '', $tocline );

			$tocline = trim( $tocline );

			# For the anchor, strip out HTML-y stuff period
			$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
			$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

			# Save headline for section edit hint before it's escaped
			$headlineHint = $safeHeadline;

			if ( $wgExperimentalHtmlIds ) {
				# For reverse compatibility, provide an id that's
				# HTML4-compatible, like we used to.
				# It may be worth noting, academically, that it's possible for
				# the legacy anchor to conflict with a non-legacy headline
				# anchor on the page.  In this case likely the "correct" thing
				# would be to either drop the legacy anchors or make sure
				# they're numbered first.  However, this would require people
				# to type in section names like "abc_.D7.93.D7.90.D7.A4"
				# manually, so let's not bother worrying about it.
				$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
					[ 'noninitial', 'legacy' ] );
				$safeHeadline = Sanitizer::escapeId( $safeHeadline );

				if ( $legacyHeadline == $safeHeadline ) {
					# No reason to have both (in fact, we can't)
					$legacyHeadline = false;
				}
			} else {
				$legacyHeadline = false;
				$safeHeadline = Sanitizer::escapeId( $safeHeadline,
					'noninitial' );
			}

			# HTML names must be case-insensitively unique (bug 10721).
			# This does not apply to Unicode characters per
			# https://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
			# @todo FIXME: We may be changing them depending on the current locale.
			$arrayKey = strtolower( $safeHeadline );
			if ( $legacyHeadline === false ) {
				$legacyArrayKey = false;
			} else {
				$legacyArrayKey = strtolower( $legacyHeadline );
			}

			# Create the anchor for linking from the TOC to the section
			$anchor = $safeHeadline;
			$legacyAnchor = $legacyHeadline;
			if ( isset( $refers[$arrayKey] ) ) {
				# Anchor already taken: append the first free "_N" suffix (N >= 2)
				// @codingStandardsIgnoreStart
				for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
				// @codingStandardsIgnoreEnd
				$anchor .= "_$i";
				$refers["{$arrayKey}_$i"] = true;
			} else {
				$refers[$arrayKey] = true;
			}
			# Only register a legacy anchor when there is one. Without this
			# guard, $refers[false] would be written, which PHP stores under
			# the integer key 0; a later heading whose anchor is "0" would
			# then look already taken and be renamed to "0_2".
			if ( $legacyHeadline !== false ) {
				if ( isset( $refers[$legacyArrayKey] ) ) {
					// @codingStandardsIgnoreStart
					for ( $i = 2; isset( $refers["{$legacyArrayKey}_$i"] ); ++$i );
					// @codingStandardsIgnoreEnd
					$legacyAnchor .= "_$i";
					$refers["{$legacyArrayKey}_$i"] = true;
				} else {
					$refers[$legacyArrayKey] = true;
				}
			}

			# Don't number the heading if it is the only one (looks silly)
			if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
				# the two are different if the line contains a link
				$headline = Html::element(
					'span',
					[ 'class' => 'mw-headline-number' ],
					$numbering
				) . ' ' . $headline;
			}

			if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
				$toc .= Linker::tocLine( $anchor, $tocline,
					$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
			}

			# Add the section to the section tree
			# Find the DOM node for this header
			$noOffset = ( $isTemplate || $sectionIndex === false );
			while ( $node && !$noOffset ) {
				if ( $node->getName() === 'h' ) {
					$bits = $node->splitHeading();
					if ( $bits['i'] == $sectionIndex ) {
						break;
					}
				}
				# NOTE(review): despite its name, $byteOffset is advanced with
				# mb_strlen(); left unchanged here.
				$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
				$node = $node->getNextSibling();
			}
			$tocraw[] = [
				'toclevel' => $toclevel,
				'level' => $level,
				'line' => $tocline,
				'number' => $numbering,
				'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
				'fromtitle' => $titleText,
				'byteoffset' => ( $noOffset ? null : $byteOffset ),
				'anchor' => $anchor,
			];

			# give headline the correct <h#> tag
			if ( $maybeShowEditLink && $sectionIndex !== false ) {
				// Output edit section links as markers with styles that can be customized by skins
				if ( $isTemplate ) {
					# Put a T flag in the section identifier, to indicate to extractSections()
					# that sections inside <includeonly> should be counted.
					$editsectionPage = $titleText;
					$editsectionSection = "T-$sectionIndex";
					$editsectionContent = null;
				} else {
					$editsectionPage = $this->mTitle->getPrefixedText();
					$editsectionSection = $sectionIndex;
					$editsectionContent = $headlineHint;
				}
				// We use a bit of pseudo-xml for editsection markers. The
				// language converter is run later on. Using a UNIQ style marker
				// leads to the converter screwing up the tokens when it
				// converts stuff. And trying to insert strip tags fails too. At
				// this point all real inputted tags have already been escaped,
				// so we don't have to worry about a user trying to input one of
				// these markers directly. We use a page and section attribute
				// to stop the language converter from converting these
				// important bits of data, but put the headline hint inside a
				// content block because the language converter is supposed to
				// be able to convert that piece of data.
				// Gets replaced with html in ParserOutput::getText
				$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
				$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
				if ( $editsectionContent !== null ) {
					$editlink .= '>' . $editsectionContent . '</mw:editsection>';
				} else {
					$editlink .= '/>';
				}
			} else {
				$editlink = '';
			}
			$head[$headlineCount] = Linker::makeHeadline( $level,
				$matches['attrib'][$headlineCount], $anchor, $headline,
				$editlink, $legacyAnchor );

			$headlineCount++;
		}

		$this->setOutputType( $oldType );

		# Never ever show TOC if no headers
		if ( $numVisible < 1 ) {
			$enoughToc = false;
		}

		if ( $enoughToc ) {
			if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
				$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
			}
			$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
			$this->mOutput->setTOCHTML( $toc );
			$toc = self::TOC_START . $toc . self::TOC_END;
			$this->mOutput->addModules( 'mediawiki.toc' );
		}

		if ( $isMain ) {
			$this->mOutput->setSections( $tocraw );
		}

		# split up and insert constructed headlines
		$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6]>/i', $text );
		$i = 0;

		// build an array of document sections
		$sections = [];
		foreach ( $blocks as $block ) {
			// $head is zero-based, sections aren't.
			if ( empty( $head[$i - 1] ) ) {
				$sections[$i] = $block;
			} else {
				$sections[$i] = $head[$i - 1] . $block;
			}

			/**
			 * Send a hook, one per section.
			 * The idea here is to be able to make section-level DIVs, but to do so in a
			 * lower-impact, more correct way than r50769
			 *
			 * $this : caller
			 * $section : the section number
			 * &$sectionContent : ref to the content of the section
			 * $showEditLinks : boolean describing whether this section has an edit link
			 */
			Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );

			$i++;
		}

		if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
			// append the TOC at the beginning
			// Top anchor now in skin
			$sections[0] = $sections[0] . $toc . "\n";
		}

		$full .= implode( '', $sections );

		if ( $this->mForceTocPosition ) {
			# Fill in the placeholder left by doDoubleUnderscore() for __TOC__
			return str_replace( '<!--MWTOC-->', $toc, $full );
		} else {
			return $full;
		}
	}
4412
4413
	/**
	 * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
	 * conversion, substituting signatures, {{subst:}} templates, etc.
	 *
	 * The second pass (pstPass2) only runs when the options enable the
	 * pre-save transform; line-ending normalisation and unstripping always
	 * happen. The parser's user is set to $user for the duration of the
	 * call and reset to null afterwards.
	 *
	 * @param string $text The text to transform
	 * @param Title $title The Title object for the current article
	 * @param User $user The User object describing the current user
	 * @param ParserOptions $options Parsing options
	 * @param bool $clearState Whether to clear the parser state first
	 * @return string The altered wiki markup
	 */
	public function preSaveTransform( $text, Title $title, User $user,
		ParserOptions $options, $clearState = true
	) {
		# Hold the value returned by lock() in a local for the whole call.
		# NOTE(review): presumably a scoped guard that releases the lock when
		# it goes out of scope -- confirm against lock().
		$scopedLock = $clearState ? $this->lock() : null;

		$this->startParse( $title, $options, self::OT_WIKI, $clearState );
		$this->setUser( $user );

		// We still normalize line endings for backwards-compatibility
		// with other code that just calls PST, but this should already
		// be handled in TextContent subclasses
		$normalized = TextContent::normalizeLineEndings( $text );

		$transformed = $options->getPreSaveTransform()
			? $this->pstPass2( $normalized, $user )
			: $normalized;
		$result = $this->mStripState->unstripBoth( $transformed );

		$this->setUser( null ); # Reset

		return $result;
	}
4447
4448
	/**
	 * Pre-save transform helper function.
	 *
	 * Runs variable replacement (only {{subst:}} constructs in this output
	 * mode), replaces the tilde signature shorthands and resolves
	 * "pipe trick" links.
	 *
	 * @param string $text Wikitext to transform
	 * @param User $user User whose signature replaces the tilde sequences
	 *
	 * @return string Transformed wikitext
	 */
	private function pstPass2( $text, $user ) {
		global $wgContLang;

		# Note: This is the timestamp saved as hardcoded wikitext to
		# the database, we use $wgContLang here in order to give
		# everyone the same signature and use the default one rather
		# than the one selected in each user's preferences.
		# (see also bug 12815)
		$ts = $this->mOptions->getTimestamp();
		$timestamp = MWTimestamp::getLocalInstance( $ts );
		$ts = $timestamp->format( 'YmdHis' );
		$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

		$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

		# Variable replacement
		# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
		$text = $this->replaceVariables( $text );

		# This works almost by chance, as the replaceVariables are done before the getUserSig(),
		# which may corrupt this parser instance via its wfMessage()->text() call-

		# Signatures
		# strtr() with an array tries the longest key first, so five tildes
		# are never misread as four tildes plus one.
		$sigText = $this->getUserSig( $user );
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );

		# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
		$tc = '[' . Title::legalChars() . ']';
		$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

		// [[ns:page (context)|]]
		$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
		// [[ns:page(context)|]] with double-width brackets U+FF08 / U+FF09
		// (added in r40257). The brackets are spelled as UTF-8 byte escapes so
		// they cannot be flattened into ASCII parentheses, which would turn the
		// context part into a plain capture group matching any title.
		$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\xEF\xBC\x88$tc+\xEF\xBC\x89)\\|]]/";
		// [[ns:page (context), context|]] using either an ASCII comma followed
		// by a space or the double-width comma U+FF0C
		$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |\xEF\xBC\x8C)$tc+|)\\|]]/";
		// [[|page]] (reverse pipe trick: add context from page title)
		$p2 = "/\[\[\\|($tc+)]]/";

		# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
		$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
		$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
		$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

		# Reverse pipe trick: borrow the namespace/context of the current title
		$t = $this->mTitle->getText();
		$m = [];
		if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
		} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
			$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
		} else {
			# if there's no context, don't bother duplicating the title
			$text = preg_replace( $p2, '[[\\1]]', $text );
		}

		return $text;
	}
4517
4518
	/**
	 * Fetch the user's signature text, if any, and normalize to
	 * validated, ready-to-insert wikitext.
	 * If you have pre-fetched the nickname or the fancySig option, you can
	 * specify them here to save a database query.
	 * Do not reuse this parser instance after calling getUserSig(),
	 * as it may have changed if it's the $wgParser.
	 *
	 * @param User $user
	 * @param string|bool $nickname Nickname to use or false to use user's default nickname
	 * @param bool|null $fancySig Whether the nickname is the complete signature,
	 *    or null to use the user's 'fancysig' option
	 * @return string
	 */
	public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
		global $wgMaxSigChars;

		$username = $user->getName();

		# Fall back to the user object for anything the caller did not supply.
		if ( $nickname === false ) {
			$nickname = $user->getOption( 'nickname' );
		}
		if ( $fancySig === null ) {
			$fancySig = $user->getBoolOption( 'fancysig' );
		}

		# Loose comparison on purpose: an empty nickname also means "use the username".
		if ( $nickname == null ) {
			$nickname = $username;
		}

		$tooLong = mb_strlen( $nickname ) > $wgMaxSigChars;
		if ( $tooLong ) {
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
		} elseif ( $fancySig !== false ) {
			# Sig. might contain markup; validate this
			if ( $this->validateSig( $nickname ) !== false ) {
				# Validated; clean up (if needed) and return it
				return $this->cleanSig( $nickname, true );
			}

			# Failed to validate; fall back to the default
			$nickname = $username;
			wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
		}

		# Make sure the nickname doesn't get a sig in a sig
		$nickname = self::cleanSigInSig( $nickname );

		# If we're still here, make it a link to the user page
		$userText = wfEscapeWikiText( $username );
		$nickText = wfEscapeWikiText( $nickname );
		$msgName = $user->isAnon() ? 'signature-anon' : 'signature';

		$message = wfMessage( $msgName, $userText, $nickText )->inContentLanguage();

		return $message->title( $this->getTitle() )->text();
	}
4574
4575
	/**
	 * Check that the user's signature contains no bad XML
	 *
	 * @param string $text
	 * @return string|bool The unchanged input when it is a well-formed XML
	 *   fragment, or false if invalid.
	 */
	public function validateSig( $text ) {
		if ( Xml::isWellFormedXmlFragment( $text ) ) {
			return $text;
		}

		return false;
	}
4584
4585
	/**
	 * Clean up signature text
	 *
	 * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
	 * 2) Substitute all transclusions
	 *
	 * @param string $text
	 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
	 * @return string Signature text
	 */
	public function cleanSig( $text, $parsing = false ) {
		global $wgTitle;

		$standalone = !$parsing;

		if ( $standalone ) {
			# Not inside a parse: take the lock and set up a fresh preprocess state.
			# $magicScopeVariable must stay alive until this method returns.
			$magicScopeVariable = $this->lock();
			$this->startParse( $wgTitle, new ParserOptions(), self::OT_PREPROCESS, true );
		}

		# Option to disable this feature
		if ( !$this->mOptions->getCleanSignatures() ) {
			return $text;
		}

		# @todo FIXME: Regex doesn't respect extension tags or nowiki
		#  => Move this logic to braceSubstitution()
		$substWord = MagicWord::get( 'subst' );
		$baseRegex = $substWord->getBaseRegex();
		$regexCase = $substWord->getRegexCase();
		# Matches every "{{" that is not already followed by the subst word.
		$substRegex = '/\{\{(?!(?:' . $baseRegex . '))/x' . $regexCase;
		$substText = '{{' . $substWord->getSynonym( 0 );

		$cleaned = preg_replace( $substRegex, $substText, $text );
		$cleaned = self::cleanSigInSig( $cleaned );

		$dom = $this->preprocessToDom( $cleaned );
		$frame = $this->getPreprocessor()->newFrame();
		$cleaned = $frame->expand( $dom );

		if ( $standalone ) {
			$cleaned = $this->mStripState->unstripBoth( $cleaned );
		}

		return $cleaned;
	}
4625
4626
	/**
	 * Remove every run of 3, 4 or 5 tildes from signature text.
	 *
	 * @param string $text
	 * @return string Signature text with /~{3,5}/ removed
	 */
	public static function cleanSigInSig( $text ) {
		return preg_replace( '/~{3,5}/', '', $text );
	}
4636
4637
	/**
	 * Public entry point to startParse(): sets up the variables that
	 * parse() normally sets up, so that an external function can call
	 * some class members with confidence.
	 *
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int $outputType
	 * @param bool $clearState
	 */
	public function startExternalParse( Title $title = null, ParserOptions $options,
		$outputType, $clearState = true
	) {
		// Pure delegation; all arguments are forwarded unchanged.
		$this->startParse(
			$title,
			$options,
			$outputType,
			$clearState
		);
	}
4651
4652
	/**
	 * Store the title, options and output type on the parser and
	 * optionally reset its state.
	 *
	 * @param Title|null $title
	 * @param ParserOptions $options
	 * @param int $outputType
	 * @param bool $clearState Whether to call clearState() afterwards
	 */
	private function startParse( Title $title = null, ParserOptions $options,
		$outputType, $clearState = true
	) {
		$this->setTitle( $title );
		$this->mOptions = $options;
		$this->setOutputType( $outputType );

		if ( !$clearState ) {
			return;
		}

		$this->clearState();
	}
4668
4669
	/**
	 * Wrapper for preprocess()
	 *
	 * A static flag guards against re-entry: a call made while another
	 * transformMsg() call is still running returns $text untouched.
	 *
	 * @param string $text The text to preprocess
	 * @param ParserOptions $options Options
	 * @param Title|null $title Title object or null to use $wgTitle
	 * @return string
	 */
	public function transformMsg( $text, $options, $title = null ) {
		static $executing = false;

		# Guard against infinite recursion
		if ( $executing ) {
			return $text;
		}
		$executing = true;

		try {
			if ( !$title ) {
				global $wgTitle;
				$title = $wgTitle;
			}

			return $this->preprocess( $text, $title, $options );
		} finally {
			# Always release the guard, even when preprocess() throws;
			# otherwise every later call would return its input untransformed.
			$executing = false;
		}
	}
4696
4697
	/**
4698
	 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
4699
	 * The callback should have the following form:
4700
	 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
4701
	 *
4702
	 * Transform and return $text. Use $parser for any required context, e.g. use
4703
	 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
4704
	 *
4705
	 * Hooks may return extended information by returning an array, of which the
4706
	 * first numbered element (index 0) must be the return string, and all other
4707
	 * entries are extracted into local variables within an internal function
4708
	 * in the Parser class.
4709
	 *
4710
	 * This interface (introduced r61913) appears to be undocumented, but
4711
	 * 'markerType' is used by some core tag hooks to override which strip
4712
	 * array their results are placed in. **Use great caution if attempting
4713
	 * this interface, as it is not documented and injudicious use could smash
4714
	 * private variables.**
4715
	 *
4716
	 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
4717
	 * @param callable $callback The callback function (and object) to use for the tag
4718
	 * @throws MWException
4719
	 * @return callable|null The old value of the mTagHooks array associated with the hook
4720
	 */
4721 View Code Duplication
	public function setHook( $tag, $callback ) {
		// Tag names are stored lowercased.
		$tag = strtolower( $tag );

		// Reject characters that cannot be part of a tag name.
		$m = [];
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
		}

		// Remember the previous callback (if any) so it can be handed back.
		$oldVal = null;
		if ( isset( $this->mTagHooks[$tag] ) ) {
			$oldVal = $this->mTagHooks[$tag];
		}

		$this->mTagHooks[$tag] = $callback;

		// Add the tag to the strip list once.
		$alreadyStripped = in_array( $tag, $this->mStripList );
		if ( !$alreadyStripped ) {
			$this->mStripList[] = $tag;
		}

		return $oldVal;
	}
4734
4735
	/**
	 * As setHook(), but letting the contents be parsed.
	 *
	 * Transparent tag hooks are like regular XML-style tag hooks, except they
	 * operate late in the transformation sequence, on HTML instead of wikitext.
	 *
	 * This is probably obsoleted by things dealing with parser frames?
	 * The only extension currently using it is geoserver.
	 *
	 * @since 1.10
	 * @todo better document or deprecate this
	 *
	 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
	 * @param callable $callback The callback function (and object) to use for the tag
	 * @throws MWException If $tag contains '<', '>', CR or LF
	 * @return callable|null The old value of the mTransparentTagHooks array
	 *   associated with the hook
	 */
	public function setTransparentTagHook( $tag, $callback ) {
		$tag = strtolower( $tag );
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			// Name the method that was actually called so the error is traceable.
			throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
		}
		$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
		$this->mTransparentTagHooks[$tag] = $callback;

		return $oldVal;
	}
4762
4763
	/**
	 * Remove all tag hooks and function tag hooks, and restore the
	 * strip list to its default.
	 */
	public function clearTagHooks() {
		// The three resets are independent of each other.
		$this->mStripList = $this->mDefaultStripList;
		$this->mFunctionTagHooks = [];
		$this->mTagHooks = [];
	}
4771
4772
	/**
4773
	 * Create a function, e.g. {{sum:1|2|3}}
4774
	 * The callback function should have the form:
4775
	 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
4776
	 *
4777
	 * Or with Parser::SFH_OBJECT_ARGS:
4778
	 *    function myParserFunction( $parser, $frame, $args ) { ... }
4779
	 *
4780
	 * The callback may either return the text result of the function, or an array with the text
4781
	 * in element 0, and a number of flags in the other elements. The names of the flags are
4782
	 * specified in the keys. Valid flags are:
4783
	 *   found                     The text returned is valid, stop processing the template. This
4784
	 *                             is on by default.
4785
	 *   nowiki                    Wiki markup in the return value should be escaped
4786
	 *   isHTML                    The returned text is HTML, armour it against wikitext transformation
4787
	 *
4788
	 * @param string $id The magic word ID
4789
	 * @param callable $callback The callback function (and object) to use
4790
	 * @param int $flags A combination of the following flags:
4791
	 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
4792
	 *
4793
	 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
4794
	 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
4795
	 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
4796
	 *     the arguments, and to control the way they are expanded.
4797
	 *
4798
	 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
4799
	 *     arguments, for instance:
4800
	 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
4801
	 *
4802
	 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
4803
	 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
4804
	 *     working if/when this is changed.
4805
	 *
4806
	 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
4807
	 *     expansion.
4808
	 *
4809
	 *     Please read the documentation in includes/parser/Preprocessor.php for more information
4810
	 *     about the methods available in PPFrame and PPNode.
4811
	 *
4812
	 * @throws MWException
4813
	 * @return string|callable The old callback function for this name, if any
4814
	 */
4815
	public function setFunctionHook( $id, $callback, $flags = 0 ) {
		global $wgContLang;

		# Remember the previous callback (if any) before overwriting it
		$oldVal = null;
		if ( isset( $this->mFunctionHooks[$id] ) ) {
			$oldVal = $this->mFunctionHooks[$id][0];
		}
		$this->mFunctionHooks[$id] = [ $callback, $flags ];

		# Add to function cache
		$mw = MagicWord::get( $id );
		if ( !$mw ) {
			throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
		}

		$synonyms = $mw->getSynonyms();
		# 0 = case-insensitive bucket, 1 = case-sensitive bucket
		$sensitive = (int)$mw->isCaseSensitive();
		$addHash = !( $flags & self::SFH_NO_HASH );

		foreach ( $synonyms as $synonym ) {
			# Case
			$key = $sensitive ? $synonym : $wgContLang->lc( $synonym );

			# Add leading hash
			if ( $addHash ) {
				$key = '#' . $key;
			}

			# Remove (at most one) trailing colon
			if ( substr( $key, -1 ) === ':' ) {
				$key = substr( $key, 0, -1 );
			}

			$this->mFunctionSynonyms[$sensitive][$key] = $id;
		}

		return $oldVal;
	}
4847
4848
	/**
	 * Get all registered function hook identifiers
	 *
	 * @return array Keys of the function hook registry
	 */
	public function getFunctionHooks() {
		$ids = array_keys( $this->mFunctionHooks );

		return $ids;
	}
4856
4857
	/**
	 * Create a tag function, e.g. "<test>some stuff</test>".
	 * Unlike tag hooks, tag functions are parsed at preprocessor level.
	 * Unlike parser functions, their content is not preprocessed.
	 * @param string $tag
	 * @param callable $callback
	 * @param int $flags
	 * @throws MWException If $tag contains '<', '>', CR or LF
	 * @return array|null The entry previously registered for this tag
	 *   (as stored: [ callback, flags ]), or null if there was none
	 */
	public function setFunctionTagHook( $tag, $callback, $flags ) {
		$tag = strtolower( $tag );

		$m = [];
		if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
			throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
		}

		// Capture the previous registration before replacing it.
		$old = null;
		if ( isset( $this->mFunctionTagHooks[$tag] ) ) {
			$old = $this->mFunctionTagHooks[$tag];
		}
		$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

		// Add the tag to the strip list once.
		$alreadyStripped = in_array( $tag, $this->mStripList );
		if ( !$alreadyStripped ) {
			$this->mStripList[] = $tag;
		}

		return $old;
	}
4882
4883
	/**
	 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
	 * Placeholders created in Linker::link()
	 *
	 * @param string &$text Buffer, modified in place
	 * @param int $options Not used by this method
	 */
	public function replaceLinkHolders( &$text, $options = 0 ) {
		$holders = $this->mLinkHolders;
		$holders->replace( $text );
	}
4893
4894
	/**
	 * Replace "<!--LINK-->" link placeholders with plain text of links
	 * (not HTML-formatted).
	 *
	 * @param string $text
	 * @return string Result of the link holder array's replaceText()
	 */
	public function replaceLinkHoldersText( $text ) {
		$holders = $this->mLinkHolders;

		return $holders->replaceText( $text );
	}
4904
4905
	/**
4906
	 * Renders an image gallery from a text with one line per image.
4907
	 * text labels may be given by using |-style alternative text. E.g.
4908
	 *   Image:one.jpg|The number "1"
4909
	 *   Image:tree.jpg|A tree
4910
	 * given as text will return the HTML of a gallery with two images,
4911
	 * labeled 'The number "1"' and
4912
	 * 'A tree'.
4913
	 *
4914
	 * @param string $text
4915
	 * @param array $params
4916
	 * @return string HTML
4917
	 */
4918
	public function renderImageGallery( $text, $params ) {
		# Pick the gallery implementation from the optional 'mode' attribute
		$mode = false;
		if ( isset( $params['mode'] ) ) {
			$mode = $params['mode'];
		}

		try {
			$ig = ImageGalleryBase::factory( $mode );
		} catch ( Exception $e ) {
			// If invalid type set, fallback to default.
			$ig = ImageGalleryBase::factory( false );
		}

		$ig->setContextTitle( $this->mTitle );
		$ig->setShowBytes( false );
		$ig->setShowFilename( false );
		$ig->setParser( $this );
		$ig->setHideBadImages();
		$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

		# File names are shown only when the 'showfilename' attribute is present
		$ig->setShowFilename( isset( $params['showfilename'] ) );

		if ( isset( $params['caption'] ) ) {
			$caption = $params['caption'];
			$caption = htmlspecialchars( $caption );
			$caption = $this->replaceInternalLinks( $caption );
			$ig->setCaptionHtml( $caption );
		}
		if ( isset( $params['perrow'] ) ) {
			$ig->setPerRow( $params['perrow'] );
		}
		if ( isset( $params['widths'] ) ) {
			$ig->setWidths( $params['widths'] );
		}
		if ( isset( $params['heights'] ) ) {
			$ig->setHeights( $params['heights'] );
		}
		$ig->setAdditionalOptions( $params );

		// The hook receives the parser by reference, but PHP 7.1+ does not
		// allow taking a reference to $this; pass a local alias instead.
		$parser = $this;
		Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

		$lines = StringUtils::explode( "\n", $text );
		foreach ( $lines as $line ) {
			# match lines like these:
			# Image:someimage.jpg|This is some image
			$matches = [];
			preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
			# Skip empty lines
			if ( count( $matches ) == 0 ) {
				continue;
			}

			if ( strpos( $matches[0], '%' ) !== false ) {
				$matches[1] = rawurldecode( $matches[1] );
			}
			$title = Title::newFromText( $matches[1], NS_FILE );
			if ( is_null( $title ) ) {
				# Bogus title. Ignore these so we don't bomb out later.
				continue;
			}

			# We need to get what handler the file uses, to figure out parameters.
			# Note, a hook can override the file name, and choose an entirely different
			# file (which potentially could be of a different type and have different handler).
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $title, &$options, &$descQuery ] );
			# Don't register it now, as ImageGallery does that later.
			$file = $this->fetchFileNoRegister( $title, $options );
			$handler = $file ? $file->getHandler() : false;

			$paramMap = [
				'img_alt' => 'gallery-internal-alt',
				'img_link' => 'gallery-internal-link',
			];
			if ( $handler ) {
				$paramMap = $paramMap + $handler->getParamMap();
				// We don't want people to specify per-image widths.
				// Additionally the width parameter would need special casing anyhow.
				unset( $paramMap['img_width'] );
			}

			$mwArray = new MagicWordArray( array_keys( $paramMap ) );

			$label = '';
			$alt = '';
			$link = '';
			$handlerOptions = [];
			if ( isset( $matches[3] ) ) {
				// look for an |alt= definition while trying not to break existing
				// captions with multiple pipes (|) in it, until a more sensible grammar
				// is defined for images in galleries

				// FIXME: Doing recursiveTagParse at this stage, and the trim before
				// splitting on '|' is a bit odd, and different from makeImage.
				$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
				$parameterMatches = StringUtils::explode( '|', $matches[3] );

				foreach ( $parameterMatches as $parameterMatch ) {
					list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
					if ( $magicName ) {
						$paramName = $paramMap[$magicName];

						switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter: the only other
							// entries in $paramMap come from $handler->getParamMap().
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = '|' . $parameterMatch;
							}
						}

					} else {
						// Last pipe wins.
						$label = '|' . $parameterMatch;
					}
				}
				// Remove the pipe.
				$label = substr( $label, 1 );
			}

			$ig->add( $title, $label, $alt, $link, $handlerOptions );
		}
		$html = $ig->toHTML();
		Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
		return $html;
	}
5071
5072
	/**
	 * Build (and cache per handler class) the image parameter map and the
	 * matching MagicWordArray.
	 *
	 * @param MediaHandler|bool $handler Media handler, or a false value when
	 *   there is none (only the internal parameters are used then)
	 * @return array Two elements: the parameter map (magic word name =>
	 *   [ type, name ]) and a MagicWordArray built from its keys
	 */
	public function getImageParams( $handler ) {
		# Static lists, kept across calls
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
				'upright', 'border', 'link', 'alt', 'class' ],
		];
		static $internalParamMap;

		# The cache key is the handler's class name ('' when there is no handler)
		$handlerClass = $handler ? get_class( $handler ) : '';

		if ( isset( $this->mImageParams[$handlerClass] ) ) {
			return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
		}

		# Build the internal map on first use
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $type => $names ) {
				foreach ( $names as $name ) {
					# e.g. 'text-top' becomes 'img_text_top'
					$magicName = str_replace( '-', '_', "img_$name" );
					$internalParamMap[$magicName] = [ $type, $name ];
				}
			}
		}

		# Add handler params on top of the internal ones
		$paramMap = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$paramMap[$magic] = [ 'handler', $paramName ];
			}
		}

		$this->mImageParams[$handlerClass] = $paramMap;
		$this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray( array_keys( $paramMap ) );

		return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
	}
5115
5116
	/**
5117
	 * Parse image options text and use it to make an image
5118
	 *
5119
	 * @param Title $title
5120
	 * @param string $options
5121
	 * @param LinkHolderArray|bool $holders
5122
	 * @return string HTML
5123
	 */
5124
	public function makeImage( $title, $options, $holders = false ) {
5125
		# Check if the options text is of the form "options|alt text"
5126
		# Options are:
5127
		#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
5128
		#  * left       no resizing, just left align. label is used for alt= only
5129
		#  * right      same, but right aligned
5130
		#  * none       same, but not aligned
5131
		#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
5132
		#  * center     center the image
5133
		#  * frame      Keep original image size, no magnify-button.
5134
		#  * framed     Same as "frame"
5135
		#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
5136
		#  * upright    reduce width for upright images, rounded to full __0 px
5137
		#  * border     draw a 1px border around the image
5138
		#  * alt        Text for HTML alt attribute (defaults to empty)
5139
		#  * class      Set a class for img node
5140
		#  * link       Set the target of the image link. Can be external, interwiki, or local
5141
		# vertical-align values (no % or length right now):
5142
		#  * baseline
5143
		#  * sub
5144
		#  * super
5145
		#  * top
5146
		#  * text-top
5147
		#  * middle
5148
		#  * bottom
5149
		#  * text-bottom
5150
5151
		$parts = StringUtils::explode( "|", $options );
5152
5153
		# Give extensions a chance to select the file revision for us
5154
		$options = [];
5155
		$descQuery = false;
5156
		Hooks::run( 'BeforeParserFetchFileAndTitle',
5157
			[ $this, $title, &$options, &$descQuery ] );
5158
		# Fetch and register the file (file title may be different via hooks)
5159
		list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );
5160
5161
		# Get parameter map
5162
		$handler = $file ? $file->getHandler() : false;
5163
5164
		list( $paramMap, $mwArray ) = $this->getImageParams( $handler );
5165
5166
		if ( !$file ) {
5167
			$this->addTrackingCategory( 'broken-file-category' );
5168
		}
5169
5170
		# Process the input parameters
5171
		$caption = '';
5172
		$params = [ 'frame' => [], 'handler' => [],
5173
			'horizAlign' => [], 'vertAlign' => [] ];
5174
		$seenformat = false;
5175
		foreach ( $parts as $part ) {
5176
			$part = trim( $part );
5177
			list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
5178
			$validated = false;
5179
			if ( isset( $paramMap[$magicName] ) ) {
5180
				list( $type, $paramName ) = $paramMap[$magicName];
5181
5182
				# Special case; width and height come in one variable together
5183
				if ( $type === 'handler' && $paramName === 'width' ) {
5184
					$parsedWidthParam = $this->parseWidthParam( $value );
5185 View Code Duplication
					if ( isset( $parsedWidthParam['width'] ) ) {
5186
						$width = $parsedWidthParam['width'];
5187
						if ( $handler->validateParam( 'width', $width ) ) {
5188
							$params[$type]['width'] = $width;
5189
							$validated = true;
5190
						}
5191
					}
5192 View Code Duplication
					if ( isset( $parsedWidthParam['height'] ) ) {
5193
						$height = $parsedWidthParam['height'];
5194
						if ( $handler->validateParam( 'height', $height ) ) {
5195
							$params[$type]['height'] = $height;
5196
							$validated = true;
5197
						}
5198
					}
5199
					# else no validation -- bug 13436
5200
				} else {
5201
					if ( $type === 'handler' ) {
5202
						# Validate handler parameter
5203
						$validated = $handler->validateParam( $paramName, $value );
5204
					} else {
5205
						# Validate internal parameters
5206
						switch ( $paramName ) {
5207
						case 'manualthumb':
5208
						case 'alt':
5209
						case 'class':
5210
							# @todo FIXME: Possibly check validity here for
5211
							# manualthumb? downstream behavior seems odd with
5212
							# missing manual thumbs.
5213
							$validated = true;
5214
							$value = $this->stripAltText( $value, $holders );
5215
							break;
5216
						case 'link':
5217
							$chars = self::EXT_LINK_URL_CLASS;
5218
							$addr = self::EXT_LINK_ADDR;
5219
							$prots = $this->mUrlProtocols;
5220
							if ( $value === '' ) {
5221
								$paramName = 'no-link';
5222
								$value = true;
5223
								$validated = true;
5224
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5225
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value, $m ) ) {
5226
									$paramName = 'link-url';
5227
									$this->mOutput->addExternalLink( $value );
5228
									if ( $this->mOptions->getExternalLinkTarget() ) {
5229
										$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
5230
									}
5231
									$validated = true;
5232
								}
5233
							} else {
5234
								$linkTitle = Title::newFromText( $value );
5235
								if ( $linkTitle ) {
5236
									$paramName = 'link-title';
5237
									$value = $linkTitle;
5238
									$this->mOutput->addLink( $linkTitle );
5239
									$validated = true;
5240
								}
5241
							}
5242
							break;
5243
						case 'frameless':
5244
						case 'framed':
5245
						case 'thumbnail':
5246
							// use first appearing option, discard others.
5247
							$validated = ! $seenformat;
5248
							$seenformat = true;
5249
							break;
5250
						default:
5251
							# Most other things appear to be empty or numeric...
5252
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
5253
						}
5254
					}
5255
5256
					if ( $validated ) {
5257
						$params[$type][$paramName] = $value;
5258
					}
5259
				}
5260
			}
5261
			if ( !$validated ) {
5262
				$caption = $part;
5263
			}
5264
		}
5265
5266
		# Process alignment parameters
5267
		if ( $params['horizAlign'] ) {
5268
			$params['frame']['align'] = key( $params['horizAlign'] );
5269
		}
5270
		if ( $params['vertAlign'] ) {
5271
			$params['frame']['valign'] = key( $params['vertAlign'] );
5272
		}
5273
5274
		$params['frame']['caption'] = $caption;
5275
5276
		# Will the image be presented in a frame, with the caption below?
5277
		$imageIsFramed = isset( $params['frame']['frame'] )
5278
			|| isset( $params['frame']['framed'] )
5279
			|| isset( $params['frame']['thumbnail'] )
5280
			|| isset( $params['frame']['manualthumb'] );
5281
5282
		# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
5283
		# came to also set the caption, ordinary text after the image -- which
5284
		# makes no sense, because that just repeats the text multiple times in
5285
		# screen readers.  It *also* came to set the title attribute.
5286
		# Now that we have an alt attribute, we should not set the alt text to
5287
		# equal the caption: that's worse than useless, it just repeats the
5288
		# text.  This is the framed/thumbnail case.  If there's no caption, we
5289
		# use the unnamed parameter for alt text as well, just for the time be-
5290
		# ing, if the unnamed param is set and the alt param is not.
5291
		# For the future, we need to figure out if we want to tweak this more,
5292
		# e.g., introducing a title= parameter for the title; ignoring the un-
5293
		# named parameter entirely for images without a caption; adding an ex-
5294
		# plicit caption= parameter and preserving the old magic unnamed para-
5295
		# meter for BC; ...
5296
		if ( $imageIsFramed ) { # Framed image
5297
			if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
5298
				# No caption or alt text, add the filename as the alt text so
5299
				# that screen readers at least get some description of the image
5300
				$params['frame']['alt'] = $title->getText();
5301
			}
5302
			# Do not set $params['frame']['title'] because tooltips don't make sense
5303
			# for framed images
5304
		} else { # Inline image
5305
			if ( !isset( $params['frame']['alt'] ) ) {
5306
				# No alt text, use the "caption" for the alt text
5307
				if ( $caption !== '' ) {
5308
					$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
5309
				} else {
5310
					# No caption, fall back to using the filename for the
5311
					# alt text
5312
					$params['frame']['alt'] = $title->getText();
5313
				}
5314
			}
5315
			# Use the "caption" for the tooltip text
5316
			$params['frame']['title'] = $this->stripAltText( $caption, $holders );
5317
		}
5318
5319
		Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );
5320
5321
		# Linker does the rest
5322
		$time = isset( $options['time'] ) ? $options['time'] : false;
5323
		$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
5324
			$time, $descQuery, $this->mOptions->getThumbSize() );
5325
5326
		# Give the handler a chance to modify the parser object
5327
		if ( $handler ) {
5328
			$handler->parserTransformHook( $this, $file );
5329
		}
5330
5331
		return $ret;
5332
	}
5333
5334
	/**
	 * Reduce a caption to plain text suitable for an attribute value such as
	 * alt or title: link placeholders are replaced by their text, strip
	 * markers are expanded and all tags are then removed.
	 *
	 * @param string $caption Caption text, possibly containing link
	 *   placeholders and strip markers
	 * @param LinkHolderArray|bool $holders Holder array used to resolve the
	 *   link placeholders; if falsy, replaceLinkHoldersText() is used instead
	 * @return mixed|string Whatever Sanitizer::stripAllTags() returns
	 */
	protected function stripAltText( $caption, $holders ) {
		# We can't unconditionally use replaceLinkHoldersText() here: if this
		# function is called from replaceInternalLinks2(), mLinkHolders won't
		# be up-to-date, so an explicitly supplied holder array wins.
		$plain = $holders
			? $holders->replaceText( $caption )
			: $this->replaceLinkHoldersText( $caption );

		# Placeholders must not survive into thumbnail attributes, where they
		# would later be expanded to HTML -- expand them now, then drop the tags.
		return Sanitizer::stripAllTags( $this->mStripState->unstripBoth( $plain ) );
	}
5357
5358
	/**
	 * Flag the current output object as dynamic content that shouldn't be
	 * cached, by setting its cache expiry to 0.
	 *
	 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
	 * @throws MWException If there is no output object, i.e. nothing is
	 *   being parsed
	 */
	public function disableCache() {
		# The debug line is emitted even when the call is about to fail
		wfDebug( "Parser output marked as uncacheable.\n" );
		if ( $this->mOutput ) {
			$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
			return;
		}
		throw new MWException( __METHOD__ .
			" can only be called when actually parsing something" );
	}
5371
5372
	/**
	 * Callback from the Sanitizer for expanding items found in HTML attribute
	 * values, so they can be safely tested and escaped.
	 *
	 * Variables are replaced first, then strip markers are expanded. $text is
	 * taken by reference and is overwritten with each intermediate result; the
	 * final value is also returned.
	 *
	 * @param string &$text Attribute value; modified in place
	 * @param bool|PPFrame $frame Frame handed on to replaceVariables()
	 * @return string The expanded text
	 */
	public function attributeStripCallback( &$text, $frame = false ) {
		$text = $this->replaceVariables( $text, $frame );
		return $text = $this->mStripState->unstripBoth( $text );
	}
5385
5386
	/**
	 * Accessor: list the names under which tag hooks are registered.
	 *
	 * The result is the keys of mTransparentTagHooks, followed by those of
	 * mTagHooks, followed by those of mFunctionTagHooks.
	 *
	 * @return array
	 */
	public function getTags() {
		$transparentNames = array_keys( $this->mTransparentTagHooks );
		$tagNames = array_keys( $this->mTagHooks );
		$functionTagNames = array_keys( $this->mFunctionTagHooks );

		return array_merge( $transparentNames, $tagNames, $functionTagNames );
	}
5398
5399
	/**
	 * Replace transparent tags in $text with the values given by the callbacks.
	 *
	 * Transparent tag hooks are like regular XML-style tag hooks, except they
	 * operate late in the transformation sequence, on HTML instead of wikitext.
	 *
	 * @param string $text
	 *
	 * @return string $text with every extracted tag replaced either by its
	 *   hook's return value or, when no hook is registered under the
	 *   lower-cased element name, by the original tag text
	 */
	public function replaceTransparentTags( $text ) {
		$found = [];
		$elements = array_keys( $this->mTransparentTagHooks );
		# $found is filled with marker => tag data by extractTagsAndParams()
		$stripped = self::extractTagsAndParams( $elements, $text, $found );

		$map = [];
		foreach ( $found as $marker => $info ) {
			list( $element, $content, $params, $tag ) = $info;
			$hookKey = strtolower( $element );

			if ( !isset( $this->mTransparentTagHooks[$hookKey] ) ) {
				# No hook for this element: put the tag back as it was
				$map[$marker] = $tag;
				continue;
			}

			$map[$marker] = call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				[ $content, $params, $this ]
			);
		}

		return strtr( $stripped, $map );
	}
5430
5431
	/**
	 * Break wikitext input into sections, and either pull or replace
	 * some particular section's text.
	 *
	 * External callers should use the getSection and replaceSection methods.
	 *
	 * @param string $text Page wikitext
	 * @param string|number $sectionId A section identifier string of the form:
	 *   "<flag1> - <flag2> - ... - <section number>"
	 *
	 * Currently the only recognised flag is "T", which means the target section number
	 * was derived during a template inclusion parse, in other words this is a template
	 * section edit link. If no flags are given, it was an ordinary section edit link.
	 * This flag is required to avoid a section numbering mismatch when a section is
	 * enclosed by "<includeonly>" (bug 6563).
	 *
	 * The section number 0 pulls the text before the first heading; other numbers will
	 * pull the given section along with its lower-level subsections. If the section is
	 * not found, $mode=get will return $newText, and $mode=replace will return $text.
	 *
	 * Section 0 is always considered to exist, even if it only contains the empty
	 * string. If $text is the empty string and section 0 is replaced, $newText is
	 * returned.
	 *
	 * @param string $mode One of "get" or "replace"
	 * @param string $newText Replacement text for section data.
	 * @return string For "get", the extracted section text.
	 *   for "replace", the whole page with the section replaced.
	 */
	private function extractSections( $text, $sectionId, $mode, $newText = '' ) {
		global $wgTitle; # not generally used but removes an ugly failure mode

		# Holding on to the return value keeps the parser locked until we return
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true );
		$frame = $this->getPreprocessor()->newFrame();
		$outText = '';
		$isGet = ( $mode === 'get' );
		$isReplace = ( $mode === 'replace' );

		# The last dash-separated part is the section number; everything
		# before it is a flag
		$sectionParts = explode( '-', $sectionId );
		$sectionIndex = array_pop( $sectionParts );
		$flags = 0;
		foreach ( $sectionParts as $flagName ) {
			if ( $flagName === 'T' ) {
				$flags |= self::PTD_FOR_INCLUSION;
			}
		}

		# Empty input: only sections 0 and T-0 exist in an empty document
		if ( strval( $text ) === '' ) {
			if ( $sectionIndex == 0 ) {
				return $isGet ? '' : $newText;
			}
			return $isGet ? $newText : $text;
		}

		# Preprocess the text
		$root = $this->preprocessToDom( $text, $flags );

		# <h> nodes indicate section breaks
		# They can only occur at the top level, so we can find them by iterating the root's children
		$node = $root->getFirstChild();

		# Find the target section
		if ( $sectionIndex == 0 ) {
			# Section zero doesn't nest, level=big
			$targetLevel = 1000;
		} else {
			for ( ; $node; $node = $node->getNextSibling() ) {
				if ( $node->getName() === 'h' ) {
					$heading = $node->splitHeading();
					if ( $heading['i'] == $sectionIndex ) {
						$targetLevel = $heading['level'];
						break;
					}
				}
				# In replace mode everything ahead of the target is copied through
				if ( $isReplace ) {
					$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
				}
			}
		}

		if ( !$node ) {
			# Not found
			return $isGet ? $newText : $text;
		}

		# Find the end of the section, including nested sections: stop at the
		# first other heading whose level is not deeper than the target's
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] != $sectionIndex && $heading['level'] <= $targetLevel ) {
					break;
				}
			}
			if ( $isGet ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}

		# Write out the remainder (in replace mode only)
		if ( $isReplace ) {
			# Output the replacement text
			# Add two newlines on -- trailing whitespace in $newText is conventionally
			# stripped by the editor, so we need both newlines to restore the paragraph gap
			# Only add trailing whitespace if there is newText
			if ( $newText != "" ) {
				$outText .= $newText . "\n\n";
			}

			for ( ; $node; $node = $node->getNextSibling() ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}

		# Re-insert stripped tags
		return rtrim( $this->mStripState->unstripBoth( $outText ) );
	}
5570
5571
	/**
	 * Return the text of one section of a page, selected by $sectionId.
	 *
	 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
	 * the first section before any such heading (section 0). Subsections of
	 * the requested section are included in the result.
	 *
	 * This is extractSections() in "get" mode.
	 *
	 * @param string $text Text to look in
	 * @param string|number $sectionId Section identifier as a number or string
	 * (e.g. 0, 1 or 'T-1').
	 * @param string $defaultText Default to return if section is not found
	 *
	 * @return string Text of the requested section
	 */
	public function getSection( $text, $sectionId, $defaultText = '' ) {
		return $this->extractSections( $text, $sectionId, 'get', $defaultText );
	}
5588
5589
	/**
	 * Return $oldText with the content of the section identified by
	 * $sectionId replaced by $newText. If the target section does not exist,
	 * $oldText is returned unchanged.
	 *
	 * This is extractSections() in "replace" mode.
	 *
	 * @param string $oldText Former text of the article
	 * @param string|number $sectionId Section identifier as a number or string
	 * (e.g. 0, 1 or 'T-1').
	 * @param string $newText Replacing text
	 *
	 * @return string Modified text
	 */
	public function replaceSection( $oldText, $sectionId, $newText ) {
		return $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	}
5604
5605
	/**
	 * Accessor for the ID of the revision being parsed (mRevisionId).
	 *
	 * @return int|null
	 */
	public function getRevisionId() {
		return $this->mRevisionId;
	}
5613
5614
	/**
	 * Get the revision object for $this->mRevisionId
	 *
	 * The object is looked up once and then kept in mRevisionObject.
	 *
	 * @return Revision|null Either a Revision object or null
	 * @since 1.23 (public since 1.23)
	 */
	public function getRevisionObject() {
		if ( $this->mRevisionObject !== null ) {
			# Already resolved earlier
			return $this->mRevisionObject;
		}
		if ( $this->mRevisionId === null ) {
			return null;
		}

		$callback = $this->mOptions->getCurrentRevisionCallback();
		$revision = call_user_func( $callback, $this->getTitle(), $this );

		# If the parse is for a new revision, then the callback should have
		# already been set to force the object and should match mRevisionId.
		# If not, try to fetch by mRevisionId for sanity.
		if ( $revision && $revision->getId() != $this->mRevisionId ) {
			$revision = Revision::newFromId( $this->mRevisionId );
		}

		return $this->mRevisionObject = $revision;
	}
5643
5644
	/**
	 * Get the timestamp associated with the current revision, adjusted for
	 * the default server-local timestamp
	 *
	 * When there is no revision object the current time is used instead. The
	 * result is kept in mRevisionTimestamp and reused on later calls.
	 *
	 * @return string
	 */
	public function getRevisionTimestamp() {
		if ( $this->mRevisionTimestamp !== null ) {
			return $this->mRevisionTimestamp;
		}

		global $wgContLang;

		$revision = $this->getRevisionObject();
		$rawTimestamp = $revision ? $revision->getTimestamp() : wfTimestampNow();

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $wgContLang->userAdjust( $rawTimestamp, '' );

		return $this->mRevisionTimestamp;
	}
5666
5667
	/**
	 * Get the name of the user that edited the last revision
	 *
	 * The name is kept in mRevisionUser once determined. If there is no
	 * revision object and this is neither a 'wiki' output type parse nor a
	 * preview, nothing is assigned and the current (unset) value is returned.
	 *
	 * @return string User name
	 */
	public function getRevisionUser() {
		if ( $this->mRevisionUser !== null ) {
			return $this->mRevisionUser;
		}

		$revision = $this->getRevisionObject();
		if ( $revision ) {
			$this->mRevisionUser = $revision->getUserText();
		} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
			# if this template is subst: the revision id will be blank,
			# so just use the current user's name
			$this->mRevisionUser = $this->getUser()->getName();
		}

		return $this->mRevisionUser;
	}
5686
5687
	/**
	 * Get the size of the revision
	 *
	 * The value is kept in mRevisionSize once determined.
	 *
	 * @return int|null Revision size
	 */
	public function getRevisionSize() {
		if ( $this->mRevisionSize === null ) {
			$revision = $this->getRevisionObject();

			# if this variable is subst: the revision id will be blank,
			# so just use the parser input size, because the own substitution
			# will change the size.
			$this->mRevisionSize = $revision
				? $revision->getSize()
				: $this->mInputSize;
		}

		return $this->mRevisionSize;
	}
5707
5708
	/**
	 * Mutator for $mDefaultSort
	 *
	 * The value is also stored on the output object as the 'defaultsort'
	 * property.
	 *
	 * @param string $sort New value
	 */
	public function setDefaultSort( $sort ) {
		$this->mDefaultSort = $sort;
		$this->mOutput->setProperty( 'defaultsort', $sort );
	}
5717
5718
	/**
	 * Accessor for $mDefaultSort
	 * Will use the empty string if none is set (i.e. while it is false).
	 *
	 * This value is treated as a prefix, so the
	 * empty string is equivalent to sorting by
	 * page name.
	 *
	 * @return string
	 */
	public function getDefaultSort() {
		return $this->mDefaultSort !== false ? $this->mDefaultSort : '';
	}
5735
5736
	/**
	 * Accessor for $mDefaultSort
	 * Unlike getDefaultSort(), the stored value is returned as is, so the
	 * result is false if none is set.
	 *
	 * @return string|bool
	 */
	public function getCustomDefaultSort() {
		return $this->mDefaultSort;
	}
5745
5746
	/**
	 * Try to guess the section anchor name based on a wikitext fragment
	 * presumably extracted from a heading, for example "Header" from
	 * "== Header ==".
	 *
	 * @param string $text
	 *
	 * @return string The anchor, prefixed with '#'
	 */
	public function guessSectionNameFromWikiText( $text ) {
		# Strip out wikitext links (they break the anchor), then tidy whitespace
		$sectionName = Sanitizer::normalizeSectionNameWhitespace(
			$this->stripSectionName( $text )
		);

		return '#' . Sanitizer::escapeId( $sectionName, 'noninitial' );
	}
5761
5762
	/**
	 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
	 * instead.  For use in redirects, since IE6 interprets Redirect: headers
	 * as something other than UTF-8 (apparently?), resulting in breakage.
	 *
	 * @param string $text The section name
	 * @return string An anchor, prefixed with '#'
	 */
	public function guessLegacySectionNameFromWikiText( $text ) {
		# Strip out wikitext links (they break the anchor), then tidy whitespace
		$sectionName = Sanitizer::normalizeSectionNameWhitespace(
			$this->stripSectionName( $text )
		);

		return '#' . Sanitizer::escapeId( $sectionName, [ 'noninitial', 'legacy' ] );
	}
5776
5777
	/**
	 * Strips a text string of wikitext for use in a section anchor
	 *
	 * Accepts a text string and then removes all wikitext from the
	 * string and leaves only the resultant text (i.e. the result of
	 * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of
	 * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended
	 * to create valid section anchors by mimicking the output of the
	 * parser when headings are parsed.
	 *
	 * @param string $text Text string to be stripped of wikitext
	 * for use in a Section anchor
	 * @return string Filtered text string
	 */
	public function stripSectionName( $text ) {
		# Strip internal link markup: piped links keep their label, plain
		# links keep their target
		$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
		$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
		$text = preg_replace( $pipedLink, '$2', $text );
		$text = preg_replace( $plainLink, '$1', $text );

		# Strip external link markup
		# @todo FIXME: Not tolerant to blank link text
		# I.E. [https://www.mediawiki.org] will render as [1] or something depending
		# on how many empty links there are on the page - need to figure that out.
		$externalLink = '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/';
		$text = preg_replace( $externalLink, '$2', $text );

		# Parse wikitext quotes (italics & bold)
		$text = $this->doQuotes( $text );

		# Strip HTML tags
		return StringUtils::delimiterReplace( '<', '>', '', $text );
	}
5809
5810
	/**
	 * strip/replaceVariables/unstrip for preprocessor regression testing
	 *
	 * Starts a parse for the given title and options, replaces variables in
	 * $text, expands strip markers and finally runs the result through
	 * Sanitizer::removeHTMLtags().
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @param int $outputType
	 *
	 * @return string
	 */
	public function testSrvus( $text, Title $title, ParserOptions $options,
		$outputType = self::OT_HTML
	) {
		# Holding on to the return value keeps the parser locked until we return
		$magicScopeVariable = $this->lock();
		$this->startParse( $title, $options, $outputType, true );

		$expanded = $this->replaceVariables( $text );
		$unstripped = $this->mStripState->unstripBoth( $expanded );

		return Sanitizer::removeHTMLtags( $unstripped );
	}
5831
5832
	/**
	 * Run preSaveTransform() on $text, taking the user from $options.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @return string
	 */
	public function testPst( $text, Title $title, ParserOptions $options ) {
		$user = $options->getUser();

		return $this->preSaveTransform( $text, $title, $user, $options );
	}
5841
5842
	/**
	 * Run testSrvus() on $text with the output type set to OT_PREPROCESS.
	 *
	 * @param string $text
	 * @param Title $title
	 * @param ParserOptions $options
	 * @return string
	 */
	public function testPreprocess( $text, Title $title, ParserOptions $options ) {
		return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
	}
5851
5852
	/**
	 * Call a callback function on all regions of the given text that are not
	 * inside strip markers, and replace those regions with the return value
	 * of the callback. For example, with input:
	 *
	 *  aaa<MARKER>bbb
	 *
	 * This will call the callback function twice, with 'aaa' and 'bbb'. Those
	 * two strings will be replaced with the value returned by the callback in
	 * each case.
	 *
	 * The markers themselves are copied to the output untouched. A marker
	 * prefix without a matching suffix is copied through as is, together with
	 * everything after it.
	 *
	 * @param string $s
	 * @param callable $callback
	 *
	 * @return string
	 */
	public function markerSkipCallback( $s, $callback ) {
		$offset = 0;
		$result = '';
		$length = strlen( $s );
		$suffixLength = strlen( self::MARKER_SUFFIX );

		while ( $offset < $length ) {
			$start = strpos( $s, self::MARKER_PREFIX, $offset );
			if ( $start === false ) {
				# No further marker: the rest of the string is plain text
				$result .= call_user_func( $callback, substr( $s, $offset ) );
				break;
			}

			# Plain text (possibly empty) in front of the marker
			$result .= call_user_func( $callback, substr( $s, $offset, $start - $offset ) );

			$suffixAt = strpos( $s, self::MARKER_SUFFIX, $start );
			if ( $suffixAt === false ) {
				# Unterminated marker: copy the remainder without calling back
				$result .= substr( $s, $start );
				break;
			}

			# Copy the whole marker and carry on right behind it
			$offset = $suffixAt + $suffixLength;
			$result .= substr( $s, $start, $offset - $start );
		}

		return $result;
	}
5891
5892
	/**
	 * Remove any strip markers found in the given text, by delegating to
	 * the strip state's killMarkers().
	 *
	 * @param string $text Input string
	 * @return string
	 */
	public function killMarkers( $text ) {
		return $this->mStripState->killMarkers( $text );
	}
5901
5902
	/**
	 * Save the parser state required to convert the given half-parsed text to
	 * HTML. "Half-parsed" in this context means the output of
	 * recursiveTagParse() or internalParse(). This output has strip markers
	 * from replaceVariables (extensionSubstitution() etc.), and link
	 * placeholders from replaceLinkHolders().
	 *
	 * Returns an array which can be serialized and stored persistently. This
	 * array can later be loaded into another parser instance with
	 * unserializeHalfParsedText(). The text can then be safely incorporated into
	 * the return value of a parser hook.
	 *
	 * @param string $text
	 *
	 * @return array With the keys 'text', 'version', 'stripState' and
	 *   'linkHolders'
	 */
	public function serializeHalfParsedText( $text ) {
		return [
			'text' => $text,
			'version' => self::HALF_PARSED_VERSION,
			'stripState' => $this->mStripState->getSubState( $text ),
			'linkHolders' => $this->mLinkHolders->getSubArray( $text )
		];
	}
5927
5928
	/**
	 * Load the parser state given in the $data array, which is assumed to
	 * have been generated by serializeHalfParsedText(). The text contents is
	 * extracted from the array, and its markers are transformed into markers
	 * appropriate for the current Parser instance. This transformed text is
	 * returned, and can be safely included in the return value of a parser
	 * hook.
	 *
	 * If the $data array has been stored persistently, the caller should first
	 * check whether it is still valid, by calling isValidHalfParsedText().
	 *
	 * @param array $data Serialized data
	 * @throws MWException If isValidHalfParsedText() rejects $data, i.e. its
	 *   'version' entry is missing or differs from HALF_PARSED_VERSION
	 * @return string
	 */
	public function unserializeHalfParsedText( $data ) {
		# Use the same validity rule that callers are told to check with,
		# rather than keeping a second copy of the version comparison here.
		if ( !$this->isValidHalfParsedText( $data ) ) {
			throw new MWException( __METHOD__ . ': invalid version' );
		}

		# First, extract the strip state.
		$texts = [ $data['text'] ];
		$texts = $this->mStripState->merge( $data['stripState'], $texts );

		# Now renumber links
		$texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );

		# Should be good to go.
		return $texts[0];
	}
5958
5959
	/**
	 * Returns true if the given array, presumed to be generated by
	 * serializeHalfParsedText(), is compatible with the current version of the
	 * parser.
	 *
	 * Compatible means that a 'version' entry is set and loosely equals
	 * HALF_PARSED_VERSION.
	 *
	 * @param array $data
	 *
	 * @return bool
	 */
	public function isValidHalfParsedText( $data ) {
		if ( !isset( $data['version'] ) ) {
			return false;
		}

		return $data['version'] == self::HALF_PARSED_VERSION;
	}
5971
5972
	/**
	 * Parse a width param of imagelink like 300px or 200x300px
	 *
	 * @param string $value
	 *
	 * @return array Empty if $value is the empty string or matches neither
	 *   form; otherwise holds an integer 'width' and, for the
	 *   "<width>x<height>" form, an integer 'height' as well
	 * @since 1.20
	 */
	public function parseWidthParam( $value ) {
		if ( $value === '' ) {
			return [];
		}

		# (bug 13500) In both cases (width/height and width only),
		# permit trailing "px" for backward compatibility.
		$parts = [];
		if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $parts ) ) {
			return [
				'width' => intval( $parts[1] ),
				'height' => intval( $parts[2] ),
			];
		}

		if ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) {
			return [ 'width' => intval( $value ) ];
		}

		return [];
	}
5999
6000
	/**
	 * Lock the current instance of the parser.
	 *
	 * This is meant to stop someone from calling the parser
	 * recursively and messing up all the strip state.
	 *
	 * @throws MWException If parser is in a parse
	 * @return ScopedCallback The lock will be released once the return value goes out of scope.
	 */
	protected function lock() {
		if ( $this->mInParse ) {
			throw new MWException( "Parser state cleared while parsing. "
				. "Did you call Parser::parse recursively?" );
		}
		$this->mInParse = true;

		# The callback clears the flag again
		return new ScopedCallback( function() {
			$this->mInParse = false;
		} );
	}
6022
6023
	/**
	 * Strip outer <p></p> tag from the HTML source of a single paragraph.
	 *
	 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
	 * or if there is more than one <p/> tag in the input HTML.
	 *
	 * @param string $html
	 * @return string
	 * @since 1.24
	 */
	public static function stripOuterParagraph( $html ) {
		$match = [];
		# The wrapper must span the whole input, and the wrapped content must
		# not itself close a paragraph (that would mean several paragraphs)
		$isSingleParagraph = preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match )
			&& strpos( $match[1], '</p>' ) === false;

		return $isSingleParagraph ? $match[1] : $html;
	}
6043
6044
	/**
	 * Return this parser if it is not doing anything, otherwise
	 * get a fresh parser. You can use this method by doing
	 * $myParser = $wgParser->getFreshParser(), or more simply
	 * $wgParser->getFreshParser()->parse( ... );
	 * if you're unsure if $wgParser is safe to use.
	 *
	 * A fresh parser is an instance of the class named in
	 * $wgParserConf['class'], constructed with $wgParserConf.
	 *
	 * @since 1.24
	 * @return Parser A parser object that is not parsing anything
	 */
	public function getFreshParser() {
		global $wgParserConf;

		if ( !$this->mInParse ) {
			return $this;
		}

		return new $wgParserConf['class']( $wgParserConf );
	}
6062
6063
	/**
	 * Sets up the PHP implementation of OOUI for use in this request
	 * and instructs OutputPage to enable OOUI for itself.
	 *
	 * OutputPage::setupOOUI() is called first; the enable flag is then set
	 * on the parser's output object.
	 *
	 * @since 1.26
	 */
	public function enableOOUI() {
		OutputPage::setupOOUI();
		$this->mOutput->setEnableOOUI( true );
	}
6073
}
6074