These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * PHP parser that converts wiki markup to HTML. |
||
4 | * |
||
5 | * This program is free software; you can redistribute it and/or modify |
||
6 | * it under the terms of the GNU General Public License as published by |
||
7 | * the Free Software Foundation; either version 2 of the License, or |
||
8 | * (at your option) any later version. |
||
9 | * |
||
10 | * This program is distributed in the hope that it will be useful, |
||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
13 | * GNU General Public License for more details. |
||
14 | * |
||
15 | * You should have received a copy of the GNU General Public License along |
||
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
18 | * http://www.gnu.org/copyleft/gpl.html |
||
19 | * |
||
20 | * @file |
||
21 | * @ingroup Parser |
||
22 | */ |
||
23 | use MediaWiki\Linker\LinkRenderer; |
||
24 | use MediaWiki\MediaWikiServices; |
||
25 | use Wikimedia\ScopedCallback; |
||
26 | |||
27 | /** |
||
28 | * @defgroup Parser Parser |
||
29 | */ |
||
30 | |||
31 | /** |
||
32 | * PHP Parser - Processes wiki markup (which uses a more user-friendly |
||
33 | * syntax, such as "[[link]]" for making links), and provides a one-way |
||
34 | * transformation of that wiki markup into (X)HTML output / markup |
||
35 | * (which in turn the browser understands, and can display). |
||
36 | * |
||
37 | * There are seven main entry points into the Parser class: |
||
38 | * |
||
39 | * - Parser::parse() |
||
40 | * produces HTML output |
||
41 | * - Parser::preSaveTransform() |
||
42 | * produces altered wiki markup |
||
43 | * - Parser::preprocess() |
||
44 | * removes HTML comments and expands templates |
||
45 | * - Parser::cleanSig() and Parser::cleanSigInSig() |
||
46 | * cleans a signature before saving it to preferences |
||
47 | * - Parser::getSection() |
||
48 | * return the content of a section from an article for section editing |
||
49 | * - Parser::replaceSection() |
||
50 | * replaces a section by number inside an article |
||
51 | * - Parser::getPreloadText() |
||
52 | * removes <noinclude> sections and <includeonly> tags |
||
53 | * |
||
54 | * Globals used: |
||
55 | * object: $wgContLang |
||
56 | * |
||
57 | * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away! |
||
58 | * |
||
59 | * @par Settings: |
||
60 | * $wgNamespacesWithSubpages |
||
61 | * |
||
62 | * @par Settings only within ParserOptions: |
||
63 | * $wgAllowExternalImages |
||
64 | * $wgAllowSpecialInclusion |
||
65 | * $wgInterwikiMagic |
||
66 | * $wgMaxArticleSize |
||
67 | * |
||
68 | * @ingroup Parser |
||
69 | */ |
||
70 | class Parser { |
||
71 | /** |
||
72 | * Update this version number when the ParserOutput format |
||
73 | * changes in an incompatible way, so the parser cache |
||
74 | * can automatically discard old data. |
||
75 | */ |
||
76 | const VERSION = '1.6.4'; |
||
77 | |||
78 | /** |
||
79 | * Update this version number when the output of serialiseHalfParsedText() |
||
80 | * changes in an incompatible way |
||
81 | */ |
||
82 | const HALF_PARSED_VERSION = 2; |
||
83 | |||
84 | # Flags for Parser::setFunctionHook |
||
85 | const SFH_NO_HASH = 1; |
||
86 | const SFH_OBJECT_ARGS = 2; |
||
87 | |||
88 | # Constants needed for external link processing |
||
89 | # Everything except bracket, space, or control characters |
||
90 | # \p{Zs} is unicode 'separator, space' category. It covers the space 0x20 |
||
91 | # as well as U+3000 (IDEOGRAPHIC SPACE), for bug 19052 |
||
92 | const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]'; |
||
93 | # Simplified expression to match an IPv4 or IPv6 address, or |
||
94 | # at least one character of a host name (embeds EXT_LINK_URL_CLASS) |
||
95 | const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])'; |
||
96 | # RegExp to make image URLs (embeds IPv6 part of EXT_LINK_ADDR) |
||
97 | // @codingStandardsIgnoreStart Generic.Files.LineLength |
||
98 | const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}]+) |
||
99 | \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sxu'; |
||
100 | // @codingStandardsIgnoreEnd |
||
101 | |||
102 | # Regular expression for a non-newline space |
||
103 | const SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})'; |
||
104 | |||
105 | # Flags for preprocessToDom |
||
106 | const PTD_FOR_INCLUSION = 1; |
||
107 | |||
108 | # Allowed values for $this->mOutputType |
||
109 | # Parameter to startExternalParse(). |
||
110 | const OT_HTML = 1; # like parse() |
||
111 | const OT_WIKI = 2; # like preSaveTransform() |
||
112 | const OT_PREPROCESS = 3; # like preprocess() |
||
113 | const OT_MSG = 3; |
||
114 | const OT_PLAIN = 4; # like extractSections() - portions of the original are returned unchanged. |
||
115 | |||
116 | /** |
||
117 | * @var string Prefix and suffix for temporary replacement strings |
||
118 | * for the multipass parser. |
||
119 | * |
||
120 | * \x7f should never appear in input as it's disallowed in XML. |
||
121 | * Using it at the front also gives us a little extra robustness |
||
122 | * since it shouldn't match when butted up against identifier-like |
||
123 | * string constructs. |
||
124 | * |
||
125 | * Must not consist of all title characters, or else it will change |
||
126 | * the behavior of <nowiki> in a link. |
||
127 | * |
||
128 | * Must have a character that needs escaping in attributes, otherwise |
||
129 | * someone could put a strip marker in an attribute, to get around |
||
130 | * escaping quote marks, and break out of the attribute. Thus we add |
||
131 | * `'". |
||
132 | */ |
||
133 | const MARKER_SUFFIX = "-QINU`\"'\x7f"; |
||
134 | const MARKER_PREFIX = "\x7f'\"`UNIQ-"; |
||
135 | |||
136 | # Markers used for wrapping the table of contents |
||
137 | const TOC_START = '<mw:toc>'; |
||
138 | const TOC_END = '</mw:toc>'; |
||
139 | |||
140 | # Persistent: |
||
141 | public $mTagHooks = []; |
||
142 | public $mTransparentTagHooks = []; |
||
143 | public $mFunctionHooks = []; |
||
144 | public $mFunctionSynonyms = [ 0 => [], 1 => [] ]; |
||
145 | public $mFunctionTagHooks = []; |
||
146 | public $mStripList = []; |
||
147 | public $mDefaultStripList = []; |
||
148 | public $mVarCache = []; |
||
149 | public $mImageParams = []; |
||
150 | public $mImageParamsMagicArray = []; |
||
151 | public $mMarkerIndex = 0; |
||
152 | public $mFirstCall = true; |
||
153 | |||
154 | # Initialised by initialiseVariables() |
||
155 | |||
156 | /** |
||
157 | * @var MagicWordArray |
||
158 | */ |
||
159 | public $mVariables; |
||
160 | |||
161 | /** |
||
162 | * @var MagicWordArray |
||
163 | */ |
||
164 | public $mSubstWords; |
||
165 | # Initialised in constructor |
||
166 | public $mConf, $mExtLinkBracketedRegex, $mUrlProtocols; |
||
167 | |||
168 | # Initialized in getPreprocessor() |
||
169 | /** @var Preprocessor */ |
||
170 | public $mPreprocessor; |
||
171 | |||
172 | # Cleared with clearState(): |
||
173 | /** |
||
174 | * @var ParserOutput |
||
175 | */ |
||
176 | public $mOutput; |
||
177 | public $mAutonumber; |
||
178 | |||
179 | /** |
||
180 | * @var StripState |
||
181 | */ |
||
182 | public $mStripState; |
||
183 | |||
184 | public $mIncludeCount; |
||
185 | /** |
||
186 | * @var LinkHolderArray |
||
187 | */ |
||
188 | public $mLinkHolders; |
||
189 | |||
190 | public $mLinkID; |
||
191 | public $mIncludeSizes, $mPPNodeCount, $mGeneratedPPNodeCount, $mHighestExpansionDepth; |
||
192 | public $mDefaultSort; |
||
193 | public $mTplRedirCache, $mTplDomCache, $mHeadings, $mDoubleUnderscores; |
||
194 | public $mExpensiveFunctionCount; # number of expensive parser function calls |
||
195 | public $mShowToc, $mForceTocPosition; |
||
196 | |||
197 | /** |
||
198 | * @var User |
||
199 | */ |
||
200 | public $mUser; # User object; only used when doing pre-save transform |
||
201 | |||
202 | # Temporary |
||
203 | # These are variables reset at least once per parse regardless of $clearState |
||
204 | |||
205 | /** |
||
206 | * @var ParserOptions |
||
207 | */ |
||
208 | public $mOptions; |
||
209 | |||
210 | /** |
||
211 | * @var Title |
||
212 | */ |
||
213 | public $mTitle; # Title context, used for self-link rendering and similar things |
||
214 | public $mOutputType; # Output type, one of the OT_xxx constants |
||
215 | public $ot; # Shortcut alias, see setOutputType() |
||
216 | public $mRevisionObject; # The revision object of the specified revision ID |
||
217 | public $mRevisionId; # ID to display in {{REVISIONID}} tags |
||
218 | public $mRevisionTimestamp; # The timestamp of the specified revision ID |
||
219 | public $mRevisionUser; # User to display in {{REVISIONUSER}} tag |
||
220 | public $mRevisionSize; # Size to display in {{REVISIONSIZE}} variable |
||
221 | public $mRevIdForTs; # The revision ID which was used to fetch the timestamp |
||
222 | public $mInputSize = false; # For {{PAGESIZE}} on current page. |
||
223 | |||
224 | /** |
||
225 | * @var string Deprecated accessor for the strip marker prefix. |
||
226 | * @deprecated since 1.26; use Parser::MARKER_PREFIX instead. |
||
227 | **/ |
||
228 | public $mUniqPrefix = Parser::MARKER_PREFIX; |
||
229 | |||
230 | /** |
||
231 | * @var array Array with the language name of each language link (i.e. the |
||
232 | * interwiki prefix) in the key, value arbitrary. Used to avoid sending |
||
233 | * duplicate language links to the ParserOutput. |
||
234 | */ |
||
235 | public $mLangLinkLanguages; |
||
236 | |||
237 | /** |
||
238 | * @var MapCacheLRU|null |
||
239 | * @since 1.24 |
||
240 | * |
||
241 | * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey() |
||
242 | */ |
||
243 | public $currentRevisionCache; |
||
244 | |||
245 | /** |
||
246 | * @var bool Recursive call protection. |
||
247 | * This variable should be treated as if it were private. |
||
248 | */ |
||
249 | public $mInParse = false; |
||
250 | |||
251 | /** @var SectionProfiler */ |
||
252 | protected $mProfiler; |
||
253 | |||
254 | /** |
||
255 | * @var LinkRenderer |
||
256 | */ |
||
257 | protected $mLinkRenderer; |
||
258 | |||
/**
 * Set up the parser: remember the configuration, cache the URL protocol
 * pattern, build the regex for bracketed external links and decide which
 * preprocessor class to use.
 *
 * @param array $conf Configuration array. The only key read here is
 *   'preprocessorClass', which overrides the automatic selection.
 */
public function __construct( $conf = [] ) {
	$this->mConf = $conf;
	$this->mUrlProtocols = wfUrlProtocols();
	$this->mExtLinkBracketedRegex = implode( '', [
		'/\[(((?i)',
		$this->mUrlProtocols,
		')',
		self::EXT_LINK_ADDR,
		self::EXT_LINK_URL_CLASS,
		'*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su',
	] );

	# Preprocessor_Hash is the fallback for every case not singled out below.
	$preprocessorClass = 'Preprocessor_Hash';
	if ( isset( $conf['preprocessorClass'] ) ) {
		# An explicit choice in the configuration always wins.
		$preprocessorClass = $conf['preprocessorClass'];
	} elseif ( !defined( 'HPHP_VERSION' ) ) {
		# Preprocessor_Hash is much faster than Preprocessor_DOM under HipHop,
		# so the DOM variant is only considered on other runtimes.
		if ( extension_loaded( 'domxml' ) ) {
			# PECL extension that conflicts with the core DOM extension (bug 13770)
			wfDebug( "Warning: you have the obsolete domxml extension for PHP. Please remove it!\n" );
		} elseif ( extension_loaded( 'dom' ) ) {
			$preprocessorClass = 'Preprocessor_DOM';
		}
	}

	# NOTE(review): mPreprocessorClass is not among the property declarations
	# visible above this constructor; confirm it is declared elsewhere in the
	# class, otherwise it is created dynamically here.
	$this->mPreprocessorClass = $preprocessorClass;
	wfDebug( __CLASS__ . ": using preprocessor: {$this->mPreprocessorClass}\n" );
}
||
284 | |||
/**
 * Drop every property on destruction so that circular references held by
 * this object have less impact on memory usage.
 */
public function __destruct() {
	# The link holder array is released first, before the generic sweep.
	if ( isset( $this->mLinkHolders ) ) {
		unset( $this->mLinkHolders );
	}

	# Then unset whatever properties remain on the object.
	foreach ( $this as $property => $unused ) {
		unset( $this->$property );
	}
}
||
296 | |||
/**
 * Fix up a freshly cloned parser and let extensions react to the clone
 * through the 'ParserCloned' hook.
 */
public function __clone() {
	# A clone is never in the middle of a parse, whatever the original was doing.
	$this->mInParse = false;

	// Bug 56226: taking a reference "to" an object field turns the field
	// itself into a reference (until the other reference goes out of scope),
	// and cloning copies reference fields as references. The clone would
	// therefore share these fields with the original. Both are defined PHP5
	// behaviors, inconvenient as they are while old PHP4-era hooks still
	// pass fields by reference.
	$sharedFields = [ 'mStripState', 'mVarCache' ];
	foreach ( $sharedFields as $field ) {
		// Take a plain (non-reference) copy of the value, rebind the field
		// to that copy, then drop the local name so only the field holds it.
		$detached = $this->$field;
		$this->$field =& $detached;
		unset( $detached );
	}

	Hooks::run( 'ParserCloned', [ $this ] );
}
||
319 | |||
/**
 * Do various kinds of initialisation on the first call of the parser.
 *
 * Registers the core parser functions and core tag hooks, initialises the
 * variable tables and runs the 'ParserFirstCallInit' hook. Calls after the
 * first one return immediately.
 */
public function firstCallInit() {
	if ( !$this->mFirstCall ) {
		return;
	}
	# Flip the flag before doing the work so that the body runs only once.
	$this->mFirstCall = false;

	CoreParserFunctions::register( $this );
	CoreTagHooks::register( $this );
	$this->initialiseVariables();

	# Hand the hook a local alias instead of $this: from PHP 7.1 on, $this can
	# no longer be bound by reference, so handlers declaring this argument by
	# reference would receive a plain value and raise a warning.
	$parser = $this;
	Hooks::run( 'ParserFirstCallInit', [ &$parser ] );
}
||
335 | |||
/**
 * Clear Parser state.
 *
 * Runs the first-call initialisation if it has not happened yet, then
 * resets the per-parse members (output object, counters, caches, strip
 * state, profiler) and finally runs the 'ParserClearState' hook.
 *
 * @private
 */
public function clearState() {
	if ( $this->mFirstCall ) {
		$this->firstCallInit();
	}
	$this->mOutput = new ParserOutput;
	# Register the new output object's recordOption() as a watcher on the options.
	$this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
	$this->mAutonumber = 0;
	$this->mIncludeCount = [];
	$this->mLinkHolders = new LinkHolderArray( $this );
	$this->mLinkID = 0;
	$this->mRevisionObject = $this->mRevisionTimestamp =
		$this->mRevisionId = $this->mRevisionUser = $this->mRevisionSize = null;
	$this->mVarCache = [];
	$this->mUser = null;
	$this->mLangLinkLanguages = [];
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState;

	# Clear these on every parse, bug 4549
	$this->mTplRedirCache = $this->mTplDomCache = [];

	$this->mShowToc = true;
	$this->mForceTocPosition = false;
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mGeneratedPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mDefaultSort = false;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	# Fix cloning: a preprocessor still bound to another parser instance is
	# discarded here.
	if ( isset( $this->mPreprocessor ) && $this->mPreprocessor->parser !== $this ) {
		$this->mPreprocessor = null;
	}

	$this->mProfiler = new SectionProfiler();

	# Hand the hook a local alias instead of $this: from PHP 7.1 on, $this can
	# no longer be bound by reference, so handlers declaring this argument by
	# reference would receive a plain value and raise a warning.
	$parser = $this;
	Hooks::run( 'ParserClearState', [ &$parser ] );
}
||
386 | |||
/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * Besides producing the HTML, this fills the ParserOutput with the title
 * text (when title conversion applies) and, if the limit report is enabled
 * in the options, with limit-report, timing-profile and cache metadata.
 * The revision-related members are restored to their previous values
 * before returning.
 *
 * @param string $text Text we want to parse
 * @param Title $title
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int $revid Number to pass in {{REVISIONID}}
 * @return ParserOutput A ParserOutput
 */
public function parse(
	$text, Title $title, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	/**
	 * First pass--just handle <nowiki> sections, pass the rest off
	 * to internalParse() which does all the real work.
	 */

	global $wgShowHostnames;

	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Kept in a local on purpose; the value is never read.
		// NOTE(review): presumably a scoped guard released when this variable
		// goes out of scope - confirm against lock().
		$magicScopeVariable = $this->lock();
	}

	$this->startParse( $title, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	if ( $this->mOptions->getEnableLimitReport() ) {
		$this->mOutput->resetParseStartTime();
	}

	// Remember the revision context so it can be restored on the way out.
	$oldRevisionId = $this->mRevisionId;
	$oldRevisionObject = $this->mRevisionObject;
	$oldRevisionTimestamp = $this->mRevisionTimestamp;
	$oldRevisionUser = $this->mRevisionUser;
	$oldRevisionSize = $this->mRevisionSize;
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	// Hand hooks a local alias instead of $this: from PHP 7.1 on, $this can
	// no longer be bound by reference, so handlers declaring this argument by
	// reference would receive a plain value and raise a warning.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	# No more strip!
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text );
	Hooks::run( 'ParserAfterParse', [ &$parser, &$text, &$this->mStripState ] );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	if ( !( $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false )
	) {
		$convruletitle = $this->getConverterLanguage()->getConvRuleTitle();
		if ( $convruletitle ) {
			$this->mOutput->setTitleText( $convruletitle );
		} else {
			$titleText = $this->getConverterLanguage()->convertTitle( $title );
			$this->mOutput->setTitleText( $titleText );
		}
	}

	# Done parsing! Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $this->mOptions->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn( 'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$this->mOptions->getExpensiveParserFunctionLimit()
		);
	}

	# Information on include size limits, for the benefit of users who try to skirt them
	if ( $this->mOptions->getEnableLimitReport() ) {
		$max = $this->mOptions->getMaxIncludeSize();

		$cpuTime = $this->mOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$this->mOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $this->mOutput->getTimeSinceStart( 'wall' );
		$this->mOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$this->mOutput->setLimitReportData( 'limitreport-ppvisitednodes',
			[ $this->mPPNodeCount, $this->mOptions->getMaxPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-ppgeneratednodes',
			[ $this->mGeneratedPPNodeCount, $this->mOptions->getMaxGeneratedPPNodeCount() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-postexpandincludesize',
			[ $this->mIncludeSizes['post-expand'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-templateargumentsize',
			[ $this->mIncludeSizes['arg'], $max ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expansiondepth',
			[ $this->mHighestExpansionDepth, $this->mOptions->getMaxPPExpandDepth() ]
		);
		$this->mOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
			[ $this->mExpensiveFunctionCount,
				$this->mOptions->getExpensiveParserFunctionLimit() ]
		);
		Hooks::run( 'ParserLimitReportPrepare', [ $this, $this->mOutput ] );

		$limitReport = '';
		Hooks::run( 'ParserLimitReport', [ $this, &$limitReport ] );
		if ( $limitReport != '' ) {
			// Sanitize for comment. Note '‐' in the replacement is U+2010,
			// which looks much like the problematic '-'. '&' is replaced by
			// its entity; replacing it with itself would sanitize nothing.
			$limitReport = str_replace( [ '-', '&' ], [ '‐', '&amp;' ], $limitReport );
			$text .= "\n<!-- \nNewPP limit report\n$limitReport-->\n";
		}

		// Add on template profiling data in human/machine readable way
		$dataByFunc = $this->mProfiler->getFunctionStats();
		uasort( $dataByFunc, function ( $a, $b ) {
			// Descending order by 'real'. A sort callback has to return a
			// negative, zero or positive integer; a bare boolean can never
			// express "less than", which makes the ordering unreliable.
			if ( $a['real'] == $b['real'] ) {
				return 0;
			}
			return ( $a['real'] < $b['real'] ) ? 1 : -1;
		} );
		$profileReport = [];
		// Only the first ten entries after sorting are reported.
		foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
			$profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
				$item['%real'], $item['real'], $item['calls'], $item['name'] );
		}
		$this->mOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

		// Add other cache related metadata
		if ( $wgShowHostnames ) {
			$this->mOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
		}
		$this->mOutput->setLimitReportData( 'cachereport-timestamp',
			$this->mOutput->getCacheTime() );
		$this->mOutput->setLimitReportData( 'cachereport-ttl',
			$this->mOutput->getCacheExpiry() );
		$this->mOutput->setLimitReportData( 'cachereport-transientcontent',
			$this->mOutput->hasDynamicContent() );

		// Log pages whose generated node count exceeds a tenth of the limit.
		if ( $this->mGeneratedPPNodeCount
			> $this->mOptions->getMaxGeneratedPPNodeCount() / 10
		) {
			wfDebugLog( 'generated-pp-node-count', $this->mGeneratedPPNodeCount . ' ' .
				$this->mTitle->getPrefixedDBkey() );
		}
	}
	$this->mOutput->setText( $text );

	// Restore the revision context saved before parsing.
	$this->mRevisionId = $oldRevisionId;
	$this->mRevisionObject = $oldRevisionObject;
	$this->mRevisionTimestamp = $oldRevisionTimestamp;
	$this->mRevisionUser = $oldRevisionUser;
	$this->mRevisionSize = $oldRevisionSize;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
||
566 | |||
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 * instead, which means that lists and links have not been fully parsed yet,
 * and strip markers are still present.
 *
 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 *
 * Use this function if you're a parser tag hook and you want to parse
 * wikitext before or after applying additional transformations, and you
 * intend to *return the result as hook output*, which will cause it to go
 * through the rest of parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 */
public function recursiveTagParse( $text, $frame = false ) {
	# Hand hooks a local alias instead of $this: from PHP 7.1 on, $this can
	# no longer be bound by reference, so handlers declaring this argument by
	# reference would receive a plain value and raise a warning.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->internalParse( $text, false, $frame );
	return $text;
}
||
595 | |||
/**
 * Fully parse wikitext to fully parsed HTML. Like recursiveTagParse(), this
 * is a recursive entry point meant to be called from an extension tag hook.
 *
 * The result is fully-parsed HTML that is safe for output. A parser tag
 * hook that returns its result as hook output may prefer
 * recursiveTagParse() instead.
 *
 * Without a $frame, template variables (e.g., {{{1}}}) inside $text are
 * left unexpanded.
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	# First the half-parse, then the remaining passes on its result.
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
||
618 | |||
/**
 * Expand templates and variables in the text, producing valid, static wikitext.
 * Also removes comments.
 * Do not call this function recursively.
 *
 * @param string $text
 * @param Title|null $title Title context; the signature allows null
 * @param ParserOptions $options
 * @param int|null $revid Revision ID to store in mRevisionId; null leaves it untouched
 * @param bool|PPFrame $frame
 * @return mixed|string
 */
public function preprocess( $text, Title $title = null,
	ParserOptions $options, $revid = null, $frame = false
) {
	# Kept in a local on purpose; the value is never read.
	# NOTE(review): presumably a scoped guard released when this variable
	# goes out of scope - confirm against lock().
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}

	# Hand hooks a local alias instead of $this: from PHP 7.1 on, $this can
	# no longer be bound by reference, so handlers declaring this argument by
	# reference would receive a plain value and raise a warning.
	$parser = $this;
	Hooks::run( 'ParserBeforeStrip', [ &$parser, &$text, &$this->mStripState ] );
	Hooks::run( 'ParserAfterStrip', [ &$parser, &$text, &$this->mStripState ] );
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );
	return $text;
}
||
644 | |||
/**
 * Recursive counterpart of preprocess(), callable from an extension tag
 * hook: replaces variables in the text and then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param bool|PPFrame $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
||
659 | |||
/**
 * Process the wikitext for the "?preload=" feature. (bug 5210)
 *
 * "<noinclude>", "<includeonly>" etc. are handled as they would be for
 * template transclusion; comments, templates, arguments, tag hooks and
 * parser functions are left untouched.
 *
 * @param string $text
 * @param Title $title
 * @param ParserOptions $options
 * @param array $params Parameters substituted into $text via RawMessage
 * @return string
 */
public function getPreloadText( $text, Title $title, ParserOptions $options, $params = [] ) {
	# Substitute the parameters into the raw text first.
	$rawMessage = new RawMessage( $text );
	$withParams = $rawMessage->params( $params )->plain();

	# Parser (re)initialisation
	# The lock result is kept in a local on purpose; it is never read.
	$magicScopeVariable = $this->lock();
	$this->startParse( $title, $options, self::OT_PLAIN, true );

	# Expand as for inclusion, but with arguments and templates left alone.
	$dom = $this->preprocessToDom( $withParams, self::PTD_FOR_INCLUSION );
	$frame = $this->getPreprocessor()->newFrame();
	$expanded = $frame->expand( $dom, PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES );

	return $this->mStripState->unstripBoth( $expanded );
}
||
687 | |||
/**
 * Return a random string obtained from wfRandomString( 16 ), after
 * recording the deprecated call.
 *
 * @return string
 * @deprecated since 1.26; use wfRandomString() instead.
 */
public static function getRandomString() {
	wfDeprecated( __METHOD__, '1.26' );
	$random = wfRandomString( 16 );
	return $random;
}
||
698 | |||
/**
 * Store the user in mUser.
 * Intended only for the pre-save transform.
 *
 * @param User|null $user User object, or null to reset
 */
public function setUser( $user ) {
	$this->mUser = $user;
}
||
708 | |||
/**
 * Deprecated accessor for the strip marker prefix; records the deprecated
 * call and returns the MARKER_PREFIX constant.
 *
 * @return string
 * @deprecated since 1.26; use Parser::MARKER_PREFIX instead.
 */
public function uniqPrefix() {
	wfDeprecated( __METHOD__, '1.26' );
	$prefix = self::MARKER_PREFIX;
	return $prefix;
}
||
719 | |||
/**
 * Set the context title.
 *
 * A falsy argument is replaced by a title created from the text
 * 'NO TITLE'. A title carrying a fragment is stored without it.
 *
 * @param Title|null $t
 */
public function setTitle( $t ) {
	if ( !$t ) {
		$t = Title::newFromText( 'NO TITLE' );
	}

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $t->hasFragment()
		? $t->createFragmentTarget( '' )
		: $t;
}
||
737 | |||
/**
 * Return the Title object currently stored in mTitle.
 *
 * @return Title
 */
public function getTitle() {
	$title = $this->mTitle;
	return $title;
}
||
746 | |||
/**
 * Combined getter/setter for the Title object, delegating to wfSetVar().
 *
 * @param Title $x Title object or null to just get the current one
 * @return Title
 */
public function Title( $x = null ) {
	$result = wfSetVar( $this->mTitle, $x );
	return $result;
}
||
756 | |||
/**
 * Set the output type and rebuild the $ot shortcut array, which holds one
 * boolean per output type name.
 *
 * @param int $ot New value, compared against the OT_xxx constants
 */
public function setOutputType( $ot ) {
	$this->mOutputType = $ot;

	# Shortcut alias: flag name => the constant it stands for
	$typeByName = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $typeByName as $name => $type ) {
		$flags[$name] = $ot == $type;
	}
	$this->ot = $flags;
}
||
772 | |||
/**
 * Combined getter/setter for the output type, delegating to wfSetVar().
 * Unlike setOutputType(), this only touches mOutputType.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int
 */
public function OutputType( $x = null ) {
	$result = wfSetVar( $this->mOutputType, $x );
	return $result;
}
||
782 | |||
/**
 * Return the ParserOutput object currently stored in mOutput.
 *
 * @return ParserOutput
 */
public function getOutput() {
	$output = $this->mOutput;
	return $output;
}
||
791 | |||
/**
 * Accessor for the ParserOptions object held by this parser.
 *
 * @return ParserOptions
 */
public function getOptions() {
	return $this->mOptions;
}
||
800 | |||
/**
 * Combined getter/setter for the ParserOptions object, implemented via
 * wfSetVar().
 *
 * @param ParserOptions|null $x New value, or null to only read the current one
 * @return ParserOptions
 */
public function Options( $x = null ) {
	return wfSetVar( $this->mOptions, $x );
}
||
810 | |||
/**
 * Return the current link ID and advance the internal counter by one
 * (post-increment: the value returned is the one from before the bump).
 *
 * @return int
 */
public function nextLinkID() {
	return $this->mLinkID++;
}
||
817 | |||
/**
 * Overwrite the link ID counter that nextLinkID() hands out from.
 *
 * @param int $id
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
||
824 | |||
/**
 * Get the language object parser functions such as {{FORMATNUM:}} should
 * use. This simply forwards to getTargetLanguage().
 *
 * @return Language
 */
public function getFunctionLang() {
	return $this->getTargetLanguage();
}
||
832 | |||
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Resolution order:
 *  1. the target language set on the ParserOptions, when it is not null;
 *  2. the user language object, when the options mark an interface message;
 *  3. the page language of the context title.
 *
 * @since 1.19
 *
 * @throws MWException If no context title is set when step 3 is reached
 * @return Language
 */
public function getTargetLanguage() {
	$options = $this->mOptions;

	$explicit = $options->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $options->getInterfaceMessage() ) {
		return $options->getUserLangObj();
	}

	if ( $this->mTitle === null ) {
		throw new MWException( __METHOD__ . ': $this->mTitle is null' );
	}

	return $this->mTitle->getPageLanguage();
}
||
855 | |||
/**
 * Get the language object used for language conversion. This forwards to
 * getTargetLanguage() and therefore throws under the same conditions.
 *
 * @return Language|null
 */
public function getConverterLanguage() {
	return $this->getTargetLanguage();
}
||
863 | |||
/**
 * Get the User this parse runs for: $this->mUser when it has been set,
 * otherwise the user carried by the ParserOptions object.
 *
 * @return User
 */
public function getUser() {
	return $this->mUser !== null
		? $this->mUser
		: $this->mOptions->getUser();
}
||
876 | |||
/**
 * Get the preprocessor object, creating it on first use from the class name
 * stored in $this->mPreprocessorClass and caching it afterwards.
 *
 * @return Preprocessor
 */
public function getPreprocessor() {
	if ( isset( $this->mPreprocessor ) ) {
		return $this->mPreprocessor;
	}

	$preprocessorClass = $this->mPreprocessorClass;
	$this->mPreprocessor = new $preprocessorClass( $this );

	return $this->mPreprocessor;
}
||
889 | |||
/**
 * Get the LinkRenderer instance used to make links. It is created lazily
 * through the LinkRendererFactory service and configured with the stub
 * threshold taken from the parser options, then cached.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	$factory = MediaWikiServices::getInstance()->getLinkRendererFactory();
	$this->mLinkRenderer = $factory->create();
	$this->mLinkRenderer->setStubThreshold(
		$this->getOptions()->getStubThreshold()
	);

	return $this->mLinkRenderer;
}
||
907 | |||
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a unique marker and returns the resulting text. The
 * output parameter $matches will be an associative array filled with data
 * in the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * The marker number comes from a function-static counter, so markers stay
 * distinct across calls within one request.
 *
 * @param array $elements List of element names. Comments are always extracted.
 *   The names are inserted into a regular expression as-is.
 * @param string $text Source text string.
 * @param array &$matches Out parameter, Array: extracted tags
 * @param string|null $uniq_prefix Ignored; passing a non-null value only
 *   triggers a deprecation notice.
 * @return string Stripped text
 * @since 1.26 The uniq_prefix argument is deprecated.
 */
public static function extractTagsAndParams( $elements, $text, &$matches, $uniq_prefix = null ) {
	if ( $uniq_prefix !== null ) {
		wfDeprecated( __METHOD__ . ' called with $prefix argument', '1.26' );
	}
	static $n = 1;
	$stripped = '';
	$matches = [];

	$taglist = implode( '|', $elements );
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?" . ">)|<(!--)/i";

	while ( $text != '' ) {
		$parts = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$pieceCount = count( $parts );

		# Everything before the opening tag/comment is kept verbatim
		$stripped .= $parts[0];
		if ( $pieceCount < 5 ) {
			# No further opening tag or comment in the text
			break;
		}

		if ( $pieceCount > 5 ) {
			# Comment: the '!--' delimiter sits one slot further right than
			# the element name of an ordinary tag
			$element = $parts[4];
			$attributes = '';
			$close = '';
			$inside = $parts[5];
		} else {
			# Ordinary tag: name, attribute text, '>' or '/>', remaining text
			list( , $element, $attributes, $close, $inside ) = $parts;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$tail = null;
			$text = $inside;
		} else {
			$end = ( $element === '!--' )
				? '/(-->)/'
				: "/(<\\/$element\\s*>)/i";
			$pieces = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $pieces[0];
			if ( count( $pieces ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				$tail = $pieces[1];
				$text = $pieces[2];
			}
		}

		$matches[$marker] = [
			$element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail"
		];
	}

	return $stripped;
}
||
993 | |||
/**
 * Get the list of strippable XML-like elements stored on this parser.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
||
1002 | |||
/**
 * Add an item to the strip state.
 *
 * A fresh marker is built from the running marker index (which is then
 * incremented) and registered together with $text as a "general" strip
 * item. The caller must insert the returned marker into the stripped text;
 * it is replaced by the original text when the strip state is unstripped.
 *
 * @param string $text
 *
 * @return string The unique marker
 */
public function insertStripItem( $text ) {
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );

	return $marker;
}
||
1018 | |||
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is processed line by line. Five parallel stacks carry the state
 * of every currently open (possibly nested) table; each has one entry per
 * open table and the top entry belongs to the innermost one.
 *
 * Recognised line starts (after trimming):
 *  - ":*{|"  opens a table (leading colons give the indent level)
 *  - "|}"    closes the innermost table
 *  - "|-"    starts a new row; the rest of the line is row attributes
 *  - "|+"    caption, "|" data cell, "!" header cell
 * Anything else is passed through unchanged.
 *
 * @private
 * @param string $text
 * @return string
 */
public function doTableStuff( $text ) {
	$out = '';
	$cellOpen = []; # Is a cell element currently open?
	$lastCellTag = []; # Last tag activated (td, th or caption)
	$rowOpen = []; # Is a tr tag currently open?
	$rowAttribs = []; # Attributes waiting for the next <tr>
	$rowSeen = []; # Did this table open a <tr> element?
	$indentLevel = 0; # indent level of the table

	# Cell marker => HTML element
	$cellTags = [
		'|' => 'td',
		'!' => 'th',
		'+' => 'caption',
	];

	# $emit starts out as the raw line and is overwritten with generated
	# markup whenever the line turns out to be table syntax
	foreach ( StringUtils::explode( "\n", $text ) as $emit ) {
		$line = trim( $emit );

		if ( $line === '' ) {
			# empty line, go to next line
			$out .= $emit . "\n";
			continue;
		}

		$lead = $line[0];
		$leadPair = substr( $line, 0, 2 );
		$m = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $m ) ) {
			# First check if we are starting a new table
			$indentLevel = strlen( $m[1] );

			$attribs = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $m[2] ),
				'table'
			);

			$emit = str_repeat( '<dl><dd>', $indentLevel ) . "<table{$attribs}>";
			$cellOpen[] = false;
			$lastCellTag[] = '';
			$rowOpen[] = false;
			$rowAttribs[] = '';
			$rowSeen[] = false;
		} elseif ( count( $cellOpen ) == 0 ) {
			# Not inside any table: don't do any of the following
			$out .= $emit . "\n";
			continue;
		} elseif ( $leadPair === '|}' ) {
			# We are ending a table; closing tags are prepended innermost-last
			$line = '</table>' . substr( $line, 2 );
			$tag = array_pop( $lastCellTag );

			if ( !array_pop( $rowSeen ) ) {
				# A table needs at least one row
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $rowOpen ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$tag}>{$line}";
			}
			array_pop( $rowAttribs );
			$emit = $line . str_repeat( '</dd></dl>', $indentLevel );
		} elseif ( $leadPair === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes; they are stored
			# until the first cell of the row actually opens the <tr>
			$attribs = Sanitizer::fixTagAttributes(
				$this->mStripState->unstripBoth( $line ),
				'tr'
			);
			array_pop( $rowAttribs );
			$rowAttribs[] = $attribs;

			$line = '';
			$tag = array_pop( $lastCellTag );
			array_pop( $rowSeen );
			$rowSeen[] = true;

			if ( array_pop( $rowOpen ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $cellOpen ) ) {
				$line = "</{$tag}>{$line}";
			}

			$emit = $line;
			$rowOpen[] = false;
			$cellOpen[] = false;
			$lastCellTag[] = '';
		} elseif ( $lead === '|'
			|| $lead === '!'
			|| $leadPair === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $leadPair === '|+' ) {
				$lead = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $lead === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$emit = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$prefix = '';
				if ( $lead !== '+' ) {
					# Cells (but not captions) live in a row: open one if needed
					$pendingRowAttribs = array_pop( $rowAttribs );
					if ( !array_pop( $rowOpen ) ) {
						$prefix = "<tr{$pendingRowAttribs}>\n";
					}
					$rowOpen[] = true;
					$rowAttribs[] = '';
					array_pop( $rowSeen );
					$rowSeen[] = true;
				}

				$tag = array_pop( $lastCellTag );

				if ( array_pop( $cellOpen ) ) {
					# Close the previous cell before anything else
					$prefix = "</{$tag}>\n{$prefix}";
				}

				$tag = isset( $cellTags[$lead] ) ? $cellTags[$lead] : '';
				$lastCellTag[] = $tag;

				# A cell could contain both parameters and data
				$cellParts = explode( '|', $cell, 2 );

				# Bug 553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				if ( strpos( $cellParts[0], '[[' ) !== false ) {
					$cell = "{$prefix}<{$tag}>{$cell}";
				} elseif ( count( $cellParts ) == 1 ) {
					$cell = "{$prefix}<{$tag}>{$cellParts[0]}";
				} else {
					$attribs = Sanitizer::fixTagAttributes(
						$this->mStripState->unstripBoth( $cellParts[0] ),
						$tag
					);
					$cell = "{$prefix}<{$tag}{$attribs}>{$cellParts[1]}";
				}

				$emit .= $cell;
				$cellOpen[] = true;
			}
		}
		$out .= $emit . "\n";
	}

	# Closing open td, tr && table
	# NOTE(review): the cell is always closed as </td> here, even when the
	# open cell was a th or caption, and indent wrappers are not closed --
	# presumably left to later tidy-up; confirm before changing.
	while ( count( $cellOpen ) > 0 ) {
		if ( array_pop( $cellOpen ) ) {
			$out .= "</td>\n";
		}
		if ( array_pop( $rowOpen ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $rowSeen ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
||
1216 | |||
/**
 * Helper function for parse() that transforms wiki markup into half-parsed
 * HTML. Only called for $mOutputType == self::OT_HTML.
 *
 * Steps, in order: ParserBeforeInternalParse hook (may abort), variable and
 * template expansion, HTML sanitizing, tables, horizontal rules, double
 * underscores, headings, internal links, quotes, external links, magic links
 * and finally heading formatting.
 *
 * @private
 *
 * @param string $text The text to parse
 * @param bool $isMain Whether this is being called from the main parse() function
 * @param PPFrame|bool $frame A pre-processor frame, or false to use
 *   old-style replaceVariables()
 *
 * @return string
 */
public function internalParse( $text, $isMain = true, $frame = false ) {
	$origText = $text;

	// Hook handlers take the parser by reference. $this itself can no longer
	// be bound by reference on PHP 7.1+, so hand them a local alias instead.
	$parser = $this;

	# Hook to suspend the parser in this state
	if ( !Hooks::run( 'ParserBeforeInternalParse', [ &$parser, &$text, &$this->mStripState ] ) ) {
		return $text;
	}

	# if $frame is provided, then use $frame for replacing any variables
	if ( $frame ) {
		# use frame depth to infer how include/noinclude tags should be handled
		# depth=0 means this is the top-level document; otherwise it's an included document
		$flag = $frame->depth ? Parser::PTD_FOR_INCLUSION : 0;
		$dom = $this->preprocessToDom( $text, $flag );
		$text = $frame->expand( $dom );
	} else {
		# if $frame is not provided, then use old-style replaceVariables
		$text = $this->replaceVariables( $text );
	}

	Hooks::run( 'InternalParseBeforeSanitize', [ &$parser, &$text, &$this->mStripState ] );
	$text = Sanitizer::removeHTMLtags(
		$text,
		[ $this, 'attributeStripCallback' ],
		false,
		array_keys( $this->mTransparentTagHooks ),
		[],
		[ $this, 'addTrackingCategory' ]
	);
	Hooks::run( 'InternalParseBeforeLinks', [ &$parser, &$text, &$this->mStripState ] );

	# Tables need to come after variable replacement for things to work
	# properly; putting them before other transformations should keep
	# exciting things like link expansions from showing up in surprising
	# places.
	$text = $this->doTableStuff( $text );

	# Four or more dashes at the start of a line become a horizontal rule
	$text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );

	$text = $this->doDoubleUnderscore( $text );

	$text = $this->doHeadings( $text );
	$text = $this->replaceInternalLinks( $text );
	$text = $this->doAllQuotes( $text );
	$text = $this->replaceExternalLinks( $text );

	# replaceInternalLinks may sometimes leave behind
	# absolute URLs, which have to be masked to hide them from replaceExternalLinks
	$text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );

	$text = $this->doMagicLinks( $text );
	$text = $this->formatHeadings( $text, $origText, $isMain );

	return $text;
}
||
1289 | |||
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * Steps, in order: unstrip general items, typographic space fixes, block
 * levels, link holder replacement, language conversion (unless disabled),
 * unstrip nowiki items, transparent tags, character reference
 * normalisation, and either MWTidy or a small set of regex clean-ups.
 * The ParserAfterUnstrip, ParserBeforeTidy and ParserAfterTidy hooks run
 * only when $isMain is true.
 *
 * @param string $text
 * @param bool $isMain
 * @param bool $linestart Passed through to doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	// Hook handlers take the parser by reference. $this itself can no longer
	// be bound by reference on PHP 7.1+, so hand them a local alias instead.
	$parser = $this;

	if ( $isMain ) {
		Hooks::run( 'ParserAfterUnstrip', [ &$parser, &$text ] );
	}

	# Clean up special characters, only run once, next-to-last before doBlockLevels
	$fixtags = [
		# french spaces, last one Guillemet-left
		# only if there is something before the space
		'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		# french spaces, Guillemet-right
		'/(\\302\\253) /' => '\\1&#160;',
		# Undo the first rule in front of the CSS magic word !important, bug #11874.
		'/&#160;(!\s*important)/' => ' \\1',
	];
	$text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );

	$text = $this->doBlockLevels( $text, $linestart );

	$this->replaceLinkHolders( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	if ( !( $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
	) {
		if ( !$this->mOptions->getInterfaceMessage() ) {
			# The position of the convert() call should not be changed. it
			# assumes that the links are all replaced and the only thing left
			# is the <nowiki> mark.
			$text = $this->getConverterLanguage()->convert( $text );
		}
	}

	$text = $this->mStripState->unstripNoWiki( $text );

	if ( $isMain ) {
		Hooks::run( 'ParserBeforeTidy', [ &$parser, &$text ] );
	}

	$text = $this->replaceTransparentTags( $text );
	$text = $this->mStripState->unstripGeneral( $text );

	$text = Sanitizer::normalizeCharReferences( $text );

	if ( MWTidy::isEnabled() ) {
		if ( $this->mOptions->getTidy() ) {
			$text = MWTidy::tidy( $text );
		}
	} else {
		# attempt to sanitize at least some nesting problems
		# (bug #2702 and quite a few others)
		$tidyregs = [
			# ''Something [http://www.cool.com cool''] -->
			# <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
			'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
				'\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
			# fix up an anchor inside another anchor, only
			# at least for a single single nested link (bug 3695)
			'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
				'\\1\\2</a>\\3</a>\\1\\4</a>',
			# fix div inside inline elements- doBlockLevels won't wrap a line which
			# contains a div, so fix it up here; replace
			# div with escaped text
			'/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
				'\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
			# remove empty italic or bold tag pairs, some
			# introduced by rules above
			'/<([bi])><\/\\1>/' => '',
		];

		$text = preg_replace(
			array_keys( $tidyregs ),
			array_values( $tidyregs ),
			$text );
	}

	if ( $isMain ) {
		Hooks::run( 'ParserAfterTidy', [ &$parser, &$text ] );
	}

	return $text;
}
||
1388 | |||
/**
 * Replace special strings like "ISBN xxx" and "RFC xxx" with
 * magic external links.
 *
 * A single extended-mode regular expression scans the text; existing
 * anchors and other HTML elements are matched first so that nothing inside
 * them is touched, then free external links, RFC/PMID references and ISBNs.
 * Each match is handed to magicLinkCallback(), which decides the output.
 *
 * DML
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doMagicLinks( $text ) {
	$prots = wfUrlProtocolsWithoutProtRel();
	$urlChar = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$space = self::SPACE_NOT_NL; # non-newline space
	$spdash = "(?:-|$space)"; # a dash or a non-newline space
	$spaces = "$space++"; # possessive match of 1 or more spaces
	# The callback is passed as a plain [ object, method ] pair: a reference
	# to $this is not needed and can no longer be taken on PHP 7.1+.
	$text = preg_replace_callback(
		'!(?:                            # Start cases
			(<a[ \t\r\n>].*?</a>) |      # m[1]: Skip link text
			(<.*?>) |                    # m[2]: Skip stuff inside
			                             #       HTML elements' . "
			(\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
			                             # m[4]: Post-protocol path
			\b(?:RFC|PMID) $spaces       # m[5]: RFC or PMID, capture number
				([0-9]+)\b |
			\bISBN $spaces (             # m[6]: ISBN, capture number
				(?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
				(?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
				[0-9Xx]                  #  check digit
			)\b
		)!xu", [ $this, 'magicLinkCallback' ], $text );
	return $text;
}
||
1424 | |||
/**
 * Callback for doMagicLinks(): turn one regex match into its replacement.
 *
 * The populated capture group tells which alternative matched:
 *  - m[1] / m[2]: an existing anchor or other HTML element, returned as-is
 *  - m[3] (with m[4]): a free external link
 *  - m[5]: the number of an RFC or PMID reference
 *  - m[6]: an ISBN
 * RFC, PMID and ISBN matches are returned unchanged when the corresponding
 * magic-link parser option is switched off.
 *
 * @throws MWException If m[5] is set but the match starts with neither
 *   "RFC" nor "PMID"
 * @param array $m Match array from preg_replace_callback()
 * @return string HTML for the link, or the original matched text
 */
public function magicLinkCallback( $m ) {
	$matched = $m[0];

	if ( isset( $m[1] ) && $m[1] !== '' ) {
		# Skip anchor
		return $matched;
	}

	if ( isset( $m[2] ) && $m[2] !== '' ) {
		# Skip HTML element
		return $matched;
	}

	if ( isset( $m[3] ) && $m[3] !== '' ) {
		# Free external link
		return $this->makeFreeExternalLink( $matched, strlen( $m[4] ) );
	}

	if ( isset( $m[5] ) && $m[5] !== '' ) {
		# RFC or PMID
		if ( substr( $matched, 0, 3 ) === 'RFC' ) {
			if ( !$this->mOptions->getMagicRFCLinks() ) {
				return $matched;
			}
			$keyword = 'RFC';
			$urlmsg = 'rfcurl';
			$cssClass = 'mw-magiclink-rfc';
		} elseif ( substr( $matched, 0, 4 ) === 'PMID' ) {
			if ( !$this->mOptions->getMagicPMIDLinks() ) {
				return $matched;
			}
			$keyword = 'PMID';
			$urlmsg = 'pubmedurl';
			$cssClass = 'mw-magiclink-pmid';
		} else {
			throw new MWException( __METHOD__ . ': unrecognised match type "' .
				substr( $matched, 0, 20 ) . '"' );
		}
		$id = $m[5];
		$url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
		return Linker::makeExternalLink( $url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle );
	}

	if ( isset( $m[6] ) && $m[6] !== ''
		&& $this->mOptions->getMagicISBNLinks()
	) {
		# ISBN: show it with plain spaces, link to the bare number
		$space = self::SPACE_NOT_NL; # non-newline space
		$isbn = preg_replace( "/$space/", ' ', $m[6] );
		$num = strtr( $isbn, [
			'-' => '',
			' ' => '',
			'x' => 'X',
		] );
		return $this->getLinkRenderer()->makeKnownLink(
			SpecialPage::getTitleFor( 'Booksources', $num ),
			"ISBN $isbn",
			[
				'class' => 'internal mw-magiclink-isbn',
				'title' => false // suppress title attribute
			]
		);
	}

	return $matched;
}
||
1488 | |||
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Text that should not be part of the URL (an escaped '<', '>' or
 * non-breaking space and everything after it, plus trailing punctuation)
 * is split off into a trail that is appended after the link. If nothing
 * but the protocol would remain, the input is returned as plain text.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @private
 */
public function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# removeHTMLtags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$entityHit = [];
	$found = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$entityHit,
		PREG_OFFSET_CAPTURE
	);
	if ( $found ) {
		$cut = $entityHit[0][1];
		$trail = substr( $url, $cut ) . $trail;
		$url = substr( $url, 0, $cut );
	}

	# Move trailing punctuation to $trail
	# If there is no left bracket, then consider right brackets fair game too
	$sep = ( strpos( $url, '(' ) === false ) ? ',;\.:!?)' : ',;\.:!?';

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison.
	# More optimization: instead of running preg_match with a $
	# anchor, which can be slow, do the match on the reversed
	# string starting at the desired offset.
	# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
	if ( $numSepChars
		&& substr_compare( $url, ";", -$numSepChars, 1 ) === 0
		&& preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $entityHit, 0, $numSepChars )
	) {
		# Keep the ';' that terminates the entity inside the URL
		$numSepChars--;
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html === false ) {
		# Not an image, make a link
		$html = Linker::makeExternalLink(
			$url,
			$this->getConverterLanguage()->markNoConversion( $url, true ),
			true,
			'free',
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		# Register it in the output object...
		# Replace unnecessary URL escape codes with their equivalent characters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $html . $trail;
}
||
1565 | |||
/**
 * Parse headers and return html.
 *
 * Lines of the form "== text ==" become <hN> elements. Levels are handled
 * from 6 down to 1 so that the longest run of equals signs wins.
 *
 * @private
 *
 * @param string $text
 *
 * @return string
 */
public function doHeadings( $text ) {
	$level = 6;
	while ( $level >= 1 ) {
		$equals = str_repeat( '=', $level );
		$text = preg_replace(
			"/^{$equals}(.+){$equals}\\s*$/m",
			"<h{$level}>\\1</h{$level}>",
			$text
		);
		$level--;
	}

	return $text;
}
||
1582 | |||
/**
 * Replace single quotes with HTML markup.
 *
 * The text is split on newlines and every line is run through doQuotes()
 * separately; the lines are then joined with newlines again.
 *
 * @private
 *
 * @param string $text
 *
 * @return string The altered text
 */
public function doAllQuotes( $text ) {
	$converted = '';
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$converted .= $this->doQuotes( $line ) . "\n";
	}

	# Drop the newline appended after the last line
	return substr( $converted, 0, -1 );
}
||
1600 | |||
/**
 * Helper function for doAllQuotes(): convert the apostrophe mark-up of a
 * single line into <i> / <b> HTML tags.
 *
 * The line is split on runs of two or more apostrophes. Runs of 2, 3 and 5
 * apostrophes are treated as italic, bold and bold+italic toggles; runs of
 * 4 or more than 5 give their surplus leading apostrophes back to the
 * preceding text. Tags still open at the end of the line are closed.
 *
 * @param string $text A single line of wikitext
 *
 * @return string The line with quote mark-up replaced by HTML tags
 */
public function doQuotes( $text ) {
	// Odd indexes of $arr hold apostrophe runs, even indexes hold plain text.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No apostrophe runs at all: nothing to do.
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		if ( $thislen == 4 ) {
			// If there are ever four apostrophes, assume the first is supposed to
			// be text, and the remaining three constitute mark-up for bold text.
			// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last characters of the preceding text.
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} else {
					if ( $firstmultiletterword == -1 ) {
						$firstmultiletterword = $i;
					}
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state is one of '', 'i', 'b', 'bi', 'ib' (tags currently open, in
	// opening order) or 'both' (a ''''' was seen with nothing open; the
	// following text is held in $buffer until the next run decides the
	// nesting order).
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			// Plain text segment.
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$thislen = strlen( $r );
		if ( $thislen == 2 ) {
			if ( $state === 'i' ) {
				$output .= '</i>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i>';
				$state = 'b';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i><b>';
				$state = 'b';
			} elseif ( $state === 'both' ) {
				$output .= '<b><i>' . $buffer . '</i>';
				$state = 'b';
			} else { // $state can be 'b' or ''
				$output .= '<i>';
				$state .= 'i';
			}
		} elseif ( $thislen == 3 ) {
			if ( $state === 'b' ) {
				$output .= '</b>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b><i>';
				$state = 'i';
			} elseif ( $state === 'ib' ) {
				$output .= '</b>';
				$state = 'i';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b>';
				$state = 'i';
			} else { // $state can be 'i' or ''
				$output .= '<b>';
				$state .= 'b';
			}
		} elseif ( $thislen == 5 ) {
			if ( $state === 'b' ) {
				$output .= '</b><i>';
				$state = 'i';
			} elseif ( $state === 'i' ) {
				$output .= '</i><b>';
				$state = 'b';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b>';
				$state = '';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i>';
				$state = '';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b></i>';
				$state = '';
			} else { // ($state == '')
				$buffer = '';
				$state = 'both';
			}
		}
	}

	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a truthiness check would silently
	// drop buffered text that is exactly "0".
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
||
1782 | |||
/**
 * Replace bracketed external links (REL) with rendered HTML links.
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @private
 *
 * @param string $text
 *
 * @throws MWException If the bracketed-link regex cannot be applied
 * @return string
 */
public function replaceExternalLinks( $text ) {
	$pieces = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	if ( $pieces === false ) {
		throw new MWException( "PCRE needs to be compiled with "
			. "--enable-unicode-properties in order for MediaWiki to function" );
	}

	// The first piece is the text before the first link; after it the
	// pieces come in groups of four: URL, protocol, link text, trailing text.
	$result = array_shift( $pieces );
	$total = count( $pieces );

	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $pieces[$pos];
		// $pieces[$pos + 1] is the protocol, which is not needed here
		$label = $pieces[$pos + 2];
		$trail = $pieces[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# removeHTMLtags()) should not be included in
		# URLs, per RFC 2396.
		$entityHit = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entityHit, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entityHit[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$image = $this->maybeMakeExternalImage( $label );
		if ( $image !== false ) {
			$label = $image;
		}

		$insideTrail = '';

		# Set linktype for CSS - if URL==text, link is essentially free
		if ( $label === $url ) {
			$linktype = 'free';
		} else {
			$linktype = 'text';
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$language = $this->getTargetLanguage();
			$label = '[' . $language->formatNum( ++$this->mAutonumber ) . ']';
			$linktype = 'autonumber';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			list( $insideTrail, $trail ) = Linker::splitTrail( $trail );
		}

		$label = $this->getConverterLanguage()->markNoConversion( $label );

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$anchor = Linker::makeExternalLink(
			$url,
			$label,
			false,
			$linktype,
			$this->getExternalLinkAttribs( $url ),
			$this->mTitle
		);
		$result .= $anchor . $insideTrail . $trail;

		# Register link in the output object.
		# Replace unnecessary URL escape codes with the referenced character
		# This prevents spammers from hiding links from the filters
		$this->mOutput->addExternalLink( self::normalizeLinkUrl( $url ) );
	}

	return $result;
}
||
1865 | |||
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when $wgNoFollowLinks is enabled and neither the
 * title's namespace (via $wgNoFollowNsExceptions) nor the URL's domain
 * (via $wgNoFollowDomainExceptions) is exempted; otherwise null.
 *
 * @since 1.21
 * @param string|bool $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param Title $title Optional Title, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;

	if ( !$wgNoFollowLinks ) {
		return null;
	}

	// Only consult the namespace exceptions when a title was supplied.
	// Looking up a placeholder `false` with in_array()'s loose comparison
	// would match namespace 0, wrongly exempting title-less calls whenever
	// the main namespace is listed as an exception.
	if ( $title && in_array( $title->getNamespace(), $wgNoFollowNsExceptions ) ) {
		return null;
	}

	if ( wfMatchesDomainList( $url, $wgNoFollowDomainExceptions ) ) {
		return null;
	}

	return 'nofollow';
}
||
1885 | |||
/**
 * Get an associative array of additional HTML attributes appropriate for a
 * particular external link. This currently may include rel => nofollow
 * (depending on configuration, namespace, and the URL's domain) and/or a
 * target attribute (depending on configuration).
 *
 * The 'rel' key is always present in the result; its value is null when
 * no rel token applies.
 *
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	// May be null when no rel value applies to this link.
	$rel = self::getExternalLinkRel( $url, $this->mTitle );

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			// Only add a separator when there is an existing token; $rel
			// may be null, which must not produce a leading space.
			if ( $rel !== null && $rel !== '' ) {
				$rel .= ' ';
			}
			$rel .= 'noreferrer noopener';
		}
	}
	$attribs['rel'] = $rel;
	return $attribs;
}
||
1916 | |||
/**
 * Deprecated alias of normalizeLinkUrl(): emits a deprecation notice and
 * forwards the URL unchanged to that method.
 *
 * @deprecated since 1.24, use normalizeLinkUrl
 * @param string $url
 * @return string
 */
public static function replaceUnusualEscapes( $url ) {
	wfDeprecated( __METHOD__, '1.24' );

	$normalized = self::normalizeLinkUrl( $url );
	return $normalized;
}
||
1928 | |||
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings. The URL is processed from the end:
 * fragment first, then query, then everything before them, each with its
 * own set of characters that must stay percent-encoded.
 *
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# First, make sure unsafe characters are encoded
	$encodeMatch = function ( $hit ) {
		return rawurlencode( $hit[0] );
	};
	$url = preg_replace_callback( '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]/', $encodeMatch, $url );

	// Delimiter => characters to keep escaped in the component it starts.
	// Order matters: the fragment must be cut off before the query.
	$trailingComponents = [
		# Fragment part - 'fragment'
		'#' => '"#%<>[\]^`{|}',
		# Query part - 'query' minus &=+;
		'?' => '"#%<>[\]^`{|}&=+;',
	];

	$tail = '';
	$cut = strlen( $url );
	foreach ( $trailingComponents as $delimiter => $keepEscaped ) {
		$at = strpos( $url, $delimiter );
		if ( $at === false || $at >= $cut ) {
			continue;
		}
		$piece = substr( $url, $at, $cut - $at );
		$tail = self::normalizeUrlComponent( $piece, $keepEscaped ) . $tail;
		$cut = $at;
	}

	# Scheme and path part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$head = self::normalizeUrlComponent( substr( $url, 0, $cut ), '"#%<>[\]^`{|}/?' );

	return $head . $tail;
}
||
1973 | |||
/**
 * Normalize the percent-escapes in one URL component.
 *
 * Escapes that decode to a printable ASCII character (codes 33-126) not
 * listed in $unsafe are replaced by that character; every other escape is
 * kept, with its hex digits upper-cased.
 *
 * @param string $component
 * @param string $unsafe Characters that must remain percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		function ( $hit ) use ( $unsafe ) {
			$escape = $hit[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$printable = ( $code > 32 && $code < 127 );

			if ( !$printable || strpos( $unsafe, $decoded ) !== false ) {
				# Leave it escaped, but use uppercase for a-f
				return strtoupper( $escape );
			}

			# Unescape it
			return $decoded;
		},
		$component
	);
}
||
1988 | |||
/**
 * Make an external image for a URL if it's allowed, either through the
 * global option, through the allowed-prefix exception, or through the
 * on-wiki whitelist.
 *
 * @param string $url
 *
 * @return string|bool Image HTML, or false if the URL is not rendered as an image
 */
private function maybeMakeExternalImage( $url ) {
	$allowedFrom = $this->mOptions->getAllowExternalImagesFrom();

	# The setting could be either a single string or an array of strings;
	# in both cases the URL must start with one of the configured prefixes.
	$prefixMatched = false;
	if ( !empty( $allowedFrom ) ) {
		if ( is_array( $allowedFrom ) ) {
			foreach ( $allowedFrom as $candidate ) {
				if ( strpos( $url, $candidate ) === 0 ) {
					$prefixMatched = true;
					break;
				}
			}
		} else {
			$prefixMatched = ( strpos( $url, $allowedFrom ) === 0 );
		}
	}

	$html = false;

	if ( $this->mOptions->getAllowExternalImages() || $prefixMatched ) {
		if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
			# Image found
			$html = Linker::makeExternalImage( $url );
		}
	}

	if ( !$html && $this->mOptions->getEnableImageWhitelist()
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		$whitelistText = wfMessage( 'external_image_whitelist' )->inContentLanguage()->text();

		foreach ( explode( "\n", $whitelistText ) as $entry ) {
			# Ignore blank entries and comments
			$usable = ( strpos( $entry, '#' ) !== 0 && $entry !== '' );
			if ( !$usable ) {
				continue;
			}
			# Sanitize the regex fragment and make it case-insensitive
			$pattern = '/' . str_replace( '/', '\\/', $entry ) . '/i';
			if ( preg_match( $pattern, $url ) ) {
				# Image matches a whitelist entry
				$html = Linker::makeExternalImage( $url );
				break;
			}
		}
	}

	return $html;
}
||
2046 | |||
/**
 * Process [[ ]] wikilinks: run replaceInternalLinks2() on the text and
 * merge the link holders it returns into this parser's holder array.
 *
 * @param string $s
 *
 * @return string Processed text
 *
 * @private
 */
public function replaceInternalLinks( $s ) {
	$mainHolders = $this->mLinkHolders;
	// replaceInternalLinks2() rewrites $s in place (by reference).
	$mainHolders->merge( $this->replaceInternalLinks2( $s ) );

	return $s;
}
||
2060 | |||
/**
 * Process [[ ]] wikilinks (RIL)
 *
 * The text is split on "[[" and each piece is examined in turn. Depending
 * on the target, a piece is emitted unchanged, registered as a language
 * link or category, rendered immediately (images, media links, self
 * links, always-known titles), or replaced by a link holder.
 *
 * @param string &$s Text to process; replaced in place by the processed text
 * @throws MWException If $this->mTitle is null
 * @return LinkHolderArray Holders created while processing $s
 *
 * @private
 */
public function replaceInternalLinks2( &$s ) {
	global $wgExtraInterlanguageLinkPrefixes;

	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray( $this );

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		global $wgContLang;
		$charset = $wgContLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
	}

	if ( is_null( $this->mTitle ) ) {
		throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
	}
	$nottalk = !$this->mTitle->isTalkPage();

	// Initialise both on every path so neither is ever read undefined.
	$prefix = '';
	$first_prefix = false;
	if ( $useLinkPrefixExtension ) {
		$m = [];
		if ( preg_match( $e2, $s, $m ) ) {
			$first_prefix = $m[2];
		}
	}

	$useSubpages = $this->areSubpagesAllowed();

	// @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		// @codingStandardsIgnoreEnd

		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			if ( preg_match( $e2, $s, $m ) ) {
				$prefix = $m[2];
				$s = $m[1];
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks,
			# the real problem is with the $e1 regex
			# See bug 1300.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See bug 2095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that replaceExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				# Decoding may introduce raw angle brackets; escape them.
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				# Decoding may introduce raw angle brackets; escape them.
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$origLink = $m[1];

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->mUrlProtocols . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		if ( $useSubpages ) {
			$link = $this->maybeDoSubpageLink( $origLink, $text );
		} else {
			$link = $origLink;
		}

		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );
		if ( !$noforce ) {
			# Strip off leading ':'
			$link = substr( $link, 1 );
		}

		$unstrip = $this->mStripState->unstripNoWiki( $link );
		$nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( $ns == NS_FILE && $noforce ) { # but might be an image
				$found = false;
				while ( true ) {
					# look at the next 'line' to see if we can close it there
					$a->next();
					$next_line = $a->current();
					if ( $next_line === false || $next_line === null ) {
						break;
					}
					$m = explode( ']]', $next_line, 3 );
					if ( count( $m ) == 3 ) {
						# the first ]] closes the inner link, the second the image
						$found = true;
						$text .= "[[{$m[0]}]]{$m[1]}";
						$trail = $m[2];
						break;
					} elseif ( count( $m ) == 2 ) {
						# if there's exactly one ]] that's fine, we'll keep looking
						$text .= "[[{$m[0]}]]{$m[1]}";
					} else {
						# if $next_line is invalid too, we need look no further
						$text .= '[[' . $next_line;
						break;
					}
				}
				if ( !$found ) {
					# we couldn't find the end of this imageLink, so output it raw
					# but don't ignore what might be perfectly normal links in the text we've examined
					$holders->merge( $this->replaceInternalLinks2( $text ) );
					$s .= "{$prefix}[[$link|$text";
					# note: no $trail, because without an end, there *is* no trail
					continue;
				}
			} else { # it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
		} else {
			# Bug 4598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					Language::fetchLanguageName( $iw, null, 'mw' ) ||
					in_array( $iw, $wgExtraInterlanguageLinkPrefixes )
				)
			) {
				# Bug 24502: filter duplicates
				if ( !isset( $this->mLangLinkLanguages[$iw] ) ) {
					$this->mLangLinkLanguages[$iw] = true;
					$this->mOutput->addLanguageLink( $nt->getFullText() );
				}

				$s = rtrim( $s . $prefix );
				$s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail;
				continue;
			}

			if ( $ns == NS_FILE ) {
				if ( !wfIsBadImage( $nt->getDBkey(), $this->mTitle ) ) {
					if ( $wasblank ) {
						# if no parameters were passed, $text
						# becomes something like "File:Foo.png",
						# which we don't want to pass on to the
						# image generator
						$text = '';
					} else {
						# recursively parse links inside the image caption
						# actually, this will parse them in any other parameters, too,
						# but it might be hard to fix that, and it doesn't matter ATM
						$text = $this->replaceExternalLinks( $text );
						$holders->merge( $this->replaceInternalLinks2( $text ) );
					}
					# cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
					$s .= $prefix . $this->armorLinks(
						$this->makeImage( $nt, $text, $holders ) ) . $trail;
					continue;
				}
			} elseif ( $ns == NS_CATEGORY ) {
				$s = rtrim( $s . "\n" ); # bug 87

				if ( $wasblank ) {
					$sortkey = $this->getDefaultSort();
				} else {
					$sortkey = $text;
				}
				$sortkey = Sanitizer::decodeCharReferences( $sortkey );
				$sortkey = str_replace( "\n", '', $sortkey );
				$sortkey = $this->getConverterLanguage()->convertCategoryKey( $sortkey );
				$this->mOutput->addCategory( $nt->getDBkey(), $sortkey );

				/**
				 * Strip the whitespace Category links produce, see bug 87
				 */
				$s .= trim( $prefix . $trail, "\n" ) == '' ? '' : $prefix . $trail;

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns != NS_SPECIAL && $nt->equals( $this->mTitle ) && !$nt->hasFragment() ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns == NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			Hooks::run( 'BeforeParserFetchFileAndTitle',
				[ $this, $nt, &$options, &$descQuery ] );
			# Fetch and register the file (file title may be different via hooks)
			list( $file, $nt ) = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, [], $trail, $prefix );
		}
	}
	return $holders;
}
||
2368 | |||
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 *
 * The trail is split so that its link-internal part is rendered inside
 * the anchor, while the remainder is appended after the armored link.
 *
 * @param Title $nt
 * @param string $text
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix
 */
protected function makeKnownLinkHolder( $nt, $text = '', $trail = '', $prefix = '' ) {
	$trailParts = Linker::splitTrail( $trail );
	$inside = $trailParts[0];
	$trail = $trailParts[1];

	// Fall back to the (escaped) prefixed title when no link text was given.
	if ( $text == '' ) {
		$text = htmlspecialchars( $nt->getPrefixedText() );
	}

	$label = new HtmlArmor( $prefix . $text . $inside );
	$anchor = $this->getLinkRenderer()->makeKnownLink( $nt, $label );

	return $this->armorLinks( $anchor ) . $trail;
}
||
2395 | |||
/**
 * Insert a NOPARSE marker in front of every URL protocol found in a chunk
 * that's going to go through further parsing steps before inline URL
 * expansion.
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
public function armorLinks( $text ) {
	// Case-insensitive match of any known protocol at a word boundary.
	$protocolPattern = '/\b((?i)' . $this->mUrlProtocols . ')/';
	$armoredProtocol = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $armoredProtocol, $text );
}
||
2410 | |||
/**
 * Return true if subpage links should be expanded on this page, i.e. if
 * the namespace of the current title has subpages enabled.
 *
 * @return bool
 */
public function areSubpagesAllowed() {
	# Some namespaces don't allow subpages
	$currentNamespace = $this->mTitle->getNamespace();

	return MWNamespace::hasSubpages( $currentNamespace );
}
||
2419 | |||
/**
 * Handle link to subpage if necessary, by delegating to
 * Linker::normalizeSubpageLink() with the current title as context.
 *
 * @param string $target The source of the link
 * @param string &$text The link text, modified as necessary
 * @return string The full name of the link
 * @private
 */
public function maybeDoSubpageLink( $target, &$text ) {
	$contextTitle = $this->mTitle;
	$fullName = Linker::normalizeSubpageLink( $contextTitle, $target, $text );

	return $fullName;
}
||
2431 | |||
/**
 * Turn lines starting with ':', '*', '#', etc. into lists (DBL).
 *
 * Thin wrapper around BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @private
 * @return string The lists rendered as HTML
 */
public function doBlockLevels( $text, $linestart ) {
	$html = BlockLevelPass::doBlockLevels( $text, $linestart );

	return $html;
}
||
2443 | |||
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * Most values are computed once and then stored in $this->mVarCache under
 * their ID. The site-configuration variables (articlepath, sitename, server,
 * servername, scriptpath, stylepath, directionmark, contentlanguage) and
 * anything resolved by the ParserGetVariableValueSwitch hook are returned
 * directly without being written to the cache here.
 *
 * Hooks run: ParserGetVariableValueVarCache, ParserGetVariableValueTs and,
 * for IDs not handled below, ParserGetVariableValueSwitch.
 *
 * @private
 *
 * @param string $index Magic variable ID (e.g. 'currentmonth', 'pagename')
 * @param bool|PPFrame $frame Only forwarded to the ParserGetVariableValueSwitch hook
 *
 * @throws MWException If no title is set, i.e. when called outside of a parse
 * @return string|int|null Value of the variable; null for an unknown ID that no
 *   hook handled, or for 'pageid' on a page that does not exist yet
 */
public function getVariableValue( $index, $frame = false ) {
	global $wgContLang, $wgSitename, $wgServer, $wgServerName;
	global $wgArticlePath, $wgScriptPath, $wgStylePath;

	if ( is_null( $this->mTitle ) ) {
		// If no title set, bad things are going to happen
		// later. Title should always be set since this
		// should only be called in the middle of a parse
		// operation (but the unit-tests do funky stuff)
		throw new MWException( __METHOD__ . ' Should only be '
			. ' called while parsing (no title set)' );
	}

	// $this can no longer be taken by reference as of PHP 7.1, which breaks
	// hook handlers that declare the parser argument by reference. Pass a
	// local alias instead; handlers still receive the same Parser object.
	$parser = $this;

	/**
	 * Some of these require message or data lookups and can be
	 * expensive to check many times.
	 */
	if ( Hooks::run( 'ParserGetVariableValueVarCache', [ &$parser, &$this->mVarCache ] ) ) {
		if ( isset( $this->mVarCache[$index] ) ) {
			return $this->mVarCache[$index];
		}
	}

	$ts = wfTimestamp( TS_UNIX, $this->mOptions->getTimestamp() );
	Hooks::run( 'ParserGetVariableValueTs', [ &$parser, &$ts ] );

	$pageLang = $this->getFunctionLang();

	switch ( $index ) {
		case '!':
			$value = '|';
			break;
		case 'currentmonth':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'm' ) );
			break;
		case 'currentmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getInstance( $ts )->format( 'n' ) );
			break;
		case 'currentday':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'j' ) );
			break;
		case 'currentday2':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'd' ) );
			break;
		case 'localmonth':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'm' ) );
			break;
		case 'localmonth1':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthname':
			$value = $pageLang->getMonthName( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthnamegen':
			$value = $pageLang->getMonthNameGen( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localmonthabbrev':
			$value = $pageLang->getMonthAbbreviation( MWTimestamp::getLocalInstance( $ts )->format( 'n' ) );
			break;
		case 'localday':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'j' ) );
			break;
		case 'localday2':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'd' ) );
			break;
		case 'pagename':
			$value = wfEscapeWikiText( $this->mTitle->getText() );
			break;
		case 'pagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPartialURL() );
			break;
		case 'fullpagename':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedText() );
			break;
		case 'fullpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getPrefixedURL() );
			break;
		case 'subpagename':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageText() );
			break;
		case 'subpagenamee':
			$value = wfEscapeWikiText( $this->mTitle->getSubpageUrlForm() );
			break;
		case 'rootpagename':
			$value = wfEscapeWikiText( $this->mTitle->getRootText() );
			break;
		case 'rootpagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getRootText()
			) ) );
			break;
		case 'basepagename':
			$value = wfEscapeWikiText( $this->mTitle->getBaseText() );
			break;
		case 'basepagenamee':
			$value = wfEscapeWikiText( wfUrlencode( str_replace(
				' ',
				'_',
				$this->mTitle->getBaseText()
			) ) );
			break;
		case 'talkpagename':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedText() );
			} else {
				$value = '';
			}
			break;
		case 'talkpagenamee':
			if ( $this->mTitle->canTalk() ) {
				$talkPage = $this->mTitle->getTalkPage();
				$value = wfEscapeWikiText( $talkPage->getPrefixedURL() );
			} else {
				$value = '';
			}
			break;
		case 'subjectpagename':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedText() );
			break;
		case 'subjectpagenamee':
			$subjPage = $this->mTitle->getSubjectPage();
			$value = wfEscapeWikiText( $subjPage->getPrefixedURL() );
			break;
		case 'pageid': // requested in bug 23427
			$pageid = $this->getTitle()->getArticleID();
			if ( $pageid == 0 ) {
				# 0 means the page doesn't exist in the database,
				# which means the user is previewing a new page.
				# The vary-revision flag must be set, because the magic word
				# will have a different value once the page is saved.
				$this->mOutput->setFlag( 'vary-revision' );
				wfDebug( __METHOD__ . ": {{PAGEID}} used in a new page, setting vary-revision...\n" );
			}
			$value = $pageid ? $pageid : null;
			break;
		case 'revisionid':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned.
			$this->mOutput->setFlag( 'vary-revision-id' );
			wfDebug( __METHOD__ . ": {{REVISIONID}} used, setting vary-revision-id...\n" );
			$value = $this->mRevisionId;
			if ( !$value && $this->mOptions->getSpeculativeRevIdCallback() ) {
				# No revision ID yet: ask the configured callback for a
				# speculative one and record on the output that it was used.
				$value = call_user_func( $this->mOptions->getSpeculativeRevIdCallback() );
				$this->mOutput->setSpeculativeRevIdUsed( $value );
			}
			break;
		case 'revisionday':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 6, 2 ) );
			break;
		case 'revisionday2':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONDAY2}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 6, 2 );
			break;
		case 'revisionmonth':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 4, 2 );
			break;
		case 'revisionmonth1':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONMONTH1}} used, setting vary-revision...\n" );
			$value = intval( substr( $this->getRevisionTimestamp(), 4, 2 ) );
			break;
		case 'revisionyear':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONYEAR}} used, setting vary-revision...\n" );
			$value = substr( $this->getRevisionTimestamp(), 0, 4 );
			break;
		case 'revisiontimestamp':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned. This is for null edits.
			$this->mOutput->setFlag( 'vary-revision' );
			wfDebug( __METHOD__ . ": {{REVISIONTIMESTAMP}} used, setting vary-revision...\n" );
			$value = $this->getRevisionTimestamp();
			break;
		case 'revisionuser':
			# Let the edit saving system know we should parse the page
			# *after* a revision ID has been assigned for null edits.
			$this->mOutput->setFlag( 'vary-user' );
			wfDebug( __METHOD__ . ": {{REVISIONUSER}} used, setting vary-user...\n" );
			$value = $this->getRevisionUser();
			break;
		case 'revisionsize':
			$value = $this->getRevisionSize();
			break;
		case 'namespace':
			$value = str_replace( '_', ' ', $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacee':
			$value = wfUrlencode( $wgContLang->getNsText( $this->mTitle->getNamespace() ) );
			break;
		case 'namespacenumber':
			$value = $this->mTitle->getNamespace();
			break;
		case 'talkspace':
			$value = $this->mTitle->canTalk()
				? str_replace( '_', ' ', $this->mTitle->getTalkNsText() )
				: '';
			break;
		case 'talkspacee':
			$value = $this->mTitle->canTalk() ? wfUrlencode( $this->mTitle->getTalkNsText() ) : '';
			break;
		case 'subjectspace':
			$value = str_replace( '_', ' ', $this->mTitle->getSubjectNsText() );
			break;
		case 'subjectspacee':
			$value = ( wfUrlencode( $this->mTitle->getSubjectNsText() ) );
			break;
		case 'currentdayname':
			$value = $pageLang->getWeekdayName( (int)MWTimestamp::getInstance( $ts )->format( 'w' ) + 1 );
			break;
		case 'currentyear':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'Y' ), true );
			break;
		case 'currenttime':
			$value = $pageLang->time( wfTimestamp( TS_MW, $ts ), false, false );
			break;
		case 'currenthour':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'H' ), true );
			break;
		case 'currentweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getInstance( $ts )->format( 'W' ) );
			break;
		case 'currentdow':
			$value = $pageLang->formatNum( MWTimestamp::getInstance( $ts )->format( 'w' ) );
			break;
		case 'localdayname':
			$value = $pageLang->getWeekdayName(
				(int)MWTimestamp::getLocalInstance( $ts )->format( 'w' ) + 1
			);
			break;
		case 'localyear':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'Y' ), true );
			break;
		case 'localtime':
			$value = $pageLang->time(
				MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' ),
				false,
				false
			);
			break;
		case 'localhour':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'H' ), true );
			break;
		case 'localweek':
			# @bug 4594 PHP5 has it zero padded, PHP4 does not, cast to
			# int to remove the padding
			$value = $pageLang->formatNum( (int)MWTimestamp::getLocalInstance( $ts )->format( 'W' ) );
			break;
		case 'localdow':
			$value = $pageLang->formatNum( MWTimestamp::getLocalInstance( $ts )->format( 'w' ) );
			break;
		case 'numberofarticles':
			$value = $pageLang->formatNum( SiteStats::articles() );
			break;
		case 'numberoffiles':
			$value = $pageLang->formatNum( SiteStats::images() );
			break;
		case 'numberofusers':
			$value = $pageLang->formatNum( SiteStats::users() );
			break;
		case 'numberofactiveusers':
			$value = $pageLang->formatNum( SiteStats::activeUsers() );
			break;
		case 'numberofpages':
			$value = $pageLang->formatNum( SiteStats::pages() );
			break;
		case 'numberofadmins':
			$value = $pageLang->formatNum( SiteStats::numberingroup( 'sysop' ) );
			break;
		case 'numberofedits':
			$value = $pageLang->formatNum( SiteStats::edits() );
			break;
		case 'currenttimestamp':
			$value = wfTimestamp( TS_MW, $ts );
			break;
		case 'localtimestamp':
			$value = MWTimestamp::getLocalInstance( $ts )->format( 'YmdHis' );
			break;
		case 'currentversion':
			$value = SpecialVersion::getVersion();
			break;
		# The cases below return straight away and therefore bypass the
		# write to $this->mVarCache at the end of the method.
		case 'articlepath':
			return $wgArticlePath;
		case 'sitename':
			return $wgSitename;
		case 'server':
			return $wgServer;
		case 'servername':
			return $wgServerName;
		case 'scriptpath':
			return $wgScriptPath;
		case 'stylepath':
			return $wgStylePath;
		case 'directionmark':
			return $pageLang->getDirMark();
		case 'contentlanguage':
			global $wgLanguageCode;
			return $wgLanguageCode;
		case 'cascadingsources':
			$value = CoreParserFunctions::cascadingsources( $this );
			break;
		default:
			# Unknown ID: let extensions supply the value through $ret.
			$ret = null;
			Hooks::run(
				'ParserGetVariableValueSwitch',
				[ &$parser, &$this->mVarCache, &$index, &$ret, &$frame ]
			);

			return $ret;
	}

	# Remember the computed value so later uses of the same variable are cheap.
	if ( $index ) {
		$this->mVarCache[$index] = $value;
	}

	return $value;
}
||
2801 | |||
/**
 * Build the matchers for magic variables (like CURRENTMONTHNAME) and for
 * substitution modifiers, and store them on the parser.
 *
 * @private
 */
public function initialiseVariables() {
	// Collect both ID lists first, then wrap each one in a MagicWordArray.
	$ids = [
		'variables' => MagicWord::getVariableIDs(),
		'subst' => MagicWord::getSubstIDs(),
	];

	$this->mVariables = new MagicWordArray( $ids['variables'] );
	$this->mSubstWords = new MagicWordArray( $ids['subst'] );
}
||
2814 | |||
/**
 * Preprocess some wikitext and return the document tree.
 * This is the ghost of replace_variables().
 *
 * Constraints on the result:
 *  - The generated DOM tree must depend only on the input text and the flags.
 *  - The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a
 *    regression of bug 4899.
 *  - Any flag added to $flags, or any other parameter liable to change the
 *    DOM tree for a given text, must be passed through the section
 *    identifier in the section edit link and thus back to extractSections().
 *
 * The output is currently only cached in process memory, but a persistent
 * cache may be implemented at a later date which takes further advantage of
 * these strict dependency requirements.
 *
 * @param string $text The text to parse
 * @param int $flags Bitwise combination of:
 *   - self::PTD_FOR_INCLUSION: Handle "<noinclude>" and "<includeonly>" as if
 *     the text is being included. Default is to assume a direct page view.
 *
 * @return PPNode
 */
public function preprocessToDom( $text, $flags = 0 ) {
	// All the work is done by the preprocessor attached to this parser.
	return $this->getPreprocessor()->preprocessToObj( $text, $flags );
}
||
2841 | |||
/**
 * Return a three-element array: leading whitespace, string contents, trailing whitespace
 *
 * "Whitespace" is whatever ltrim()/rtrim() strip by default. All three
 * elements are always strings; an empty input gives [ '', '', '' ].
 *
 * @param string $s
 *
 * @return array [ leading whitespace, trimmed contents, trailing whitespace ]
 */
public static function splitWhitespace( $s ) {
	$ltrimmed = ltrim( $s );
	$trimmed = rtrim( $ltrimmed );

	$leadLength = strlen( $s ) - strlen( $ltrimmed );
	$trailLength = strlen( $ltrimmed ) - strlen( $trimmed );

	// Only call substr() when there is something to cut out: on PHP < 8
	// substr( '', 0, 0 ) returns false rather than '', and a negative start
	// of -0 would return the whole string instead of nothing.
	$w1 = $leadLength > 0 ? substr( $s, 0, $leadLength ) : '';
	$w2 = $trailLength > 0 ? substr( $ltrimmed, -$trailLength ) : '';

	return [ $w1, $trimmed, $w2 ];
}
||
2861 | |||
/**
 * Expand magic variables, templates and template arguments in a piece of
 * wikitext. Templates are substituted recursively, taking care to avoid
 * infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  self::OT_WIKI: only {{subst:}} templates
 *  self::OT_PREPROCESS: templates but not extension tags
 *  self::OT_HTML: all templates and extension tags
 *
 * Empty text, and text longer than the max include size from the parser
 * options, is returned unchanged.
 *
 * @param string $text The text to transform
 * @param bool|PPFrame $frame Object describing the arguments passed to the
 *   template. Arguments may also be provided as an associative array, as
 *   was the usual case before MW1.12. Providing arguments this way may be
 *   useful for extensions wishing to perform variable replacement
 *   explicitly.
 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
 *   double-brace expansion.
 * @return string
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false ) {
	$length = strlen( $text );

	# Nothing to expand in empty text
	if ( $length < 1 ) {
		return $text;
	}
	# Prevent too big inclusions
	if ( $length > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	# Make sure we end up with a real frame to expand in
	if ( !( $frame instanceof PPFrame ) ) {
		if ( $frame === false ) {
			$frame = $this->getPreprocessor()->newFrame();
		} else {
			wfDebug( __METHOD__ . " called using plain parameters instead of "
				. "a PPFrame instance. Creating custom frame.\n" );
			$frame = $this->getPreprocessor()->newCustomFrame( $frame );
		}
	}

	$tree = $this->preprocessToDom( $text );
	$expandFlags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	return $frame->expand( $tree, $expandFlags );
}
||
2903 | |||
/**
 * Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
 *
 * Arguments without '=' are numbered from 1 in order of appearance.
 * Arguments of the form "name=value" are stored under the trimmed name with
 * the trimmed value; only the first '=' separates name from value. A later
 * argument with the same key overwrites an earlier one.
 *
 * @param array $args List of raw argument strings
 *
 * @return array Map of argument name (or 1-based position) to value
 */
public static function createAssocArgs( $args ) {
	$assocArgs = [];
	$index = 1;
	foreach ( $args as $arg ) {
		$eqpos = strpos( $arg, '=' );
		if ( $eqpos === false ) {
			// Positional argument
			$assocArgs[$index++] = $arg;
			continue;
		}

		// Named argument. trim() always yields a string (an empty or false
		// substr() result becomes ''), so no further false checks are needed.
		$name = trim( substr( $arg, 0, $eqpos ) );
		$value = trim( substr( $arg, $eqpos + 1 ) );
		$assocArgs[$name] = $value;
	}

	return $assocArgs;
}
||
2932 | |||
/**
 * Warn the user when a parser limitation is reached.
 * Will warn the user at most once per limitation type.
 *
 * The results are shown during preview and run through the Parser (See EditPage.php)
 *
 * The message "<type>-warning" is added to the output as a warning and the
 * page is put in the tracking category "<type>-category".
 *
 * @param string $limitationType Should be one of:
 *   'expensive-parserfunction' (corresponding messages:
 *       'expensive-parserfunction-warning',
 *       'expensive-parserfunction-category')
 *   'post-expand-template-argument' (corresponding messages:
 *       'post-expand-template-argument-warning',
 *       'post-expand-template-argument-category')
 *   'post-expand-template-inclusion' (corresponding messages:
 *       'post-expand-template-inclusion-warning',
 *       'post-expand-template-inclusion-category')
 *   'node-count-exceeded' (corresponding messages:
 *       'node-count-exceeded-warning',
 *       'node-count-exceeded-category')
 *   'expansion-depth-exceeded' (corresponding messages:
 *       'expansion-depth-exceeded-warning',
 *       'expansion-depth-exceeded-category')
 * @param string|int|null $current Current value
 * @param string|int|null $max Maximum allowed, when an explicit limit has been
 *   exceeded, provide the values (optional)
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# Passing $current and $max does no harm when the message does not use them.
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$message = wfMessage( "$limitationType-warning" );
	$warning = $message->numParams( $current, $max )->text();

	$this->mOutput->addWarning( $warning );
	$this->addTrackingCategory( "$limitationType-category" );
}
||
2968 | |||
2969 | /** |
||
2970 | * Return the text of a template, after recursively |
||
2971 | * replacing any variables or templates within the template. |
||
2972 | * |
||
2973 | * @param array $piece The parts of the template |
||
2974 | * $piece['title']: the title, i.e. the part before the | |
||
2975 | * $piece['parts']: the parameter array |
||
2976 | * $piece['lineStart']: whether the brace was at the start of a line |
||
2977 | * @param PPFrame $frame The current frame, contains template arguments |
||
2978 | * @throws Exception |
||
2979 | * @return string The text of the template |
||
2980 | */ |
||
2981 | public function braceSubstitution( $piece, $frame ) { |
||
2982 | |||
2983 | // Flags |
||
2984 | |||
2985 | // $text has been filled |
||
2986 | $found = false; |
||
2987 | // wiki markup in $text should be escaped |
||
2988 | $nowiki = false; |
||
2989 | // $text is HTML, armour it against wikitext transformation |
||
2990 | $isHTML = false; |
||
2991 | // Force interwiki transclusion to be done in raw mode not rendered |
||
2992 | $forceRawInterwiki = false; |
||
2993 | // $text is a DOM node needing expansion in a child frame |
||
2994 | $isChildObj = false; |
||
2995 | // $text is a DOM node needing expansion in the current frame |
||
2996 | $isLocalObj = false; |
||
2997 | |||
2998 | # Title object, where $text came from |
||
2999 | $title = false; |
||
3000 | |||
3001 | # $part1 is the bit before the first |, and must contain only title characters. |
||
3002 | # Various prefixes will be stripped from it later. |
||
3003 | $titleWithSpaces = $frame->expand( $piece['title'] ); |
||
3004 | $part1 = trim( $titleWithSpaces ); |
||
3005 | $titleText = false; |
||
3006 | |||
3007 | # Original title text preserved for various purposes |
||
3008 | $originalTitle = $part1; |
||
3009 | |||
3010 | # $args is a list of argument nodes, starting from index 0, not including $part1 |
||
3011 | # @todo FIXME: If piece['parts'] is null then the call to getLength() |
||
3012 | # below won't work b/c this $args isn't an object |
||
3013 | $args = ( null == $piece['parts'] ) ? [] : $piece['parts']; |
||
3014 | |||
3015 | $profileSection = null; // profile templates |
||
3016 | |||
3017 | # SUBST |
||
3018 | if ( !$found ) { |
||
3019 | $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 ); |
||
3020 | |||
3021 | # Possibilities for substMatch: "subst", "safesubst" or FALSE |
||
3022 | # Decide whether to expand template or keep wikitext as-is. |
||
3023 | if ( $this->ot['wiki'] ) { |
||
3024 | if ( $substMatch === false ) { |
||
3025 | $literal = true; # literal when in PST with no prefix |
||
3026 | } else { |
||
3027 | $literal = false; # expand when in PST with subst: or safesubst: |
||
3028 | } |
||
3029 | } else { |
||
3030 | if ( $substMatch == 'subst' ) { |
||
3031 | $literal = true; # literal when not in PST with plain subst: |
||
3032 | } else { |
||
3033 | $literal = false; # expand when not in PST with safesubst: or no prefix |
||
3034 | } |
||
3035 | } |
||
3036 | if ( $literal ) { |
||
3037 | $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
||
3038 | $isLocalObj = true; |
||
3039 | $found = true; |
||
3040 | } |
||
3041 | } |
||
3042 | |||
3043 | # Variables |
||
3044 | if ( !$found && $args->getLength() == 0 ) { |
||
3045 | $id = $this->mVariables->matchStartToEnd( $part1 ); |
||
3046 | if ( $id !== false ) { |
||
3047 | $text = $this->getVariableValue( $id, $frame ); |
||
3048 | if ( MagicWord::getCacheTTL( $id ) > -1 ) { |
||
3049 | $this->mOutput->updateCacheExpiry( MagicWord::getCacheTTL( $id ) ); |
||
3050 | } |
||
3051 | $found = true; |
||
3052 | } |
||
3053 | } |
||
3054 | |||
3055 | # MSG, MSGNW and RAW |
||
3056 | if ( !$found ) { |
||
3057 | # Check for MSGNW: |
||
3058 | $mwMsgnw = MagicWord::get( 'msgnw' ); |
||
3059 | if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) { |
||
3060 | $nowiki = true; |
||
3061 | } else { |
||
3062 | # Remove obsolete MSG: |
||
3063 | $mwMsg = MagicWord::get( 'msg' ); |
||
3064 | $mwMsg->matchStartAndRemove( $part1 ); |
||
3065 | } |
||
3066 | |||
3067 | # Check for RAW: |
||
3068 | $mwRaw = MagicWord::get( 'raw' ); |
||
3069 | if ( $mwRaw->matchStartAndRemove( $part1 ) ) { |
||
3070 | $forceRawInterwiki = true; |
||
3071 | } |
||
3072 | } |
||
3073 | |||
3074 | # Parser functions |
||
3075 | if ( !$found ) { |
||
3076 | $colonPos = strpos( $part1, ':' ); |
||
3077 | if ( $colonPos !== false ) { |
||
3078 | $func = substr( $part1, 0, $colonPos ); |
||
3079 | $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ]; |
||
3080 | $argsLength = $args->getLength(); |
||
3081 | for ( $i = 0; $i < $argsLength; $i++ ) { |
||
3082 | $funcArgs[] = $args->item( $i ); |
||
3083 | } |
||
3084 | try { |
||
3085 | $result = $this->callParserFunction( $frame, $func, $funcArgs ); |
||
3086 | } catch ( Exception $ex ) { |
||
3087 | throw $ex; |
||
3088 | } |
||
3089 | |||
3090 | # The interface for parser functions allows for extracting |
||
3091 | # flags into the local scope. Extract any forwarded flags |
||
3092 | # here. |
||
3093 | extract( $result ); |
||
3094 | } |
||
3095 | } |
||
3096 | |||
3097 | # Finish mangling title and then check for loops. |
||
3098 | # Set $title to a Title object and $titleText to the PDBK |
||
3099 | if ( !$found ) { |
||
3100 | $ns = NS_TEMPLATE; |
||
3101 | # Split the title into page and subpage |
||
3102 | $subpage = ''; |
||
3103 | $relative = $this->maybeDoSubpageLink( $part1, $subpage ); |
||
3104 | if ( $part1 !== $relative ) { |
||
3105 | $part1 = $relative; |
||
3106 | $ns = $this->mTitle->getNamespace(); |
||
3107 | } |
||
3108 | $title = Title::newFromText( $part1, $ns ); |
||
3109 | if ( $title ) { |
||
3110 | $titleText = $title->getPrefixedText(); |
||
3111 | # Check for language variants if the template is not found |
||
3112 | if ( $this->getConverterLanguage()->hasVariants() && $title->getArticleID() == 0 ) { |
||
3113 | $this->getConverterLanguage()->findVariantLink( $part1, $title, true ); |
||
3114 | } |
||
3115 | # Do recursion depth check |
||
3116 | $limit = $this->mOptions->getMaxTemplateDepth(); |
||
3117 | View Code Duplication | if ( $frame->depth >= $limit ) { |
|
3118 | $found = true; |
||
3119 | $text = '<span class="error">' |
||
3120 | . wfMessage( 'parser-template-recursion-depth-warning' ) |
||
3121 | ->numParams( $limit )->inContentLanguage()->text() |
||
3122 | . '</span>'; |
||
3123 | } |
||
3124 | } |
||
3125 | } |
||
3126 | |||
3127 | # Load from database |
||
3128 | if ( !$found && $title ) { |
||
3129 | $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() ); |
||
3130 | if ( !$title->isExternal() ) { |
||
3131 | if ( $title->isSpecialPage() |
||
3132 | && $this->mOptions->getAllowSpecialInclusion() |
||
3133 | && $this->ot['html'] |
||
3134 | ) { |
||
3135 | $specialPage = SpecialPageFactory::getPage( $title->getDBkey() ); |
||
3136 | // Pass the template arguments as URL parameters. |
||
3137 | // "uselang" will have no effect since the Language object |
||
3138 | // is forced to the one defined in ParserOptions. |
||
3139 | $pageArgs = []; |
||
3140 | $argsLength = $args->getLength(); |
||
3141 | for ( $i = 0; $i < $argsLength; $i++ ) { |
||
3142 | $bits = $args->item( $i )->splitArg(); |
||
3143 | if ( strval( $bits['index'] ) === '' ) { |
||
3144 | $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) ); |
||
3145 | $value = trim( $frame->expand( $bits['value'] ) ); |
||
3146 | $pageArgs[$name] = $value; |
||
3147 | } |
||
3148 | } |
||
3149 | |||
3150 | // Create a new context to execute the special page |
||
3151 | $context = new RequestContext; |
||
3152 | $context->setTitle( $title ); |
||
3153 | $context->setRequest( new FauxRequest( $pageArgs ) ); |
||
3154 | if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) { |
||
3155 | $context->setUser( $this->getUser() ); |
||
3156 | } else { |
||
3157 | // If this page is cached, then we better not be per user. |
||
3158 | $context->setUser( User::newFromName( '127.0.0.1', false ) ); |
||
3159 | } |
||
3160 | $context->setLanguage( $this->mOptions->getUserLangObj() ); |
||
3161 | $ret = SpecialPageFactory::capturePath( |
||
3162 | $title, $context, $this->getLinkRenderer() ); |
||
3163 | if ( $ret ) { |
||
3164 | $text = $context->getOutput()->getHTML(); |
||
3165 | $this->mOutput->addOutputPageMetadata( $context->getOutput() ); |
||
3166 | $found = true; |
||
3167 | $isHTML = true; |
||
3168 | if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) { |
||
3169 | $this->mOutput->updateRuntimeAdaptiveExpiry( |
||
3170 | $specialPage->maxIncludeCacheTime() |
||
3171 | ); |
||
3172 | } |
||
3173 | } |
||
3174 | } elseif ( MWNamespace::isNonincludable( $title->getNamespace() ) ) { |
||
3175 | $found = false; # access denied |
||
3176 | wfDebug( __METHOD__ . ": template inclusion denied for " . |
||
3177 | $title->getPrefixedDBkey() . "\n" ); |
||
3178 | } else { |
||
3179 | list( $text, $title ) = $this->getTemplateDom( $title ); |
||
3180 | if ( $text !== false ) { |
||
3181 | $found = true; |
||
3182 | $isChildObj = true; |
||
3183 | } |
||
3184 | } |
||
3185 | |||
3186 | # If the title is valid but undisplayable, make a link to it |
||
3187 | if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
||
3188 | $text = "[[:$titleText]]"; |
||
3189 | $found = true; |
||
3190 | } |
||
3191 | } elseif ( $title->isTrans() ) { |
||
3192 | # Interwiki transclusion |
||
3193 | if ( $this->ot['html'] && !$forceRawInterwiki ) { |
||
3194 | $text = $this->interwikiTransclude( $title, 'render' ); |
||
3195 | $isHTML = true; |
||
3196 | } else { |
||
3197 | $text = $this->interwikiTransclude( $title, 'raw' ); |
||
3198 | # Preprocess it like a template |
||
3199 | $text = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION ); |
||
3200 | $isChildObj = true; |
||
3201 | } |
||
3202 | $found = true; |
||
3203 | } |
||
3204 | |||
3205 | # Do infinite loop check |
||
3206 | # This has to be done after redirect resolution to avoid infinite loops via redirects |
||
3207 | if ( !$frame->loopCheck( $title ) ) { |
||
3208 | $found = true; |
||
3209 | $text = '<span class="error">' |
||
3210 | . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text() |
||
3211 | . '</span>'; |
||
3212 | wfDebug( __METHOD__ . ": template loop broken at '$titleText'\n" ); |
||
3213 | } |
||
3214 | } |
||
3215 | |||
3216 | # If we haven't found text to substitute by now, we're done |
||
3217 | # Recover the source wikitext and return it |
||
3218 | if ( !$found ) { |
||
3219 | $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args ); |
||
3220 | if ( $profileSection ) { |
||
3221 | $this->mProfiler->scopedProfileOut( $profileSection ); |
||
3222 | } |
||
3223 | return [ 'object' => $text ]; |
||
3224 | } |
||
3225 | |||
3226 | # Expand DOM-style return values in a child frame |
||
3227 | if ( $isChildObj ) { |
||
3228 | # Clean up argument array |
||
3229 | $newFrame = $frame->newChild( $args, $title ); |
||
3230 | |||
3231 | if ( $nowiki ) { |
||
3232 | $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG ); |
||
3233 | } elseif ( $titleText !== false && $newFrame->isEmpty() ) { |
||
3234 | # Expansion is eligible for the empty-frame cache |
||
3235 | $text = $newFrame->cachedExpand( $titleText, $text ); |
||
3236 | } else { |
||
3237 | # Uncached expansion |
||
3238 | $text = $newFrame->expand( $text ); |
||
3239 | } |
||
3240 | } |
||
3241 | if ( $isLocalObj && $nowiki ) { |
||
3242 | $text = $frame->expand( $text, PPFrame::RECOVER_ORIG ); |
||
3243 | $isLocalObj = false; |
||
3244 | } |
||
3245 | |||
3246 | if ( $profileSection ) { |
||
3247 | $this->mProfiler->scopedProfileOut( $profileSection ); |
||
3248 | } |
||
3249 | |||
3250 | # Replace raw HTML by a placeholder |
||
3251 | if ( $isHTML ) { |
||
3252 | $text = $this->insertStripItem( $text ); |
||
3253 | } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) { |
||
3254 | # Escape nowiki-style return values |
||
3255 | $text = wfEscapeWikiText( $text ); |
||
3256 | } elseif ( is_string( $text ) |
||
3257 | && !$piece['lineStart'] |
||
3258 | && preg_match( '/^(?:{\\||:|;|#|\*)/', $text ) |
||
3259 | ) { |
||
3260 | # Bug 529: if the template begins with a table or block-level |
||
3261 | # element, it should be treated as beginning a new line. |
||
3262 | # This behavior is somewhat controversial. |
||
3263 | $text = "\n" . $text; |
||
3264 | } |
||
3265 | |||
3266 | if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) { |
||
3267 | # Error, oversize inclusion |
||
3268 | if ( $titleText !== false ) { |
||
3269 | # Make a working, properly escaped link if possible (bug 23588) |
||
3270 | $text = "[[:$titleText]]"; |
||
3271 | } else { |
||
3272 | # This will probably not be a working link, but at least it may |
||
3273 | # provide some hint of where the problem is |
||
3274 | preg_replace( '/^:/', '', $originalTitle ); |
||
3275 | $text = "[[:$originalTitle]]"; |
||
3276 | } |
||
3277 | $text .= $this->insertStripItem( '<!-- WARNING: template omitted, ' |
||
3278 | . 'post-expand include size too large -->' ); |
||
3279 | $this->limitationWarn( 'post-expand-template-inclusion' ); |
||
3280 | } |
||
3281 | |||
3282 | if ( $isLocalObj ) { |
||
3283 | $ret = [ 'object' => $text ]; |
||
3284 | } else { |
||
3285 | $ret = [ 'text' => $text ]; |
||
3286 | } |
||
3287 | |||
3288 | return $ret; |
||
3289 | } |
||
3290 | |||
/**
 * Call a parser function and return an array with text and flags.
 *
 * The returned array will always contain a boolean 'found', indicating
 * whether the parser function was found or not. It may also contain the
 * following:
 *  text: string|object, resulting wikitext or PP DOM object
 *  isHTML: bool, $text is HTML, armour it against wikitext transformation
 *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
 *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
 *  nowiki: bool, wiki markup in $text should be escaped
 *
 * @since 1.21
 * @param PPFrame $frame The current frame, contains template arguments
 * @param string $function Function name
 * @param array $args Arguments to the function
 * @throws MWException If the registered hook for the function is not callable
 * @return array
 */
public function callParserFunction( $frame, $function, array $args = [] ) {
	global $wgContLang;

	# Case sensitive functions
	if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
		$function = $this->mFunctionSynonyms[1][$function];
	} else {
		# Case insensitive functions
		$function = $wgContLang->lc( $function );
		if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
			$function = $this->mFunctionSynonyms[0][$function];
		} else {
			return [ 'found' => false ];
		}
	}

	list( $callback, $flags ) = $this->mFunctionHooks[$function];

	# Workaround for PHP bug 35229 and similar
	if ( !is_callable( $callback ) ) {
		throw new MWException( "Tag hook for $function is not callable\n" );
	}

	# The parser is the first hook argument and is handed over by reference.
	# Bind it through a local variable: from PHP 7.1 on, &$this no longer
	# yields a real reference, which makes hooks that declare the parser
	# parameter by reference emit warnings.
	$parser = $this;
	$allArgs = [ &$parser ];
	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Convert arguments to PPNodes and collect for appending to $allArgs
		$funcArgs = [];
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode || $k === 0 ) {
				$funcArgs[] = $v;
			} else {
				$funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
			}
		}

		# Add a frame parameter, and pass the arguments as an array
		$allArgs[] = $frame;
		$allArgs[] = $funcArgs;
	} else {
		# Convert arguments to plain text and append to $allArgs
		foreach ( $args as $k => $v ) {
			if ( $v instanceof PPNode ) {
				$allArgs[] = trim( $frame->expand( $v ) );
			} elseif ( is_int( $k ) && $k >= 0 ) {
				$allArgs[] = trim( $v );
			} else {
				$allArgs[] = trim( "$k=$v" );
			}
		}
	}

	$result = call_user_func_array( $callback, $allArgs );

	# The interface for function hooks allows them to return a wikitext
	# string or an array containing the string and any flags. This mungs
	# things around to match what this method should return.
	if ( !is_array( $result ) ) {
		$result = [
			'found' => true,
			'text' => $result,
		];
	} else {
		# A positional first element is the text unless 'text' is given explicitly
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		# Array union: only adds 'found' when the hook did not set it itself
		$result += [
			'found' => true,
		];
	}

	$noparse = true;
	$preprocessFlags = 0;
	if ( isset( $result['noparse'] ) ) {
		$noparse = $result['noparse'];
	}
	if ( isset( $result['preprocessFlags'] ) ) {
		$preprocessFlags = $result['preprocessFlags'];
	}

	# When the hook asks for parsing, hand back a DOM to be expanded in a child frame
	if ( !$noparse ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
||
3397 | |||
/**
 * Get the semi-parsed DOM representation of a template with a given title,
 * and its redirect destination title. Cached.
 *
 * Two per-parser caches are consulted: mTplRedirCache maps a requested
 * title to the title it resolved to, and mTplDomCache maps the resolved
 * title to its DOM (or false when no text could be fetched).
 *
 * @param Title $title
 *
 * @return array Two elements: the DOM (or false if the template text could
 *   not be fetched) and the final Title
 */
public function getTemplateDom( $title ) {
	$cacheTitle = $title;
	$titleText = $title->getPrefixedDBkey();

	# Follow a previously recorded redirect before looking up the DOM
	if ( isset( $this->mTplRedirCache[$titleText] ) ) {
		list( $ns, $dbk ) = $this->mTplRedirCache[$titleText];
		$title = Title::makeTitle( $ns, $dbk );
		$titleText = $title->getPrefixedDBkey();
	}
	# isset() is false for a cached `false`, so known misses are fetched again
	if ( isset( $this->mTplDomCache[$titleText] ) ) {
		return [ $this->mTplDomCache[$titleText], $title ];
	}

	# Cache miss, go to the database
	list( $text, $title ) = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleText] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, self::PTD_FOR_INCLUSION );
	$this->mTplDomCache[$titleText] = $dom;

	# Remember the redirect so the next lookup of the original title is resolved locally
	if ( !$title->equals( $cacheTitle ) ) {
		$this->mTplRedirCache[$cacheTitle->getPrefixedDBkey()] =
			[ $title->getNamespace(), $title->getDBkey() ];
	}

	return [ $dom, $title ];
}
||
3437 | |||
/**
 * Fetch the current revision of a given title. Note that the revision
 * (and even the title) may not exist in the database, so everything
 * contributing to the output of the parser should use this method
 * where possible, rather than getting the revisions themselves. This
 * method also caches its results, so using it benefits performance.
 *
 * @since 1.24
 * @param Title $title
 * @return Revision
 */
public function fetchCurrentRevisionOfTitle( $title ) {
	# The LRU cache (100 entries) is created on first use
	if ( !$this->currentRevisionCache ) {
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}
	$cache = $this->currentRevisionCache;
	$key = $title->getPrefixedDBkey();

	if ( !$cache->has( $key ) ) {
		// Defaults to Parser::statelessFetchRevision()
		$fetcher = $this->mOptions->getCurrentRevisionCallback();
		$cache->set( $key, call_user_func( $fetcher, $title, $this ) );
	}

	return $cache->get( $key );
}
||
3462 | |||
/**
 * Wrapper around Revision::newKnownCurrent that accepts (and ignores) an
 * additional parser parameter, so it can be used as the current-revision
 * callback.
 *
 * @since 1.24
 * @param Title $title
 * @param Parser|bool $parser Unused
 * @return Revision|bool False if missing
 */
public static function statelessFetchRevision( Title $title, $parser = false ) {
	$articleId = $title->getArticleID();
	$latestRevId = $title->getLatestRevID();

	$revision = Revision::newKnownCurrent( wfGetDB( DB_REPLICA ), $articleId, $latestRevId );
	if ( !$revision ) {
		return $revision;
	}

	# Attach the title object we were given to the revision
	$revision->setTitle( $title );

	return $revision;
}
||
3483 | |||
/**
 * Fetch the unparsed text of a template and register a reference to it.
 *
 * Every dependency reported by the template callback is recorded on the
 * parser output.
 *
 * @param Title $title
 * @return array ( string or false, Title )
 */
public function fetchTemplateAndTitle( $title ) {
	// Defaults to Parser::statelessFetchTemplate()
	$fetched = call_user_func( $this->mOptions->getTemplateCallback(), $title, $this );

	$text = $fetched['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text,
		// so it must not occur in fetched template text.
		$text = strtr( $text, "\x7f", "?" );
	}

	if ( isset( $fetched['finalTitle'] ) ) {
		$finalTitle = $fetched['finalTitle'];
	} else {
		$finalTitle = $title;
	}

	if ( isset( $fetched['deps'] ) ) {
		foreach ( $fetched['deps'] as $dependency ) {
			$depTitle = $dependency['title'];
			$this->mOutput->addTemplate( $depTitle, $dependency['page_id'], $dependency['rev_id'] );
			if ( $depTitle->equals( $this->getTitle() ) ) {
				// If we transclude ourselves, the final result
				// will change based on the new version of the page
				$this->mOutput->setFlag( 'vary-revision' );
			}
		}
	}

	return [ $text, $finalTitle ];
}
||
3511 | |||
/**
 * Fetch the unparsed text of a template and register a reference to it.
 * Convenience form of fetchTemplateAndTitle() that drops the title.
 *
 * @param Title $title
 * @return string|bool
 */
public function fetchTemplate( $title ) {
	$textAndTitle = $this->fetchTemplateAndTitle( $title );

	return $textAndTitle[0];
}
||
3520 | |||
/**
 * Static function to get a template
 * Can be overridden via ParserOptions::setTemplateCallback().
 *
 * @param Title $title
 * @param bool|Parser $parser
 *
 * @return array With keys 'text' (string or false), 'finalTitle' and
 *   'deps' (list of arrays with 'title', 'page_id' and 'rev_id')
 */
public static function statelessFetchTemplate( $title, $parser = false ) {
	$text = false;
	$skip = false;
	$finalTitle = $title;
	$deps = [];

	# Fetch the article, following at most one redirect: the first pass
	# handles the requested title, the second its redirect target (if any)
	$pass = 0;
	while ( $pass < 2 && is_object( $title ) ) {
		# Give extensions a chance to select the revision instead
		$id = false; # Assume current
		Hooks::run( 'BeforeParserFetchTemplateAndtitle',
			[ $parser, $title, &$skip, &$id ] );

		if ( $skip ) {
			$text = false;
			$deps[] = [
				'title' => $title,
				'page_id' => $title->getArticleID(),
				'rev_id' => null
			];
			break;
		}

		# Get the revision
		if ( $id ) {
			$rev = Revision::newFromId( $id );
		} elseif ( $parser ) {
			$rev = $parser->fetchCurrentRevisionOfTitle( $title );
		} else {
			$rev = Revision::newFromTitle( $title );
		}
		$revId = $rev ? $rev->getId() : 0;

		# If there is no current revision, there is no page
		if ( $id === false && !$rev ) {
			LinkCache::singleton()->addBadLinkObj( $title );
		}

		$deps[] = [
			'title' => $title,
			'page_id' => $title->getArticleID(),
			'rev_id' => $revId
		];
		if ( $rev && !$title->equals( $rev->getTitle() ) ) {
			# We fetched a rev from a different title; register it too...
			$deps[] = [
				'title' => $rev->getTitle(),
				'page_id' => $rev->getPage(),
				'rev_id' => $revId
			];
		}

		if ( $rev ) {
			$content = $rev->getContent();
			$text = $content ? $content->getWikitextForTransclusion() : null;

			if ( $text === false || $text === null ) {
				$text = false;
				break;
			}
		} elseif ( $title->getNamespace() == NS_MEDIAWIKI ) {
			# No revision, but the title is in the MediaWiki namespace:
			# fall back to the interface message of that name
			global $wgContLang;
			$message = wfMessage( $wgContLang->lcfirst( $title->getText() ) )->inContentLanguage();
			if ( !$message->exists() ) {
				$text = false;
				break;
			}
			$content = $message->content();
			$text = $message->plain();
		} else {
			break;
		}

		if ( !$content ) {
			break;
		}

		# Redirect?
		$finalTitle = $title;
		$title = $content->getRedirectTarget();

		$pass++;
	}

	return [
		'text' => $text,
		'finalTitle' => $finalTitle,
		'deps' => $deps
	];
}
||
3612 | |||
/**
 * Fetch a file and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 * Convenience form of fetchFileAndTitle() that drops the title.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|bool
 */
public function fetchFile( $title, $options = [] ) {
	$fileAndTitle = $this->fetchFileAndTitle( $title, $options );

	return $fileAndTitle[0];
}
||
3623 | |||
/**
 * Fetch a file and its title and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return array ( File or false, Title of file )
 */
public function fetchFileAndTitle( $title, $options = [] ) {
	$file = $this->fetchFileNoRegister( $title, $options );

	if ( $file ) {
		$timestamp = $file->getTimestamp();
		$hash = $file->getSha1();
	} else {
		$timestamp = false;
		$hash = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );

	# ...and, when the file found has a different title, register that one
	# as well and hand it back to the caller
	if ( $file && !$title->equals( $file->getTitle() ) ) {
		$title = $file->getTitle();
		$this->mOutput->addImage( $title->getDBkey(), $timestamp, $hash );
	}

	return [ $file, $title ];
}
||
3645 | |||
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * @param Title $title
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|bool
 */
protected function fetchFileNoRegister( $title, $options = [] ) {
	// Broken thumbnail forced by hook
	if ( isset( $options['broken'] ) ) {
		return false;
	}

	// Get by (sha1,timestamp)
	if ( isset( $options['sha1'] ) ) {
		return RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	}

	// Get by (name,timestamp)
	return wfFindFile( $title, $options );
}
||
3666 | |||
/**
 * Transclude an interwiki link.
 *
 * Returns a content-language error message instead of the remote text
 * when scary transcluding is disabled or the request URL is longer than
 * 255 bytes.
 *
 * @param Title $title
 * @param string $action Value for the 'action' URL parameter
 *
 * @return string
 */
public function interwikiTransclude( $title, $action ) {
	global $wgEnableScaryTranscluding;

	if ( !$wgEnableScaryTranscluding ) {
		$disabledMsg = wfMessage( 'scarytranscludedisabled' );
		return $disabledMsg->inContentLanguage()->text();
	}

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 255 ) {
		$tooLongMsg = wfMessage( 'scarytranscludetoolong' );
		return $tooLongMsg->inContentLanguage()->text();
	}

	return $this->fetchScaryTemplateMaybeFromCache( $url );
}
||
3689 | |||
/**
 * Return the content behind a URL, preferring a fresh enough copy from the
 * 'transcache' table and otherwise fetching it over HTTP and storing the
 * result there.
 *
 * @param string $url
 * @return mixed|string Cached or fetched content, or a content-language
 *   error message when the fetch failed
 */
public function fetchScaryTemplateMaybeFromCache( $url ) {
	global $wgTranscludeCacheExpiry;

	# Look for a cache row that is not older than the configured expiry
	$dbr = wfGetDB( DB_REPLICA );
	$oldestAllowed = $dbr->timestamp( time() - $wgTranscludeCacheExpiry );
	$cached = $dbr->selectRow(
		'transcache',
		[ 'tc_time', 'tc_contents' ],
		[ 'tc_url' => $url, "tc_time >= " . $dbr->addQuotes( $oldestAllowed ) ]
	);
	if ( $cached ) {
		return $cached->tc_contents;
	}

	$req = MWHttpRequest::factory( $url, [], __METHOD__ );
	$status = $req->execute(); // Status object
	if ( !$status->isOK() ) {
		if ( $req->getStatus() != 200 ) {
			// Though we failed to fetch the content, this status is useless.
			return wfMessage( 'scarytranscludefailed-httpstatus' )
				->params( $url, $req->getStatus() /* HTTP status */ )->inContentLanguage()->text();
		}

		return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
	}
	$text = $req->getContent();

	# Store the fresh copy, replacing any existing row for this URL
	$dbw = wfGetDB( DB_MASTER );
	$row = [
		'tc_url' => $url,
		'tc_time' => $dbw->timestamp( time() ),
		'tc_contents' => $text
	];
	$dbw->replace( 'transcache', [ 'tc_url' ], $row );

	return $text;
}
||
3724 | |||
/**
 * Triple brace replacement -- used for template arguments
 * @private
 *
 * @param array $piece Uses the 'title' and 'parts' entries
 * @param PPFrame $frame
 *
 * @return array Either [ 'object' => ... ] or [ 'text' => ... ]
 */
public function argSubstitution( $piece, $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$missing = ( $text === false );

	$object = false;
	if ( $missing && $parts->getLength() > 0 ) {
		$defaultAllowed = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultAllowed ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# Account for the argument's size against the include limit
	$oversize = !$this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( $oversize ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $missing && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( $oversize ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	# An object result takes precedence over the text
	if ( $object !== false ) {
		return [ 'object' => $object ];
	}

	return [ 'text' => $text ];
}
||
3771 | |||
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     noClose    Original text did not have a close tag
 * @param PPFrame $frame
 *
 * @throws MWException If a registered hook is not callable or the marker
 *   type is not one of 'none', 'nowiki' or 'general'
 * @return string Either a strip marker, the hook output itself (marker
 *   type 'none'), or an error span produced while expanding the parameters
 */
public function extensionSubstitution( $params, $frame ) {
	static $errorStr = '<span class="error">';
	static $errorLen = 20;

	$name = $frame->expand( $params['name'] );
	if ( substr( $name, 0, $errorLen ) === $errorStr ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	$attrText = !isset( $params['attr'] ) ? null : $frame->expand( $params['attr'] );
	if ( substr( $attrText, 0, $errorLen ) === $errorStr ) {
		// See above
		return $attrText;
	}

	$content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
	if ( substr( $content, 0, $errorLen ) === $errorStr ) {
		// See above
		return $content;
	}

	$marker = self::MARKER_PREFIX . "-$name-"
		. sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;

	$isFunctionTag = isset( $this->mFunctionTagHooks[strtolower( $name )] ) &&
		( $this->ot['html'] || $this->ot['pre'] );
	# Function tag output is returned directly rather than behind a marker
	$markerType = $isFunctionTag ? 'none' : 'general';

	if ( $this->ot['html'] || $isFunctionTag ) {
		$name = strtolower( $name );
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		if ( isset( $params['attributes'] ) ) {
			# Array union: attributes decoded from the tag text win on conflict
			$attributes = $attributes + $params['attributes'];
		}

		if ( isset( $this->mTagHooks[$name] ) ) {
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $this->mTagHooks[$name] ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}
			$output = call_user_func_array( $this->mTagHooks[$name],
				[ $content, $attributes, $this, $frame ] );
		} elseif ( isset( $this->mFunctionTagHooks[$name] ) ) {
			list( $callback, ) = $this->mFunctionTagHooks[$name];
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Tag hook for $name is not callable\n" );
			}

			# Bind the parser through a local variable: from PHP 7.1 on,
			# &$this no longer yields a real reference, which makes hooks
			# that declare the parser parameter by reference emit warnings.
			$parser = $this;
			$output = call_user_func_array( $callback, [ &$parser, $frame, $content, $attributes ] );
		} else {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $name ) . '</span>';
		}

		if ( is_array( $output ) ) {
			# Hooks may return [ text, flags... ]. Only the 'markerType' flag
			# is applied; the whole array is deliberately not extract()ed so
			# that a hook cannot overwrite unrelated local variables.
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		# Not producing HTML: reassemble the tag as source text
		if ( is_null( $attrText ) ) {
			$attrText = '';
		}
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $attrValue ) . '"';
			}
		}
		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			# isset() also covers a missing 'close' key without raising a notice
			$close = isset( $params['close'] ) ? $frame->expand( $params['close'] ) : '';
			if ( substr( $close, 0, $errorLen ) === $errorStr ) {
				// See above
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}
	}

	if ( $markerType === 'none' ) {
		return $output;
	} elseif ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} else {
		throw new MWException( __METHOD__ . ': invalid marker type' );
	}
	return $marker;
}
||
3886 | |||
/**
 * Increment an include size counter
 *
 * The counter is left untouched when the addition would exceed the limit.
 *
 * @param string $type The type of expansion
 * @param int $size The size of the text
 * @return bool False if this inclusion would take it over the maximum, true otherwise
 */
public function incrementIncludeSize( $type, $size ) {
	$limit = $this->mOptions->getMaxIncludeSize();
	if ( $this->mIncludeSizes[$type] + $size > $limit ) {
		return false;
	}

	$this->mIncludeSizes[$type] += $size;

	return true;
}
||
3902 | |||
/**
 * Increment the expensive function count
 *
 * The counter is incremented even when the limit is already exceeded.
 *
 * @return bool False if the limit has been exceeded
 */
public function incrementExpensiveFunctionCount() {
	$this->mExpensiveFunctionCount++;
	$limit = $this->mOptions->getExpensiveParserFunctionLimit();

	return $this->mExpensiveFunctionCount <= $limit;
}
||
3912 | |||
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
 * Fills $this->mDoubleUnderscores, returns the modified text
 *
 * @param string $text
 *
 * @return string
 */
public function doDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = MagicWord::get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# The first occurrence becomes a placeholder that is filled in with
		# the TOC at the end; any further occurrences are dropped.
		$text = $tocWord->replace( '<!--MWTOC-->', $text, 1 );
		$text = $tocWord->replace( '', $text );
	}

	# Now match and remove the rest of them
	$underscoreWords = MagicWord::getDoubleUnderscoreArray();
	$this->mDoubleUnderscores = $underscoreWords->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->mNoGallery = true;
	}
	# An explicit __TOC__ takes precedence over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}
	if ( isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->mTitle->getNamespace() == NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (bug 8068) Allow control over whether robots index a page.
	# @todo FIXME: Bug 14899: __INDEX__ always overrides __NOINDEX__ here! This
	# is not desirable, the last one on the page should win.
	# 'index' is processed last, which is what makes it override 'noindex'.
	$indexPolicies = [
		'noindex' => 'noindex-category',
		'index' => 'index-category',
	];
	foreach ( $indexPolicies as $policy => $trackingCategory ) {
		if ( isset( $this->mDoubleUnderscores[$policy] ) && $this->mTitle->canUseNoindex() ) {
			$this->mOutput->setIndexPolicy( $policy );
			$this->addTrackingCategory( $trackingCategory );
		}
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $switchName ) {
		$this->mOutput->setProperty( $switchName, '' );
	}

	return $text;
}
||
3970 | |||
/**
 * Add a tracking category to the current parser output, using the title
 * being parsed as context.
 *
 * @see ParserOutput::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 */
public function addTrackingCategory( $msg ) {
	$added = $this->mOutput->addTrackingCategory( $msg, $this->mTitle );

	return $added;
}
||
3979 | |||
/**
 * Decorate the rendered headings of a page and build its table of contents.
 *
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an edit-section marker to each section unless __NOEDITSECTION__ is present
 * 3) Build the table of contents, store it in the parser output and insert it
 *    into the text (before the first heading, or at the __TOC__ placeholder)
 * 4) Auto-anchor headings, making every anchor unique on the page
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines. The 'ParserSectionCreate'
 * hook is run once per resulting section.
 *
 * @param string $text Rendered text containing the <h1>..<h6> elements
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain When true, the section list is stored in the parser output
 *   and the TOC may be placed automatically before the first heading
 * @return mixed|string The text with formatted headings (and TOC, if any)
 * @private
 */
public function formatHeadings( $text, $origText, $isMain = true ) {
	global $wgMaxTocLevel, $wgExperimentalHtmlIds;

	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = $showEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on ParserOptions option */
		$showEditLink = $this->mOptions->getEditSection();
	}
	if ( $showEditLink ) {
		$this->mOutput->setEditSectionTokens( true );
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)\s*(?P<header>[\s\S]*?)\s*<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->hideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$numVisible = 0;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$toc = '';
	$head = [];
	$sublevelCount = [];
	$levelCount = [];
	$level = 0;
	$prevlevel = 0;
	$toclevel = 0;
	$prevtoclevel = 0;
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->mTitle->getPrefixedDBkey();
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$byteOffset = 0;
	$tocraw = [];
	# Lower-cased anchors already handed out on this page (used as a set)
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$numbering = '';
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = $markerMatches[1];
			list( $titleText, $sectionIndex ) = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		if ( $toclevel ) {
			$prevlevel = $level;
		}
		$level = $matches[1][$headlineCount];

		if ( $level > $prevlevel ) {
			# Increase TOC level
			$toclevel++;
			$sublevelCount[$toclevel] = 0;
			if ( $toclevel < $wgMaxTocLevel ) {
				$prevtoclevel = $toclevel;
				$toc .= Linker::tocIndent();
				$numVisible++;
			}
		} elseif ( $level < $prevlevel && $toclevel > 1 ) {
			# Decrease TOC level, find level to jump to

			for ( $i = $toclevel; $i > 0; $i-- ) {
				if ( $levelCount[$i] == $level ) {
					# Found last matching level
					$toclevel = $i;
					break;
				} elseif ( $levelCount[$i] < $level ) {
					# Found first matching level below current level
					$toclevel = $i + 1;
					break;
				}
			}
			if ( $i == 0 ) {
				$toclevel = 1;
			}
			if ( $toclevel < $wgMaxTocLevel ) {
				if ( $prevtoclevel < $wgMaxTocLevel ) {
					# Unindent only if the previous toc level was shown :p
					$toc .= Linker::tocUnindent( $prevtoclevel - $toclevel );
					$prevtoclevel = $toclevel;
				} else {
					$toc .= Linker::tocLineEnd();
				}
			}
		} else {
			# No change in level, end TOC line
			if ( $toclevel < $wgMaxTocLevel ) {
				$toc .= Linker::tocLineEnd();
			}
		}

		$levelCount[$toclevel] = $level;

		# count number of headlines for each level
		$sublevelCount[$toclevel]++;
		$dot = 0;
		for ( $i = 1; $i <= $toclevel; $i++ ) {
			if ( !empty( $sublevelCount[$i] ) ) {
				if ( $dot ) {
					$numbering .= '.';
				}
				$numbering .= $this->getTargetLanguage()->formatNum( $sublevelCount[$i] );
				$dot = 1;
			}
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		# Strip out HTML (first regex removes any tag not allowed)
		# Allowed tags are:
		# * <sup> and <sub> (bug 8393)
		# * <i> (bug 26375)
		# * <b> (r105284)
		# * <bdi> (bug 72884)
		# * <span dir="rtl"> and <span dir="ltr"> (bug 35167)
		# * <s> and <strike> (T35715)
		# We strip any parameter from accepted tags (second regex), except dir="rtl|ltr" from <span>,
		# to allow setting directionality in toc items.
		$tocline = preg_replace(
			[
				'#<(?!/?(span|sup|sub|bdi|i|b|s|strike)(?: [^>]*)?>).*?>#',
				'#<(/?(?:span(?: dir="(?:rtl|ltr)")?|sup|sub|bdi|i|b|s|strike))(?: .*?)?>#'
			],
			[ '', '<$1>' ],
			$safeHeadline
		);

		# Strip '<span></span>', which is the result from the above if
		# <span id="foo"></span> is used to produce an additional anchor
		# for a section.
		$tocline = str_replace( '<span></span>', '', $tocline );

		$tocline = trim( $tocline );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = preg_replace( '/<.*?>/', '', $safeHeadline );
		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );

		# Save headline for section edit hint before it's escaped
		$headlineHint = $safeHeadline;

		if ( $wgExperimentalHtmlIds ) {
			# For reverse compatibility, provide an id that's
			# HTML4-compatible, like we used to.
			# It may be worth noting, academically, that it's possible for
			# the legacy anchor to conflict with a non-legacy headline
			# anchor on the page. In this case likely the "correct" thing
			# would be to either drop the legacy anchors or make sure
			# they're numbered first. However, this would require people
			# to type in section names like "abc_.D7.93.D7.90.D7.A4"
			# manually, so let's not bother worrying about it.
			$legacyHeadline = Sanitizer::escapeId( $safeHeadline,
				[ 'noninitial', 'legacy' ] );
			$safeHeadline = Sanitizer::escapeId( $safeHeadline );

			if ( $legacyHeadline == $safeHeadline ) {
				# No reason to have both (in fact, we can't)
				$legacyHeadline = false;
			}
		} else {
			$legacyHeadline = false;
			$safeHeadline = Sanitizer::escapeId( $safeHeadline,
				'noninitial' );
		}

		# HTML names must be case-insensitively unique (bug 10721).
		# This does not apply to Unicode characters per
		# http://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
		# @todo FIXME: We may be changing them depending on the current locale.
		$arrayKey = strtolower( $safeHeadline );
		if ( $legacyHeadline === false ) {
			$legacyArrayKey = false;
		} else {
			$legacyArrayKey = strtolower( $legacyHeadline );
		}

		# Create the anchor for linking from the TOC to the section.
		# A repeated anchor gets the first free "_N" suffix, starting at 2.
		$anchor = $safeHeadline;
		$legacyAnchor = $legacyHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			$i = 2;
			while ( isset( $refers["{$arrayKey}_$i"] ) ) {
				++$i;
			}
			$anchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only register a legacy anchor when there is one. Using the boolean
		# false as an array key would silently reserve key 0 and make a later
		# heading whose anchor is "0" look like a duplicate.
		if ( $legacyHeadline !== false ) {
			if ( isset( $refers[$legacyArrayKey] ) ) {
				$i = 2;
				while ( isset( $refers["{$legacyArrayKey}_$i"] ) ) {
					++$i;
				}
				$legacyAnchor .= "_$i";
				$refers["{$legacyArrayKey}_$i"] = true;
			} else {
				$refers[$legacyArrayKey] = true;
			}
		}

		# Don't number the heading if it is the only one (looks silly)
		if ( count( $matches[3] ) > 1 && $this->mOptions->getNumberHeadings() ) {
			# the two are different if the line contains a link
			$headline = Html::element(
				'span',
				[ 'class' => 'mw-headline-number' ],
				$numbering
			) . ' ' . $headline;
		}

		if ( $enoughToc && ( !isset( $wgMaxTocLevel ) || $toclevel < $wgMaxTocLevel ) ) {
			$toc .= Linker::tocLine( $anchor, $tocline,
				$numbering, $toclevel, ( $isTemplate ? false : $sectionIndex ) );
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			$byteOffset += mb_strlen( $this->mStripState->unstripBoth(
				$frame->expand( $node, PPFrame::RECOVER_ORIG ) ) );
			$node = $node->getNextSibling();
		}
		$tocraw[] = [
			'toclevel' => $toclevel,
			'level' => $level,
			'line' => $tocline,
			'number' => $numbering,
			'index' => ( $isTemplate ? 'T-' : '' ) . $sectionIndex,
			'fromtitle' => $titleText,
			'byteoffset' => ( $noOffset ? null : $byteOffset ),
			'anchor' => $anchor,
		];

		# give headline the correct <h#> tag
		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
				$editsectionContent = null;
			} else {
				$editsectionPage = $this->mTitle->getPrefixedText();
				$editsectionSection = $sectionIndex;
				$editsectionContent = $headlineHint;
			}
			// We use a bit of pseudo-xml for editsection markers. The
			// language converter is run later on. Using a UNIQ style marker
			// leads to the converter screwing up the tokens when it
			// converts stuff. And trying to insert strip tags fails too. At
			// this point all real inputted tags have already been escaped,
			// so we don't have to worry about a user trying to input one of
			// these markers directly. We use a page and section attribute
			// to stop the language converter from converting these
			// important bits of data, but put the headline hint inside a
			// content block because the language converter is supposed to
			// be able to convert that piece of data.
			// Gets replaced with html in ParserOutput::getText
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection ) . '"';
			if ( $editsectionContent !== null ) {
				$editlink .= '>' . $editsectionContent . '</mw:editsection>';
			} else {
				$editlink .= '/>';
			}
		} else {
			$editlink = '';
		}
		$head[$headlineCount] = Linker::makeHeadline( $level,
			$matches['attrib'][$headlineCount], $anchor, $headline,
			$editlink, $legacyAnchor );

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers
	if ( $numVisible < 1 ) {
		$enoughToc = false;
	}

	if ( $enoughToc ) {
		if ( $prevtoclevel > 0 && $prevtoclevel < $wgMaxTocLevel ) {
			$toc .= Linker::tocUnindent( $prevtoclevel - 1 );
		}
		$toc = Linker::tocList( $toc, $this->mOptions->getUserLangObj() );
		$this->mOutput->setTOCHTML( $toc );
		$toc = self::TOC_START . $toc . self::TOC_END;
		$this->mOutput->addModules( 'mediawiki.toc' );
	}

	if ( $isMain ) {
		$this->mOutput->setSections( $tocraw );
	}

	# split up and insert constructed headlines
	# The closing tag tolerates trailing spaces exactly like the matching
	# regex above, so that blocks and headlines always line up one to one.
	$blocks = preg_split( '/<H[1-6].*?>[\s\S]*?<\/H[1-6] *>/i', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		/**
		 * Send a hook, one per section.
		 * The idea here is to be able to make section-level DIVs, but to do so in a
		 * lower-impact, more correct way than r50769
		 *
		 * $this : caller
		 * $section : the section number
		 * &$sectionContent : ref to the content of the section
		 * $showEditLinks : boolean describing whether this section has an edit link
		 */
		Hooks::run( 'ParserSectionCreate', [ $this, $i, &$sections[$i], $showEditLink ] );

		$i++;
	}

	if ( $enoughToc && $isMain && !$this->mForceTocPosition ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		$sections[0] = $sections[0] . $toc . "\n";
	}

	$full = implode( '', $sections );

	if ( $this->mForceTocPosition ) {
		return str_replace( '<!--MWTOC-->', $toc, $full );
	} else {
		return $full;
	}
}
4395 | |||
/**
 * Prepare wiki markup for saving: normalize line endings and, when the
 * parser options ask for it, run the pre-save pass that substitutes
 * signatures, {{subst:}} templates, etc.
 *
 * The parser's user is set to $user for the duration of the call and
 * reset to null afterwards.
 *
 * @param string $text The text to transform
 * @param Title $title The Title object for the current article
 * @param User $user The User object describing the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 */
public function preSaveTransform( $text, Title $title, User $user,
	ParserOptions $options, $clearState = true
) {
	// The lock is only taken when starting from a clean state. The variable is
	// never read; it just keeps the returned scope object alive until we return.
	$magicScopeVariable = $clearState ? $this->lock() : null;

	$this->startParse( $title, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Line endings are still normalized here for backwards-compatibility with
	// callers that invoke PST directly, although TextContent subclasses should
	// already have taken care of it.
	$normalized = TextContent::normalizeLineEndings( $text );

	$transformed = $options->getPreSaveTransform()
		? $this->pstPass2( $normalized, $user )
		: $normalized;
	$result = $this->mStripState->unstripBoth( $transformed );

	# Reset
	$this->setUser( null );

	return $result;
}
||
4430 | |||
/**
 * Pre-save transform helper function.
 *
 * Performs, in this order: {{subst:}} expansion, signature and timestamp
 * substitution for "~~~", "~~~~" and "~~~~~", and expansion of the
 * "pipe trick" link shortcuts ([[name (context)|]] and [[|name]]).
 *
 * @param string $text
 * @param User $user
 *
 * @return string
 */
private function pstPass2( $text, $user ) {
	global $wgContLang;

	# Note: This is the timestamp saved as hardcoded wikitext to
	# the database, we use $wgContLang here in order to give
	# everyone the same signature and use the default one rather
	# than the one selected in each user's preferences.
	# (see also bug 12815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $wgContLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call.

	# Signatures
	$sigText = $this->getUserSig( $user );
	$text = strtr( $text, [
		'~~~~~' => $d,
		'~~~~' => "$sigText $d",
		'~~~' => $sigText
	] );

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# Full-width (double-width) punctuation, spelled as UTF-8 byte sequences so
	# that the characters cannot be lost to source re-encoding or normalization.
	$fwOpen = "\xEF\xBC\x88"; # U+FF08 FULLWIDTH LEFT PARENTHESIS
	$fwClose = "\xEF\xBC\x89"; # U+FF09 FULLWIDTH RIGHT PARENTHESIS
	$fwComma = "\xEF\xBC\x8C"; # U+FF0C FULLWIDTH COMMA

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page(context)|]] with double-width brackets (added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?{$fwOpen}$tc+{$fwClose})\\|]]/";
	// [[ns:page (context), context|]] (using either single or double-width comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |{$fwComma})$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	$t = $this->mTitle->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
||
4500 | |||
/**
 * Build the wikitext signature for a user.
 *
 * A "fancy" signature that passes validation is cleaned up and returned
 * as-is. In every other case the result is the 'signature' (or, for
 * anonymous users, 'signature-anon') message, linking to the user page.
 * If you have pre-fetched the nickname or the fancySig option, you can
 * specify them here to save a database query.
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed if it's the $wgParser.
 *
 * @param User $user
 * @param string|bool $nickname Nickname to use or false to use user's default nickname
 * @param bool|null $fancySig whether the nickname is the complete signature
 *   or null to use default value
 * @return string
 */
public function getUserSig( &$user, $nickname = false, $fancySig = null ) {
	global $wgMaxSigChars;

	$username = $user->getName();

	# Anything the caller did not supply is read from the user's preferences
	if ( $nickname === false ) {
		$nickname = $user->getOption( 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $user->getBoolOption( 'fancysig' );
	}

	# No nickname configured: sign with the plain user name
	if ( $nickname == null ) {
		$nickname = $username;
	}

	$tooLong = mb_strlen( $nickname ) > $wgMaxSigChars;
	if ( $tooLong ) {
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has overlong signature.\n" );
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		if ( $this->validateSig( $nickname ) !== false ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$nickname = $username;
		wfDebug( __METHOD__ . ": $username has bad XML tags in signature.\n" );
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$userText = wfEscapeWikiText( $username );
	$nickText = wfEscapeWikiText( $nickname );
	if ( $user->isAnon() ) {
		$msgName = 'signature-anon';
	} else {
		$msgName = 'signature';
	}

	$message = wfMessage( $msgName, $userText, $nickText );

	return $message->inContentLanguage()->title( $this->getTitle() )->text();
}
||
4557 | |||
/**
 * Check that a signature is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|bool The unchanged text when it is well formed, or false if invalid.
 */
public function validateSig( $text ) {
	if ( Xml::isWellFormedXmlFragment( $text ) ) {
		return $text;
	}

	return false;
}
||
4567 | |||
/**
 * Clean up signature text
 *
 * 1) Turn every "{{" that does not already start a subst: call into one,
 *    so that all transclusions get substituted
 * 2) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
 * 3) Expand the result through the preprocessor
 *
 * Nothing is changed when the "clean signatures" parser option is off.
 *
 * @param string $text
 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
 * @return string Signature text
 */
public function cleanSig( $text, $parsing = false ) {
	global $wgTitle;

	if ( !$parsing ) {
		// Stand-alone use: take the lock and set up a fresh parse. The variable is
		// never read; it only keeps the returned scope object alive until we return.
		$magicScopeVariable = $this->lock();
		$this->startParse( $wgTitle, new ParserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	$subst = MagicWord::get( 'subst' );
	$pattern = '/\{\{(?!(?:' . $subst->getBaseRegex() . '))/x' . $subst->getRegexCase();
	$replacement = '{{' . $subst->getSynonym( 0 );

	$cleaned = self::cleanSigInSig( preg_replace( $pattern, $replacement, $text ) );

	$dom = $this->preprocessToDom( $cleaned );
	$frame = $this->getPreprocessor()->newFrame();
	$expanded = $frame->expand( $dom );

	if ( $parsing ) {
		return $expanded;
	}

	return $this->mStripState->unstripBoth( $expanded );
}
||
4608 | |||
/**
 * Remove every run of 3, 4 or 5 tildes from a signature, so that a
 * signature cannot itself contain signature markup.
 *
 * @param string $text
 * @return string Signature text with /~{3,5}/ removed
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
||
4619 | |||
/**
 * Public entry point to startParse(): sets up the title, options and
 * output type that parse() would normally establish, so that external
 * code can call individual parser methods with confidence.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to reset the parser state as well
 */
public function startExternalParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	// Pure delegation; all of the work happens in the private helper.
	$this->startParse( $title, $options, $outputType, $clearState );
}
||
4634 | |||
/**
 * Install the title, parser options and output type for a parse and,
 * if requested, reset the parser state.
 *
 * @param Title|null $title
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState
 */
private function startParse( Title $title = null, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setTitle( $title );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}

	$this->clearState();
}
||
4651 | |||
/**
 * Wrapper for preprocess()
 *
 * A static flag guards against infinite recursion: while one call is in
 * progress, nested calls return their text unchanged. The flag is always
 * cleared again, even when preprocess() throws, so that a single failure
 * does not disable message transformation for the rest of the request.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options Options
 * @param Title|null $title Title object or null to use $wgTitle
 * @return string
 */
public function transformMsg( $text, $options, $title = null ) {
	global $wgTitle;
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		if ( !$title ) {
			$title = $wgTitle;
		}

		return $this->preprocess( $text, $title, $options );
	} finally {
		# Release the guard on every exit path, including exceptions
		$executing = false;
	}
}
||
4679 | |||
/**
 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
 * The callback should have the following form:
 *    function myParserHook( $text, $params, $parser, $frame ) { ... }
 *
 * Transform and return $text. Use $parser for any required context, e.g. use
 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
 *
 * Hooks may return extended information by returning an array, of which the
 * first numbered element (index 0) must be the return string, and all other
 * entries are extracted into local variables within an internal function
 * in the Parser class.
 *
 * This interface (introduced r61913) appears to be undocumented, but
 * 'markerType' is used by some core tag hooks to override which strip
 * array their results are placed in. **Use great caution if attempting
 * this interface, as it is not documented and injudicious use could smash
 * private variables.**
 *
 * The tag name is lower-cased before it is registered.
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If the tag name contains "<", ">", CR or LF
 * @return callable|null The old value of the mTagHooks array associated with the hook
 */
public function setHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTagHooks[$tag] ) ? $this->mTagHooks[$tag] : null;
	$this->mTagHooks[$tag] = $callback;
	# Strict comparison: a loose in_array() compares numeric-looking names
	# numerically ("10" == "1e1"), which could keep a new tag off the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
||
4717 | |||
/**
 * As setHook(), but letting the contents be parsed.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * This is probably obsoleted by things dealing with parser frames?
 * The only extension currently using it is geoserver.
 *
 * Unlike setHook(), this does not add the tag to the strip list.
 *
 * @since 1.10
 * @todo better document or deprecate this
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback function (and object) to use for the tag
 * @throws MWException If $tag contains '<', '>', CR or LF
 * @return callable|null The old value of the mTransparentTagHooks array associated with the hook
 */
public function setTransparentTagHook( $tag, $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		# Name the method that was actually called, so the message can be searched for
		throw new MWException( "Invalid character {$m[0]} in setTransparentTagHook('$tag', ...) call" );
	}
	$oldVal = isset( $this->mTransparentTagHooks[$tag] ) ? $this->mTransparentTagHooks[$tag] : null;
	$this->mTransparentTagHooks[$tag] = $callback;

	return $oldVal;
}
||
4745 | |||
/**
 * Remove all tag hooks
 *
 * Empties the regular and function tag hook tables and restores the strip
 * list to its default contents. Transparent tag hooks are not touched here.
 */
public function clearTagHooks() {
	# Put the strip list back to its defaults first, then drop both hook tables
	$this->mStripList = $this->mDefaultStripList;
	$this->mFunctionTagHooks = [];
	$this->mTagHooks = [];
}
||
4754 | |||
/**
 * Create a function, e.g. {{sum:1|2|3}}
 * The callback function should have the form:
 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
 *
 * Or with Parser::SFH_OBJECT_ARGS:
 *    function myParserFunction( $parser, $frame, $args ) { ... }
 *
 * The callback may either return the text result of the function, or an array with the text
 * in element 0, and a number of flags in the other elements. The names of the flags are
 * specified in the keys. Valid flags are:
 *   found     The text returned is valid, stop processing the template. This
 *             is on by default.
 *   nowiki    Wiki markup in the return value should be escaped
 *   isHTML    The returned text is HTML, armour it against wikitext transformation
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback function (and object) to use
 * @param int $flags A combination of the following flags:
 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
 *
 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
 *     the arguments, and to control the way they are expanded.
 *
 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 *     arguments, for instance:
 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 *
 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
 *     working if/when this is changed.
 *
 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
 *     expansion.
 *
 *     Please read the documentation in includes/parser/Preprocessor.php for more information
 *     about the methods available in PPFrame and PPNode.
 *
 * @throws MWException If $id does not resolve to a magic word
 * @return string|callable|null The old callback function for this name, if any
 */
public function setFunctionHook( $id, $callback, $flags = 0 ) {
	global $wgContLang;

	# Resolve the magic word before touching any state, so that a rejected ID
	# does not leave behind a registered hook that has no synonyms.
	$mw = MagicWord::get( $id );
	if ( !$mw ) {
		throw new MWException( __METHOD__ . '() expecting a magic word identifier.' );
	}

	$oldVal = isset( $this->mFunctionHooks[$id] ) ? $this->mFunctionHooks[$id][0] : null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$sensitive = intval( $mw->isCaseSensitive() );
	$addHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $mw->getSynonyms() as $syn ) {
		# Case
		if ( !$sensitive ) {
			$syn = $wgContLang->lc( $syn );
		}
		# Add leading hash
		if ( $addHash ) {
			$syn = '#' . $syn;
		}
		# Remove trailing colon
		if ( substr( $syn, -1, 1 ) === ':' ) {
			$syn = substr( $syn, 0, -1 );
		}
		# Synonyms are indexed by case sensitivity (0 or 1), then by name
		$this->mFunctionSynonyms[$sensitive][$syn] = $id;
	}
	return $oldVal;
}
||
4830 | |||
/**
 * List the identifiers of every registered function hook
 *
 * @return array Keys of the function hook table
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
||
4839 | |||
/**
 * Create a tag function, e.g. "<test>some stuff</test>".
 * Unlike tag hooks, tag functions are parsed at preprocessor level.
 * Unlike parser functions, their content is not preprocessed.
 *
 * The tag name is lower-cased before it is stored, and is appended to the
 * strip list if it is not already present there.
 *
 * @param string $tag
 * @param callable $callback
 * @param int $flags
 * @throws MWException If $tag contains '<', '>', CR or LF
 * @return array|null The previous [ callback, flags ] pair registered for
 *   this tag, or null if there was none
 */
public function setFunctionTagHook( $tag, $callback, $flags ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new MWException( "Invalid character {$m[0]} in setFunctionTagHook('$tag', ...) call" );
	}
	$old = isset( $this->mFunctionTagHooks[$tag] ) ?
		$this->mFunctionTagHooks[$tag] : null;
	$this->mFunctionTagHooks[$tag] = [ $callback, $flags ];

	# Strict comparison: with loose matching, numeric-looking tag names such as
	# "1e1" and "10" compare equal and the new tag would never be added.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $old;
}
||
4865 | |||
/**
 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
 * Placeholders created in Linker::link()
 *
 * The work is delegated to the parser's link holder array, which receives
 * $text by reference; nothing is returned.
 *
 * @param string &$text
 * @param int $options Not used by this method
 */
public function replaceLinkHolders( &$text, $options = 0 ) {
	$holders = $this->mLinkHolders;
	$holders->replace( $text );
}
||
4876 | |||
/**
 * Replace "<!--LINK-->" link placeholders with plain text of links
 * (not HTML-formatted).
 *
 * Delegates to replaceText() on the parser's link holder array.
 *
 * @param string $text
 * @return string
 */
public function replaceLinkHoldersText( $text ) {
	$plainText = $this->mLinkHolders->replaceText( $text );

	return $plainText;
}
||
4887 | |||
/**
 * Renders an image gallery from a text with one line per image.
 * text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and
 * 'A tree'.
 *
 * Recognised keys of $params read directly here are 'mode', 'showfilename',
 * 'caption', 'perrow', 'widths' and 'heights'; the whole array is also passed
 * on to the gallery object as tag attributes and additional options.
 *
 * Runs the hooks 'BeforeParserrenderImageGallery' (once, before the lines are
 * processed), 'BeforeParserFetchFileAndTitle' (once per image line) and
 * 'AfterParserFetchFileAndTitle' (once, with the finished HTML).
 *
 * @param string $text
 * @param array $params
 * @return string HTML
 */
public function renderImageGallery( $text, $params ) {
	$mode = false;
	if ( isset( $params['mode'] ) ) {
		$mode = $params['mode'];
	}

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( Exception $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->mTitle );
	$ig->setShowBytes( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'table' ) );

	// File names are shown only when the 'showfilename' parameter is present.
	$ig->setShowFilename( isset( $params['showfilename'] ) );

	if ( isset( $params['caption'] ) ) {
		$caption = $params['caption'];
		$caption = htmlspecialchars( $caption );
		$caption = $this->replaceInternalLinks( $caption );
		$ig->setCaptionHtml( $caption );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	// $this must not be passed by reference (rejected from PHP 7.1 on), so
	// hand the hook a local alias instead.
	$parser = $this;
	Hooks::run( 'BeforeParserrenderImageGallery', [ &$parser, &$ig ] );

	$lines = StringUtils::explode( "\n", $text );
	foreach ( $lines as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		# Skip empty lines
		if ( count( $matches ) == 0 ) {
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( is_null( $title ) ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and choose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		Hooks::run( 'BeforeParserFetchFileAndTitle',
			[ $this, $title, &$options, &$descQuery ] );
		# Don't register it now, as ImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap = $paramMap + $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = new MagicWordArray( array_keys( $paramMap ) );

		$label = '';
		$alt = '';
		$link = '';
		$handlerOptions = [];
		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage, and the trim before
			// splitting on '|' is a bit odd, and different from makeImage.
			$matches[3] = $this->recursiveTagParse( trim( $matches[3] ) );
			$parameterMatches = StringUtils::explode( '|', $matches[3] );

			foreach ( $parameterMatches as $parameterMatch ) {
				list( $magicName, $match ) = $mwArray->matchVariableStartToEnd( $parameterMatch );
				if ( $magicName ) {
					$paramName = $paramMap[$magicName];

					switch ( $paramName ) {
						case 'gallery-internal-alt':
							$alt = $this->stripAltText( $match, false );
							break;
						case 'gallery-internal-link':
							$linkValue = strip_tags( $this->replaceLinkHoldersText( $match ) );
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							// check to see if link matches an absolute url, if not then it must be a wiki link.
							if ( preg_match( "/^($prots)$addr$chars*$/u", $linkValue ) ) {
								$link = $linkValue;
							} else {
								$localLinkTitle = Title::newFromText( $linkValue );
								if ( $localLinkTitle !== null ) {
									$link = $localLinkTitle->getLinkURL();
								}
							}
							break;
						default:
							// Must be a handler specific parameter. This branch is only
							// reachable when a handler contributed entries to $paramMap.
							if ( $handler->validateParam( $paramName, $match ) ) {
								$handlerOptions[$paramName] = $match;
							} else {
								// Guess not, consider it as caption.
								wfDebug( "$parameterMatch failed parameter validation\n" );
								$label = $parameterMatch;
							}
					}

				} else {
					// Last pipe wins.
					$label = $parameterMatch;
				}
			}
			// $label is always a string here: it stays '' when the line carried
			// only named parameters and no caption.
		}

		$ig->add( $title, $label, $alt, $link, $handlerOptions );
	}
	$html = $ig->toHTML();
	Hooks::run( 'AfterParserFetchFileAndTitle', [ $this, $ig, &$html ] );
	return $html;
}
||
5054 | |||
/**
 * Build (or fetch from the per-parser cache) the image parameter map for a
 * media handler, together with a MagicWordArray of the map's keys.
 *
 * Results are cached per handler class name; the empty string is the cache
 * key used when no handler is given.
 *
 * @param MediaHandler $handler Handler of the file, or a false-like value if none
 * @return array Two elements: the map of magic word ID => [ type, name ],
 *   and the MagicWordArray built from the map's keys
 */
public function getImageParams( $handler ) {
	# Names of the built-in parameters, grouped by parameter type
	static $builtinNames = [
		'horizAlign' => [ 'left', 'right', 'center', 'none' ],
		'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
			'bottom', 'text-bottom' ],
		'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
			'upright', 'border', 'link', 'alt', 'class' ],
	];
	# Magic word ID => [ type, name ] for the built-ins, filled in on first use
	static $builtinMap = null;

	$cacheKey = $handler ? get_class( $handler ) : '';

	# Already computed for this handler class
	if ( isset( $this->mImageParams[$cacheKey] ) ) {
		return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
	}

	if ( !$builtinMap ) {
		$builtinMap = [];
		foreach ( $builtinNames as $group => $members ) {
			foreach ( $members as $member ) {
				# Magic word IDs use underscores where the names use hyphens
				$wordId = str_replace( '-', '_', "img_$member" );
				$builtinMap[$wordId] = [ $group, $member ];
			}
		}
	}

	# Start from the built-ins, then layer the handler's own parameters on top
	$combined = $builtinMap;
	if ( $handler ) {
		foreach ( $handler->getParamMap() as $wordId => $handlerParam ) {
			$combined[$wordId] = [ 'handler', $handlerParam ];
		}
	}

	$this->mImageParams[$cacheKey] = $combined;
	$this->mImageParamsMagicArray[$cacheKey] = new MagicWordArray( array_keys( $combined ) );

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
||
5098 | |||
/**
 * Parse image options text and use it to make an image
 *
 * The options text is split on '|'. Each part that matches a known image
 * parameter and passes validation is stored in the parameter set; any part
 * that fails is treated as the caption, so the last such part wins.
 *
 * Runs the hooks 'BeforeParserFetchFileAndTitle' and 'ParserMakeImageParams',
 * adds the 'broken-file-category' tracking category when the file cannot be
 * fetched, and registers link targets given via the link parameter on the
 * parser output.
 *
 * @param Title $title
 * @param string $options Pipe-separated options text. The local variable is
 *   reused inside the method as the options array passed to the fetch hook.
 * @param LinkHolderArray|bool $holders
 * @return string HTML
 */
public function makeImage( $title, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * frame      Keep original image size, no magnify-button.
	#  * framed     Same as "frame"
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	$parts = StringUtils::explode( "|", $options );

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	Hooks::run( 'BeforeParserFetchFileAndTitle',
		[ $this, $title, &$options, &$descQuery ] );
	# Fetch and register the file (file title may be different via hooks)
	list( $file, $title ) = $this->fetchFileAndTitle( $title, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	list( $paramMap, $mwArray ) = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		$part = trim( $part );
		list( $magicName, $value ) = $mwArray->matchVariableStartToEnd( $part );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			list( $type, $paramName ) = $paramMap[$magicName];

			# Special case; width and height come in one variable together
			if ( $type === 'handler' && $paramName === 'width' ) {
				$parsedWidthParam = $this->parseWidthParam( $value );
				if ( isset( $parsedWidthParam['width'] ) ) {
					$width = $parsedWidthParam['width'];
					if ( $handler->validateParam( 'width', $width ) ) {
						$params[$type]['width'] = $width;
						$validated = true;
					}
				}
				if ( isset( $parsedWidthParam['height'] ) ) {
					$height = $parsedWidthParam['height'];
					if ( $handler->validateParam( 'height', $height ) ) {
						$params[$type]['height'] = $height;
						$validated = true;
					}
				}
				# else no validation -- bug 13436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'manualthumb':
						case 'alt':
						case 'class':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							$chars = self::EXT_LINK_URL_CLASS;
							$addr = self::EXT_LINK_ADDR;
							$prots = $this->mUrlProtocols;
							if ( $value === '' ) {
								# An empty link= disables the link; stored under 'no-link'
								$paramName = 'no-link';
								$value = true;
								$validated = true;
							} elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
								# Starts with a known protocol: accepted only if the
								# whole value is a well-formed external URL
								if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
									$paramName = 'link-url';
									$this->mOutput->addExternalLink( $value );
									if ( $this->mOptions->getExternalLinkTarget() ) {
										$params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
									}
									$validated = true;
								}
							} else {
								# Otherwise try to read it as a page title
								$linkTitle = Title::newFromText( $value );
								if ( $linkTitle ) {
									$paramName = 'link-title';
									$value = $linkTitle;
									$this->mOutput->addLink( $linkTitle );
									$validated = true;
								}
							}
							break;
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		# Anything that is not a validated parameter becomes the caption
		if ( !$validated ) {
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] ) {
		$params['frame']['align'] = key( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] ) {
		$params['frame']['valign'] = key( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	# Will the image be presented in a frame, with the caption below?
	$imageIsFramed = isset( $params['frame']['frame'] )
		|| isset( $params['frame']['framed'] )
		|| isset( $params['frame']['thumbnail'] )
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $imageIsFramed ) { # Framed image
		if ( $caption === '' && !isset( $params['frame']['alt'] ) ) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $title->getText();
		}
		# Do not set $params['frame']['title'] because tooltips don't make sense
		# for framed images
	} else { # Inline image
		# The stripped caption serves as the tooltip and, when no alt text was
		# given, as the alt text too; strip it once and reuse the result.
		$strippedCaption = $this->stripAltText( $caption, $holders );
		if ( !isset( $params['frame']['alt'] ) ) {
			if ( $caption !== '' ) {
				# No alt text, use the "caption" for the alt text
				$params['frame']['alt'] = $strippedCaption;
			} else {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $title->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $strippedCaption;
	}

	Hooks::run( 'ParserMakeImageParams', [ $title, $file, &$params, $this ] );

	# Linker does the rest
	$time = isset( $options['time'] ) ? $options['time'] : false;
	$ret = Linker::makeImageLink( $this, $title, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}

	return $ret;
}
||
5316 | |||
/**
 * Turn caption wikitext into plain text suitable for an HTML attribute
 * (alt text or tooltip).
 *
 * @param string $caption
 * @param LinkHolderArray|bool $holders Link holders to resolve placeholders
 *   with; when false-like, the parser's own link holders are used
 * @return mixed|string
 */
protected function stripAltText( $caption, $holders ) {
	# We can't always use replaceLinkHoldersText() here, because if this
	# function is called from replaceInternalLinks2(), mLinkHolders won't
	# be up-to-date; prefer the explicitly supplied holders when present.
	$withoutHolders = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# Make sure no strip markers survive into the attribute, where they
	# would later be expanded to HTML: expand them now, then drop all tags.
	$expanded = $this->mStripState->unstripBoth( $withoutHolders );

	return Sanitizer::stripAllTags( $expanded );
}
||
5340 | |||
/**
 * Flag the output object as dynamic content that shouldn't be cached, by
 * setting its cache expiry to zero.
 *
 * @deprecated since 1.28; use getOutput()->updateCacheExpiry()
 * @throws MWException If there is no output object, i.e. no parse is in progress
 */
public function disableCache() {
	wfDebug( "Parser output marked as uncacheable.\n" );

	if ( $this->mOutput ) {
		$this->mOutput->updateCacheExpiry( 0 ); // new style, for consistency
		return;
	}

	throw new MWException( __METHOD__ .
		" can only be called when actually parsing something" );
}
||
5354 | |||
/**
 * Callback from the Sanitizer for expanding items found in HTML attribute
 * values, so they can be safely tested and escaped.
 *
 * Variables are replaced first, then strip markers are expanded. $text is
 * taken by reference and is updated in place as well as being returned.
 *
 * @param string &$text
 * @param bool|PPFrame $frame
 * @return string
 */
public function attributeStripCallback( &$text, $frame = false ) {
	# Step 1: variable replacement, written back to the caller's variable
	$text = $this->replaceVariables( $text, $frame );

	# Step 2: expand strip markers, again writing back before returning
	return $text = $this->mStripState->unstripBoth( $text );
}
||
5368 | |||
/**
 * Accessor
 *
 * Collects the names of all registered tags, in this order: transparent tag
 * hooks, regular tag hooks, function tag hooks.
 *
 * @return array
 */
public function getTags() {
	$transparentTags = array_keys( $this->mTransparentTagHooks );
	$regularTags = array_keys( $this->mTagHooks );
	$functionTags = array_keys( $this->mFunctionTagHooks );

	return array_merge( $transparentTags, $regularTags, $functionTags );
}
||
5381 | |||
/**
 * Replace transparent tags in $text with the values given by the callbacks.
 *
 * Transparent tag hooks are like regular XML-style tag hooks, except they
 * operate late in the transformation sequence, on HTML instead of wikitext.
 *
 * Each callback is invoked with ( $content, $params, $parser ). A tag that
 * was extracted but has no registered callback is put back verbatim.
 *
 * @param string $text
 *
 * @return string
 */
public function replaceTransparentTags( $text ) {
	$extracted = [];
	$tagNames = array_keys( $this->mTransparentTagHooks );

	# Swap every occurrence of a transparent tag for a marker; details of
	# each occurrence are collected in $extracted, keyed by marker
	$text = self::extractTagsAndParams( $tagNames, $text, $extracted );

	$markerToOutput = [];
	foreach ( $extracted as $marker => $details ) {
		list( $element, $content, $params, $tag ) = $details;
		$hookKey = strtolower( $element );

		$markerToOutput[$marker] = isset( $this->mTransparentTagHooks[$hookKey] )
			? call_user_func_array(
				$this->mTransparentTagHooks[$hookKey],
				[ $content, $params, $this ]
			)
			: $tag;
	}

	# Substitute all markers in one pass
	return strtr( $text, $markerToOutput );
}
||
5413 | |||
5414 | /** |
||
5415 | * Break wikitext input into sections, and either pull or replace |
||
5416 | * some particular section's text. |
||
5417 | * |
||
5418 | * External callers should use the getSection and replaceSection methods. |
||
5419 | * |
||
5420 | * @param string $text Page wikitext |
||
5421 | * @param string|number $sectionId A section identifier string of the form: |
||
5422 | * "<flag1> - <flag2> - ... - <section number>" |
||
5423 | * |
||
5424 | * Currently the only recognised flag is "T", which means the target section number |
||
5425 | * was derived during a template inclusion parse, in other words this is a template |
||
5426 | * section edit link. If no flags are given, it was an ordinary section edit link. |
||
5427 | * This flag is required to avoid a section numbering mismatch when a section is |
||
5428 | * enclosed by "<includeonly>" (bug 6563). |
||
5429 | * |
||
5430 | * The section number 0 pulls the text before the first heading; other numbers will |
||
5431 | * pull the given section along with its lower-level subsections. If the section is |
||
5432 | * not found, $mode=get will return $newtext, and $mode=replace will return $text. |
||
5433 | * |
||
5434 | * Section 0 is always considered to exist, even if it only contains the empty |
||
5435 | * string. If $text is the empty string and section 0 is replaced, $newText is |
||
5436 | * returned. |
||
5437 | * |
||
5438 | * @param string $mode One of "get" or "replace" |
||
5439 | * @param string $newText Replacement text for section data. |
||
5440 | * @return string For "get", the extracted section text. |
||
5441 | * for "replace", the whole page with the section replaced. |
||
5442 | */ |
||
5443 | private function extractSections( $text, $sectionId, $mode, $newText = '' ) { |
||
5444 | global $wgTitle; # not generally used but removes an ugly failure mode |
||
5445 | |||
5446 | $magicScopeVariable = $this->lock(); |
||
5447 | $this->startParse( $wgTitle, new ParserOptions, self::OT_PLAIN, true ); |
||
5448 | $outText = ''; |
||
5449 | $frame = $this->getPreprocessor()->newFrame(); |
||
5450 | |||
5451 | # Process section extraction flags |
||
5452 | $flags = 0; |
||
5453 | $sectionParts = explode( '-', $sectionId ); |
||
5454 | $sectionIndex = array_pop( $sectionParts ); |
||
5455 | foreach ( $sectionParts as $part ) { |
||
5456 | if ( $part === 'T' ) { |
||
5457 | $flags |= self::PTD_FOR_INCLUSION; |
||
5458 | } |
||
5459 | } |
||
5460 | |||
5461 | # Check for empty input |
||
5462 | if ( strval( $text ) === '' ) { |
||
5463 | # Only sections 0 and T-0 exist in an empty document |
||
5464 | if ( $sectionIndex == 0 ) { |
||
5465 | if ( $mode === 'get' ) { |
||
5466 | return ''; |
||
5467 | } else { |
||
5468 | return $newText; |
||
5469 | } |
||
5470 | } else { |
||
5471 | if ( $mode === 'get' ) { |
||
5472 | return $newText; |
||
5473 | } else { |
||
5474 | return $text; |
||
5475 | } |
||
5476 | } |
||
5477 | } |
||
5478 | |||
5479 | # Preprocess the text |
||
5480 | $root = $this->preprocessToDom( $text, $flags ); |
||
5481 | |||
5482 | # <h> nodes indicate section breaks |
||
5483 | # They can only occur at the top level, so we can find them by iterating the root's children |
||
5484 | $node = $root->getFirstChild(); |
||
5485 | |||
5486 | # Find the target section |
||
5487 | if ( $sectionIndex == 0 ) { |
||
5488 | # Section zero doesn't nest, level=big |
||
5489 | $targetLevel = 1000; |
||
5490 | } else { |
||
5491 | while ( $node ) { |
||
5492 | View Code Duplication | if ( $node->getName() === 'h' ) { |
|
5493 | $bits = $node->splitHeading(); |
||
5494 | if ( $bits['i'] == $sectionIndex ) { |
||
5495 | $targetLevel = $bits['level']; |
||
5496 | break; |
||
5497 | } |
||
5498 | } |
||
5499 | if ( $mode === 'replace' ) { |
||
5500 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
||
5501 | } |
||
5502 | $node = $node->getNextSibling(); |
||
5503 | } |
||
5504 | } |
||
5505 | |||
5506 | if ( !$node ) { |
||
5507 | # Not found |
||
5508 | if ( $mode === 'get' ) { |
||
5509 | return $newText; |
||
5510 | } else { |
||
5511 | return $text; |
||
5512 | } |
||
5513 | } |
||
5514 | |||
5515 | # Find the end of the section, including nested sections |
||
5516 | do { |
||
5517 | View Code Duplication | if ( $node->getName() === 'h' ) { |
|
5518 | $bits = $node->splitHeading(); |
||
5519 | $curLevel = $bits['level']; |
||
5520 | if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) { |
||
5521 | break; |
||
5522 | } |
||
5523 | } |
||
5524 | if ( $mode === 'get' ) { |
||
5525 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
||
5526 | } |
||
5527 | $node = $node->getNextSibling(); |
||
5528 | } while ( $node ); |
||
5529 | |||
5530 | # Write out the remainder (in replace mode only) |
||
5531 | if ( $mode === 'replace' ) { |
||
5532 | # Output the replacement text |
||
5533 | # Add two newlines on -- trailing whitespace in $newText is conventionally |
||
5534 | # stripped by the editor, so we need both newlines to restore the paragraph gap |
||
5535 | # Only add trailing whitespace if there is newText |
||
5536 | if ( $newText != "" ) { |
||
5537 | $outText .= $newText . "\n\n"; |
||
5538 | } |
||
5539 | |||
5540 | while ( $node ) { |
||
5541 | $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG ); |
||
5542 | $node = $node->getNextSibling(); |
||
5543 | } |
||
5544 | } |
||
5545 | |||
5546 | if ( is_string( $outText ) ) { |
||
5547 | # Re-insert stripped tags |
||
5548 | $outText = rtrim( $this->mStripState->unstripBoth( $outText ) ); |
||
5549 | } |
||
5550 | |||
5551 | return $outText; |
||
5552 | } |
||
5553 | |||
5554 | /** |
||
5555 | * This function returns the text of a section, specified by a number ($section). |
||
5556 | * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or |
||
5557 | * the first section before any such heading (section 0). |
||
5558 | * |
||
5559 | * If a section contains subsections, these are also returned. |
||
5560 | * |
||
5561 | * @param string $text Text to look in |
||
5562 | * @param string|number $sectionId Section identifier as a number or string |
||
5563 | * (e.g. 0, 1 or 'T-1'). |
||
5564 | * @param string $defaultText Default to return if section is not found |
||
5565 | * |
||
5566 | * @return string Text of the requested section |
||
5567 | */ |
||
5568 | public function getSection( $text, $sectionId, $defaultText = '' ) { |
||
5569 | return $this->extractSections( $text, $sectionId, 'get', $defaultText ); |
||
5570 | } |
||
5571 | |||
5572 | /** |
||
5573 | * This function returns $oldtext after the content of the section |
||
5574 | * specified by $section has been replaced with $text. If the target |
||
5575 | * section does not exist, $oldtext is returned unchanged. |
||
5576 | * |
||
5577 | * @param string $oldText Former text of the article |
||
5578 | * @param string|number $sectionId Section identifier as a number or string |
||
5579 | * (e.g. 0, 1 or 'T-1'). |
||
5580 | * @param string $newText Replacing text |
||
5581 | * |
||
5582 | * @return string Modified text |
||
5583 | */ |
||
5584 | public function replaceSection( $oldText, $sectionId, $newText ) { |
||
5585 | return $this->extractSections( $oldText, $sectionId, 'replace', $newText ); |
||
5586 | } |
||
5587 | |||
5588 | /** |
||
5589 | * Get the ID of the revision we are parsing |
||
5590 | * |
||
5591 | * @return int|null |
||
5592 | */ |
||
5593 | public function getRevisionId() { |
||
5594 | return $this->mRevisionId; |
||
5595 | } |
||
5596 | |||
5597 | /** |
||
5598 | * Get the revision object for $this->mRevisionId |
||
5599 | * |
||
5600 | * @return Revision|null Either a Revision object or null |
||
5601 | * @since 1.23 (public since 1.23) |
||
5602 | */ |
||
5603 | public function getRevisionObject() { |
||
5604 | if ( !is_null( $this->mRevisionObject ) ) { |
||
5605 | return $this->mRevisionObject; |
||
5606 | } |
||
5607 | if ( is_null( $this->mRevisionId ) ) { |
||
5608 | return null; |
||
5609 | } |
||
5610 | |||
5611 | $rev = call_user_func( |
||
5612 | $this->mOptions->getCurrentRevisionCallback(), $this->getTitle(), $this |
||
5613 | ); |
||
5614 | |||
5615 | # If the parse is for a new revision, then the callback should have |
||
5616 | # already been set to force the object and should match mRevisionId. |
||
5617 | # If not, try to fetch by mRevisionId for sanity. |
||
5618 | if ( $rev && $rev->getId() != $this->mRevisionId ) { |
||
5619 | $rev = Revision::newFromId( $this->mRevisionId ); |
||
5620 | } |
||
5621 | |||
5622 | $this->mRevisionObject = $rev; |
||
5623 | |||
5624 | return $this->mRevisionObject; |
||
5625 | } |
||
5626 | |||
5627 | /** |
||
5628 | * Get the timestamp associated with the current revision, adjusted for |
||
5629 | * the default server-local timestamp |
||
5630 | * @return string |
||
5631 | */ |
||
5632 | public function getRevisionTimestamp() { |
||
5633 | if ( is_null( $this->mRevisionTimestamp ) ) { |
||
5634 | global $wgContLang; |
||
5635 | |||
5636 | $revObject = $this->getRevisionObject(); |
||
5637 | $timestamp = $revObject ? $revObject->getTimestamp() : wfTimestampNow(); |
||
5638 | |||
5639 | # The cryptic '' timezone parameter tells to use the site-default |
||
5640 | # timezone offset instead of the user settings. |
||
5641 | # Since this value will be saved into the parser cache, served |
||
5642 | # to other users, and potentially even used inside links and such, |
||
5643 | # it needs to be consistent for all visitors. |
||
5644 | $this->mRevisionTimestamp = $wgContLang->userAdjust( $timestamp, '' ); |
||
5645 | |||
5646 | } |
||
5647 | return $this->mRevisionTimestamp; |
||
5648 | } |
||
5649 | |||
5650 | /** |
||
5651 | * Get the name of the user that edited the last revision |
||
5652 | * |
||
5653 | * @return string User name |
||
5654 | */ |
||
5655 | public function getRevisionUser() { |
||
5656 | if ( is_null( $this->mRevisionUser ) ) { |
||
5657 | $revObject = $this->getRevisionObject(); |
||
5658 | |||
5659 | # if this template is subst: the revision id will be blank, |
||
5660 | # so just use the current user's name |
||
5661 | if ( $revObject ) { |
||
5662 | $this->mRevisionUser = $revObject->getUserText(); |
||
5663 | } elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) { |
||
5664 | $this->mRevisionUser = $this->getUser()->getName(); |
||
5665 | } |
||
5666 | } |
||
5667 | return $this->mRevisionUser; |
||
5668 | } |
||
5669 | |||
5670 | /** |
||
5671 | * Get the size of the revision |
||
5672 | * |
||
5673 | * @return int|null Revision size |
||
5674 | */ |
||
5675 | public function getRevisionSize() { |
||
5676 | if ( is_null( $this->mRevisionSize ) ) { |
||
5677 | $revObject = $this->getRevisionObject(); |
||
5678 | |||
5679 | # if this variable is subst: the revision id will be blank, |
||
5680 | # so just use the parser input size, because the own substituation |
||
5681 | # will change the size. |
||
5682 | if ( $revObject ) { |
||
5683 | $this->mRevisionSize = $revObject->getSize(); |
||
5684 | } else { |
||
5685 | $this->mRevisionSize = $this->mInputSize; |
||
5686 | } |
||
5687 | } |
||
5688 | return $this->mRevisionSize; |
||
5689 | } |
||
5690 | |||
5691 | /** |
||
5692 | * Mutator for $mDefaultSort |
||
5693 | * |
||
5694 | * @param string $sort New value |
||
5695 | */ |
||
5696 | public function setDefaultSort( $sort ) { |
||
5697 | $this->mDefaultSort = $sort; |
||
5698 | $this->mOutput->setProperty( 'defaultsort', $sort ); |
||
5699 | } |
||
5700 | |||
5701 | /** |
||
5702 | * Accessor for $mDefaultSort |
||
5703 | * Will use the empty string if none is set. |
||
5704 | * |
||
5705 | * This value is treated as a prefix, so the |
||
5706 | * empty string is equivalent to sorting by |
||
5707 | * page name. |
||
5708 | * |
||
5709 | * @return string |
||
5710 | */ |
||
5711 | public function getDefaultSort() { |
||
5712 | if ( $this->mDefaultSort !== false ) { |
||
5713 | return $this->mDefaultSort; |
||
5714 | } else { |
||
5715 | return ''; |
||
5716 | } |
||
5717 | } |
||
5718 | |||
5719 | /** |
||
5720 | * Accessor for $mDefaultSort |
||
5721 | * Unlike getDefaultSort(), will return false if none is set |
||
5722 | * |
||
5723 | * @return string|bool |
||
5724 | */ |
||
5725 | public function getCustomDefaultSort() { |
||
5726 | return $this->mDefaultSort; |
||
5727 | } |
||
5728 | |||
5729 | /** |
||
5730 | * Try to guess the section anchor name based on a wikitext fragment |
||
5731 | * presumably extracted from a heading, for example "Header" from |
||
5732 | * "== Header ==". |
||
5733 | * |
||
5734 | * @param string $text |
||
5735 | * |
||
5736 | * @return string |
||
5737 | */ |
||
5738 | public function guessSectionNameFromWikiText( $text ) { |
||
5739 | # Strip out wikitext links(they break the anchor) |
||
5740 | $text = $this->stripSectionName( $text ); |
||
5741 | $text = Sanitizer::normalizeSectionNameWhitespace( $text ); |
||
5742 | return '#' . Sanitizer::escapeId( $text, 'noninitial' ); |
||
5743 | } |
||
5744 | |||
5745 | /** |
||
5746 | * Same as guessSectionNameFromWikiText(), but produces legacy anchors |
||
5747 | * instead. For use in redirects, since IE6 interprets Redirect: headers |
||
5748 | * as something other than UTF-8 (apparently?), resulting in breakage. |
||
5749 | * |
||
5750 | * @param string $text The section name |
||
5751 | * @return string An anchor |
||
5752 | */ |
||
5753 | public function guessLegacySectionNameFromWikiText( $text ) { |
||
5754 | # Strip out wikitext links(they break the anchor) |
||
5755 | $text = $this->stripSectionName( $text ); |
||
5756 | $text = Sanitizer::normalizeSectionNameWhitespace( $text ); |
||
5757 | return '#' . Sanitizer::escapeId( $text, [ 'noninitial', 'legacy' ] ); |
||
5758 | } |
||
5759 | |||
5760 | /** |
||
5761 | * Strips a text string of wikitext for use in a section anchor |
||
5762 | * |
||
5763 | * Accepts a text string and then removes all wikitext from the |
||
5764 | * string and leaves only the resultant text (i.e. the result of |
||
5765 | * [[User:WikiSysop|Sysop]] would be "Sysop" and the result of |
||
5766 | * [[User:WikiSysop]] would be "User:WikiSysop") - this is intended |
||
5767 | * to create valid section anchors by mimicing the output of the |
||
5768 | * parser when headings are parsed. |
||
5769 | * |
||
5770 | * @param string $text Text string to be stripped of wikitext |
||
5771 | * for use in a Section anchor |
||
5772 | * @return string Filtered text string |
||
5773 | */ |
||
5774 | public function stripSectionName( $text ) { |
||
5775 | # Strip internal link markup |
||
5776 | $text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text ); |
||
5777 | $text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text ); |
||
5778 | |||
5779 | # Strip external link markup |
||
5780 | # @todo FIXME: Not tolerant to blank link text |
||
5781 | # I.E. [https://www.mediawiki.org] will render as [1] or something depending |
||
5782 | # on how many empty links there are on the page - need to figure that out. |
||
5783 | $text = preg_replace( '/\[(?i:' . $this->mUrlProtocols . ')([^ ]+?) ([^[]+)\]/', '$2', $text ); |
||
5784 | |||
5785 | # Parse wikitext quotes (italics & bold) |
||
5786 | $text = $this->doQuotes( $text ); |
||
5787 | |||
5788 | # Strip HTML tags |
||
5789 | $text = StringUtils::delimiterReplace( '<', '>', '', $text ); |
||
5790 | return $text; |
||
5791 | } |
||
5792 | |||
5793 | /** |
||
5794 | * strip/replaceVariables/unstrip for preprocessor regression testing |
||
5795 | * |
||
5796 | * @param string $text |
||
5797 | * @param Title $title |
||
5798 | * @param ParserOptions $options |
||
5799 | * @param int $outputType |
||
5800 | * |
||
5801 | * @return string |
||
5802 | */ |
||
5803 | public function testSrvus( $text, Title $title, ParserOptions $options, |
||
5804 | $outputType = self::OT_HTML |
||
5805 | ) { |
||
5806 | $magicScopeVariable = $this->lock(); |
||
5807 | $this->startParse( $title, $options, $outputType, true ); |
||
5808 | |||
5809 | $text = $this->replaceVariables( $text ); |
||
5810 | $text = $this->mStripState->unstripBoth( $text ); |
||
5811 | $text = Sanitizer::removeHTMLtags( $text ); |
||
5812 | return $text; |
||
5813 | } |
||
5814 | |||
5815 | /** |
||
5816 | * @param string $text |
||
5817 | * @param Title $title |
||
5818 | * @param ParserOptions $options |
||
5819 | * @return string |
||
5820 | */ |
||
5821 | public function testPst( $text, Title $title, ParserOptions $options ) { |
||
5822 | return $this->preSaveTransform( $text, $title, $options->getUser(), $options ); |
||
5823 | } |
||
5824 | |||
5825 | /** |
||
5826 | * @param string $text |
||
5827 | * @param Title $title |
||
5828 | * @param ParserOptions $options |
||
5829 | * @return string |
||
5830 | */ |
||
5831 | public function testPreprocess( $text, Title $title, ParserOptions $options ) { |
||
5832 | return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS ); |
||
5833 | } |
||
5834 | |||
5835 | /** |
||
5836 | * Call a callback function on all regions of the given text that are not |
||
5837 | * inside strip markers, and replace those regions with the return value |
||
5838 | * of the callback. For example, with input: |
||
5839 | * |
||
5840 | * aaa<MARKER>bbb |
||
5841 | * |
||
5842 | * This will call the callback function twice, with 'aaa' and 'bbb'. Those |
||
5843 | * two strings will be replaced with the value returned by the callback in |
||
5844 | * each case. |
||
5845 | * |
||
5846 | * @param string $s |
||
5847 | * @param callable $callback |
||
5848 | * |
||
5849 | * @return string |
||
5850 | */ |
||
5851 | public function markerSkipCallback( $s, $callback ) { |
||
5852 | $i = 0; |
||
5853 | $out = ''; |
||
5854 | while ( $i < strlen( $s ) ) { |
||
5855 | $markerStart = strpos( $s, self::MARKER_PREFIX, $i ); |
||
5856 | if ( $markerStart === false ) { |
||
5857 | $out .= call_user_func( $callback, substr( $s, $i ) ); |
||
5858 | break; |
||
5859 | } else { |
||
5860 | $out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) ); |
||
5861 | $markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart ); |
||
5862 | if ( $markerEnd === false ) { |
||
5863 | $out .= substr( $s, $markerStart ); |
||
5864 | break; |
||
5865 | } else { |
||
5866 | $markerEnd += strlen( self::MARKER_SUFFIX ); |
||
5867 | $out .= substr( $s, $markerStart, $markerEnd - $markerStart ); |
||
5868 | $i = $markerEnd; |
||
5869 | } |
||
5870 | } |
||
5871 | } |
||
5872 | return $out; |
||
5873 | } |
||
5874 | |||
5875 | /** |
||
5876 | * Remove any strip markers found in the given text. |
||
5877 | * |
||
5878 | * @param string $text Input string |
||
5879 | * @return string |
||
5880 | */ |
||
5881 | public function killMarkers( $text ) { |
||
5882 | return $this->mStripState->killMarkers( $text ); |
||
5883 | } |
||
5884 | |||
5885 | /** |
||
5886 | * Save the parser state required to convert the given half-parsed text to |
||
5887 | * HTML. "Half-parsed" in this context means the output of |
||
5888 | * recursiveTagParse() or internalParse(). This output has strip markers |
||
5889 | * from replaceVariables (extensionSubstitution() etc.), and link |
||
5890 | * placeholders from replaceLinkHolders(). |
||
5891 | * |
||
5892 | * Returns an array which can be serialized and stored persistently. This |
||
5893 | * array can later be loaded into another parser instance with |
||
5894 | * unserializeHalfParsedText(). The text can then be safely incorporated into |
||
5895 | * the return value of a parser hook. |
||
5896 | * |
||
5897 | * @param string $text |
||
5898 | * |
||
5899 | * @return array |
||
5900 | */ |
||
5901 | public function serializeHalfParsedText( $text ) { |
||
5902 | $data = [ |
||
5903 | 'text' => $text, |
||
5904 | 'version' => self::HALF_PARSED_VERSION, |
||
5905 | 'stripState' => $this->mStripState->getSubState( $text ), |
||
5906 | 'linkHolders' => $this->mLinkHolders->getSubArray( $text ) |
||
5907 | ]; |
||
5908 | return $data; |
||
5909 | } |
||
5910 | |||
5911 | /** |
||
5912 | * Load the parser state given in the $data array, which is assumed to |
||
5913 | * have been generated by serializeHalfParsedText(). The text contents is |
||
5914 | * extracted from the array, and its markers are transformed into markers |
||
5915 | * appropriate for the current Parser instance. This transformed text is |
||
5916 | * returned, and can be safely included in the return value of a parser |
||
5917 | * hook. |
||
5918 | * |
||
5919 | * If the $data array has been stored persistently, the caller should first |
||
5920 | * check whether it is still valid, by calling isValidHalfParsedText(). |
||
5921 | * |
||
5922 | * @param array $data Serialized data |
||
5923 | * @throws MWException |
||
5924 | * @return string |
||
5925 | */ |
||
5926 | public function unserializeHalfParsedText( $data ) { |
||
5927 | View Code Duplication | if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) { |
|
5928 | throw new MWException( __METHOD__ . ': invalid version' ); |
||
5929 | } |
||
5930 | |||
5931 | # First, extract the strip state. |
||
5932 | $texts = [ $data['text'] ]; |
||
5933 | $texts = $this->mStripState->merge( $data['stripState'], $texts ); |
||
5934 | |||
5935 | # Now renumber links |
||
5936 | $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts ); |
||
5937 | |||
5938 | # Should be good to go. |
||
5939 | return $texts[0]; |
||
5940 | } |
||
5941 | |||
5942 | /** |
||
5943 | * Returns true if the given array, presumed to be generated by |
||
5944 | * serializeHalfParsedText(), is compatible with the current version of the |
||
5945 | * parser. |
||
5946 | * |
||
5947 | * @param array $data |
||
5948 | * |
||
5949 | * @return bool |
||
5950 | */ |
||
5951 | public function isValidHalfParsedText( $data ) { |
||
5952 | return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION; |
||
5953 | } |
||
5954 | |||
5955 | /** |
||
5956 | * Parsed a width param of imagelink like 300px or 200x300px |
||
5957 | * |
||
5958 | * @param string $value |
||
5959 | * |
||
5960 | * @return array |
||
5961 | * @since 1.20 |
||
5962 | */ |
||
5963 | public function parseWidthParam( $value ) { |
||
5964 | $parsedWidthParam = []; |
||
5965 | if ( $value === '' ) { |
||
5966 | return $parsedWidthParam; |
||
5967 | } |
||
5968 | $m = []; |
||
5969 | # (bug 13500) In both cases (width/height and width only), |
||
5970 | # permit trailing "px" for backward compatibility. |
||
5971 | if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) { |
||
5972 | $width = intval( $m[1] ); |
||
5973 | $height = intval( $m[2] ); |
||
5974 | $parsedWidthParam['width'] = $width; |
||
5975 | $parsedWidthParam['height'] = $height; |
||
5976 | } elseif ( preg_match( '/^[0-9]*\s*(?:px)?\s*$/', $value ) ) { |
||
5977 | $width = intval( $value ); |
||
5978 | $parsedWidthParam['width'] = $width; |
||
5979 | } |
||
5980 | return $parsedWidthParam; |
||
5981 | } |
||
5982 | |||
5983 | /** |
||
5984 | * Lock the current instance of the parser. |
||
5985 | * |
||
5986 | * This is meant to stop someone from calling the parser |
||
5987 | * recursively and messing up all the strip state. |
||
5988 | * |
||
5989 | * @throws MWException If parser is in a parse |
||
5990 | * @return ScopedCallback The lock will be released once the return value goes out of scope. |
||
5991 | */ |
||
5992 | protected function lock() { |
||
5993 | if ( $this->mInParse ) { |
||
5994 | throw new MWException( "Parser state cleared while parsing. " |
||
5995 | . "Did you call Parser::parse recursively?" ); |
||
5996 | } |
||
5997 | $this->mInParse = true; |
||
5998 | |||
5999 | $recursiveCheck = new ScopedCallback( function() { |
||
6000 | $this->mInParse = false; |
||
6001 | } ); |
||
6002 | |||
6003 | return $recursiveCheck; |
||
6004 | } |
||
6005 | |||
6006 | /** |
||
6007 | * Strip outer <p></p> tag from the HTML source of a single paragraph. |
||
6008 | * |
||
6009 | * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag, |
||
6010 | * or if there is more than one <p/> tag in the input HTML. |
||
6011 | * |
||
6012 | * @param string $html |
||
6013 | * @return string |
||
6014 | * @since 1.24 |
||
6015 | */ |
||
6016 | public static function stripOuterParagraph( $html ) { |
||
6017 | $m = []; |
||
6018 | if ( preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) { |
||
6019 | if ( strpos( $m[1], '</p>' ) === false ) { |
||
6020 | $html = $m[1]; |
||
6021 | } |
||
6022 | } |
||
6023 | |||
6024 | return $html; |
||
6025 | } |
||
6026 | |||
6027 | /** |
||
6028 | * Return this parser if it is not doing anything, otherwise |
||
6029 | * get a fresh parser. You can use this method by doing |
||
6030 | * $myParser = $wgParser->getFreshParser(), or more simply |
||
6031 | * $wgParser->getFreshParser()->parse( ... ); |
||
6032 | * if you're unsure if $wgParser is safe to use. |
||
6033 | * |
||
6034 | * @since 1.24 |
||
6035 | * @return Parser A parser object that is not parsing anything |
||
6036 | */ |
||
6037 | public function getFreshParser() { |
||
6038 | global $wgParserConf; |
||
6039 | if ( $this->mInParse ) { |
||
6040 | return new $wgParserConf['class']( $wgParserConf ); |
||
6041 | } else { |
||
6042 | return $this; |
||
6043 | } |
||
6044 | } |
||
6045 | |||
6046 | /** |
||
6047 | * Set's up the PHP implementation of OOUI for use in this request |
||
6048 | * and instructs OutputPage to enable OOUI for itself. |
||
6049 | * |
||
6050 | * @since 1.26 |
||
6051 | */ |
||
6052 | public function enableOOUI() { |
||
6053 | OutputPage::setupOOUI(); |
||
6054 | $this->mOutput->setEnableOOUI( true ); |
||
6055 | } |
||
6056 | } |
||
6057 |
Unless you are absolutely sure that the expression can never be null because of other conditions, we strongly recommend adding an additional type check to your code: