This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License along |
||
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
16 | * http://www.gnu.org/copyleft/gpl.html |
||
17 | * |
||
18 | * @file |
||
19 | * @ingroup Language |
||
20 | */ |
||
21 | use MediaWiki\MediaWikiServices; |
||
22 | |||
23 | /** |
||
24 | * Base class for language conversion. |
||
25 | * @ingroup Language |
||
26 | * |
||
27 | * @author Zhengzhu Feng <[email protected]> |
||
28 | * @author fdcn <[email protected]> |
||
29 | * @author shinjiman <[email protected]> |
||
30 | * @author PhiLiP <[email protected]> |
||
31 | */ |
||
32 | class LanguageConverter { |
||
/**
 * Language codes that support variants.
 * @since 1.20
 * @var array
 */
public static $languagesWithVariants = [
	'gan',
	'iu',
	'kk',
	'ku',
	'shi',
	'sr',
	'tg',
	'uz',
	'zh',
];

/** @var string Main language code, as passed to the constructor */
public $mMainLanguageCode;

/** @var array Supported variants, minus those listed in $wgDisabledVariants */
public $mVariants;

/** @var array Fallback(s) for each variant, keyed by variant code */
public $mVariantFallbacks;

/** @var array Filled from Language::fetchLanguageNames() by the constructor */
public $mVariantNames;

/** @var bool Set once loadTables() has run */
public $mTablesLoaded = false;

/** @var array|bool Conversion tables keyed by variant code; false while unloaded */
public $mTables;

/** @var array 'bidirectional', 'unidirectional' or 'disable' for each variant */
public $mManualLevel;

/**
 * @var string Memcached key name
 */
public $mCacheKey;

/** @var Language Language object passed to the constructor */
public $mLangObj;

/** @var array Flag strings: the built-in flags merged with custom ones, plus one per variant */
public $mFlags;

// NOTE(review): the two separators below are not used in the visible part of
// this file; presumably they delimit "code:text;code:text" rule markup.
public $mDescCodeSep = ':';
public $mDescVarSep = ';';

// NOTE(review): not used in the visible part of this file.
public $mUcfirst = false;

/** @var string|bool Title set by the last manual rule that provided one; false if none */
public $mConvRuleTitle = false;

/** @var string|null Result memoized by getURLVariant() */
public $mURLVariant;

/** @var string|null Last result stored by getUserVariant() */
public $mUserVariant;

/** @var string|null Result memoized by getHeaderVariant() */
public $mHeaderVariant;

/** @var int Maximum nesting depth of -{ }- rules before a warning is emitted */
public $mMaxDepth = 10;

// NOTE(review): not used in the visible part of this file.
public $mVarSeparatorPattern;

const CACHE_VERSION_KEY = 'VERSION 7';
||
76 | |||
/**
 * Set up a converter for one language and its variants.
 *
 * Variants listed in $wgDisabledVariants are dropped from the supported set.
 * Every remaining variant gets a manual level (defaulting to
 * 'bidirectional') and a flag entry named after itself.
 *
 * @param Language $langobj
 * @param string $maincode The main language code of this language
 * @param array $variants The supported variants of this language
 * @param array $variantfallbacks The fallback language of each variant
 * @param array $flags Custom strings that map to the flags
 * @param array $manualLevel Manual conversion level per variant
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = []
) {
	global $wgDisabledVariants;

	// Flags 'S' (show converted text), '+' (add rules for all text) and
	// 'E' (the given flags are erroneous) are reserved for internal use.
	$builtinFlags = [
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implemented)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N' // current variant name
	];

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
	$this->mFlags = array_merge( $builtinFlags, $flags );

	foreach ( $this->mVariants as $code ) {
		$this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
			? $manualLevel[$code]
			: 'bidirectional';
		// Each variant code doubles as a flag
		$this->mFlags[$code] = $code;
	}
}
||
120 | |||
/**
 * Accessor for the list of valid variants.
 * Prefer this over reading $this->mVariants directly.
 *
 * @return array All valid variant codes
 */
public function getVariants() {
	$variants = $this->mVariants;
	return $variants;
}
||
130 | |||
/**
 * Look up the fallback for a variant that is not defined in the markup.
 *
 * For example, in zh people usually define zh-hans and zh-hant but
 * rarely zh-sg or zh-hk; when zh-sg is preferred but not defined,
 * zh-hans is picked instead. Right now this is only used by zh.
 *
 * @param string $variant The language code of the variant
 * @return string|array The configured fallback(s) for $variant, or the
 *   main language code if none is configured
 */
public function getVariantFallbacks( $variant ) {
	return isset( $this->mVariantFallbacks[$variant] )
		? $this->mVariantFallbacks[$variant]
		: $this->mMainLanguageCode;
}
||
148 | |||
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool The converted title text; the property defaults to
 *   false and is reset to false by convertTo()
 */
public function getConvRuleTitle() {
	$title = $this->mConvRuleTitle;
	return $title;
}
||
156 | |||
/**
 * Get the preferred language variant.
 *
 * Sources are tried in this order: the URL, then the user preference
 * (logged-in users) or the Accept-Language header (everyone else), then
 * $wgDefaultLanguageVariant. The main language code is returned when
 * none of them yields a valid variant.
 *
 * @return string The preferred language code
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$variant = $this->getURLVariant();

	// The user checks run even when the URL already supplied a variant,
	// matching the evaluation order callers have always seen.
	$useUserPref = $wgUser->isSafeToLoad() && $wgUser->isLoggedIn();
	if ( !$variant ) {
		$variant = $useUserPref
			? $this->getUserVariant()
			: $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$variant ) {
		$variant = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// Unlike the other get*Variant functions, this one is not memoized
	// (i.e. its return value is not cached), since new information might
	// appear during processing after the first call.
	return $this->validateVariant( $variant )
		? $variant
		: $this->mMainLanguageCode;
}
||
185 | |||
/**
 * Get the default variant.
 *
 * User preferences are not consulted: only the URL, the
 * Accept-Language header and $wgDefaultLanguageVariant are.
 *
 * @return string The default variant code, or the main language code
 *   if no source yields one
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$variant = $this->getURLVariant() ?: $this->getHeaderVariant();

	if ( $wgDefaultLanguageVariant && !$variant ) {
		$variant = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	return $variant ?: $this->mMainLanguageCode;
}
||
209 | |||
/**
 * Validate the variant.
 *
 * The comparison is strict, so non-string input such as 0 or true can
 * never be mistaken for a variant code (loose comparison would match
 * them against any string on PHP < 8).
 *
 * @param string|null $variant The variant to validate
 * @return mixed Returns the variant if it is valid, null otherwise
 */
public function validateVariant( $variant = null ) {
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
||
221 | |||
/**
 * Get the variant specified in the URL.
 *
 * Reads the 'variant' request parameter and falls back to 'uselang'.
 * A valid result is memoized in $this->mURLVariant.
 *
 * @return mixed Variant if a valid one was found, null otherwise
 */
public function getURLVariant() {
	global $wgRequest;

	if ( !$this->mURLVariant ) {
		// See if the preference is set in the request
		$requested = $wgRequest->getText( 'variant' ) ?: $wgRequest->getVal( 'uselang' );
		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
||
244 | |||
/**
 * Determine the variant stored in the user's options.
 *
 * Deliberately not memoized: caching the result wreaks havoc on
 * parserTest.php.
 *
 * @return mixed false if the user object is not safe to load; otherwise
 *   the validated variant, or null if the stored value is not valid
 */
protected function getUserVariant() {
	global $wgUser, $wgContLang;

	// Don't touch stub user objects: that causes infinite recursion
	// during initialisation.
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}

	if ( !$wgUser->isLoggedIn() ) {
		// Figure out the user language without constructing wgLang,
		// to avoid infinite recursion
		$optionName = 'language';
	} elseif ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
		$optionName = 'variant';
	} else {
		$optionName = 'variant-' . $this->mMainLanguageCode;
	}

	$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );
	return $this->mUserVariant;
}
||
281 | |||
/**
 * Determine the language variant from the Accept-Language header.
 *
 * Header languages are checked in order. If none is a valid variant,
 * the fallbacks collected for those languages are checked next.
 * A valid result is memoized in $this->mHeaderVariant.
 *
 * @return mixed Variant if a valid one was found, null otherwise
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// See if some supported language variant is set in the HTTP header
	$headerCodes = array_keys( $wgRequest->getAcceptLang() );
	if ( !$headerCodes ) {
		return null;
	}

	$fallbackCodes = [];
	foreach ( $headerCodes as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			// Direct hit, no need to look at fallbacks
			return $this->mHeaderVariant;
		}

		// Record the fallbacks of this language; they are only
		// examined once every header language has been tried.
		$fallbacks = $this->getVariantFallbacks( $code );
		if ( is_array( $fallbacks ) ) {
			$fallbackCodes = array_merge( $fallbackCodes, $fallbacks );
		} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$fallbackCodes[] = $fallbacks;
		}
	}

	// Process the fallback languages now
	foreach ( array_unique( $fallbackCodes ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
||
333 | |||
/**
 * Dictionary-based conversion.
 *
 * Conversion rules are not parsed here; use convert() or convertTo()
 * for that. The text is split into translatable runs and literal
 * elements (markup, entities, parser markers). The runs are translated
 * in a single batch and then interleaved with the literals again.
 *
 * @param string $text The text to be converted
 * @param bool|string $toVariant The target language code; a falsy
 *   value means "use the preferred variant"
 * @return string The converted text
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}

	// Everything is converted except:
	//  1. HTML markup (anything between < and >)
	//  2. HTML entities
	//  3. placeholders created by the parser
	$markerPart = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+';

	// Needed when the text starts or ends inside an HTML tag
	$partialTagPart = '|<[^>]+$|^[^<>]*>';

	// No conversion between <code> tags
	$codePart = '<code>.+?<\/code>|';
	// No conversion of <script> tags
	$scriptPart = '<script.*?>.*?<\/script>|';
	// No conversion of <pre> tags
	$prePart = '<pre.*?>.*?<\/pre>|';

	$skipRegex = '/' . $codePart . $scriptPart . $prePart .
		'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $markerPart . $partialTagPart . '/s';

	// NUL is used as the batch delimiter below, so strip it from the input
	$text = str_replace( "\000", '', $text );
	$textLength = strlen( $text );

	$sourceBlob = '';
	$literalBlob = '';
	$cursor = 0;
	$markupMatches = null;
	$elementMatches = null;
	while ( $cursor < $textLength ) {
		if ( preg_match( $skipRegex, $text, $markupMatches, PREG_OFFSET_CAPTURE, $cursor ) ) {
			$element = $markupMatches[0][0];
			$elementPos = $markupMatches[0][1];
		} else {
			// No further literal element: the rest is translatable
			$element = '';
			$elementPos = $textLength;
		}

		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $cursor, $elementPos - $cursor ) . "\000";

		// Advance past the matched element (measured before it is rewritten below)
		$cursor = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
		) {
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( isset( $attrs[$attrName] ) ) {
					$converted = $attrs[$attrName];

					// Don't convert URLs.
					// NOTE(review): !strpos() is also true when '://' sits at offset 0.
					if ( !strpos( $converted, '://' ) ) {
						$converted = $this->recursiveConvertTopLevel( $converted, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$converted = preg_replace( '/<[^>]+>/', '', $converted );
					if ( $converted !== $attrs[$attrName] ) {
						$attrs[$attrName] = $converted;
						$changed = true;
					}
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together: translated run, literal, translated run, ...
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
||
449 | |||
/**
 * Translate a string to a variant.
 * Doesn't parse rules or do any of that other stuff, for that use
 * convert() or convertTo().
 *
 * Text that is empty or consists only of whitespace is returned
 * untouched, and the tables are not loaded for it.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text
 */
public function translate( $text, $variant ) {
	// Compare against '' rather than relying on truthiness: the string
	// "0" is falsy in PHP but is neither empty nor whitespace.
	if ( trim( $text ) !== '' ) {
		$this->loadTables();
		$text = $this->mTables[$variant]->replace( $text );
	}
	return $text;
}
||
468 | |||
/**
 * Run translate() on the text once for every valid variant.
 *
 * @param string $text The text to be converted
 * @return array Variant => converted text
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	return $converted;
}
||
485 | |||
/**
 * Apply manual conversion rules.
 *
 * Records the rule's title (if it has one) and merges the rule's
 * conversion table into, or removes it from, the loaded tables.
 *
 * @param ConverterRule $convRule
 */
protected function applyManualConv( $convRule ) {
	// The syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises title
	// conversion. Bug 24072: $mConvRuleTitle used to be overwritten by
	// later manual rules that were not for the title, which broke title
	// conversion; hence it is only updated when the rule carries a title.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge/remove manual conversion rules to/from the global table
	$ruleTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $ruleTable as $variant => $pairs ) {
		if ( $this->validateVariant( $variant ) ) {
			switch ( $action ) {
				case 'add':
					// More efficient than array_merge(), about 2.5 times.
					foreach ( $pairs as $from => $to ) {
						$this->mTables[$variant]->setPair( $from, $to );
					}
					break;
				case 'remove':
					$this->mTables[$variant]->removeArray( $pairs );
					break;
			}
		}
	}
}
||
520 | |||
/**
 * Auto convert a Title object to a readable string in the
 * preferred variant.
 *
 * Titles outside the main namespace are prefixed with their converted
 * namespace name and a colon.
 *
 * @param Title $title A Title object
 * @return string Converted title text
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$ns = $title->getNamespace();

	$prefix = $ns !== NS_MAIN
		? $this->convertNamespace( $ns, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $title->getText(), $variant );
}
||
539 | |||
/**
 * Get the namespace display name in the preferred variant.
 *
 * The name is taken from the first of these that exists: the
 * 'conversion-ns<index>' message in the target variant, the same
 * message in the content language (translated to the variant), or the
 * variant language's own formatted namespace text. Results are kept in
 * the local server object cache for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code or null for preferred variant
 * @return string Namespace name for display
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$displayName = $cache->get( $cacheKey );
	if ( $displayName !== false ) {
		return $displayName;
	}

	// First choice: a message giving the converted name in the target variant
	$message = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
	if ( $message->exists() ) {
		$displayName = $message->plain();
	}

	// Second choice: the content-language message, which still needs
	// translating to the target variant
	if ( $displayName === false ) {
		$message = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
		if ( $message->exists() ) {
			$displayName = $this->translate( $message->plain(), $variant );
		}
	}

	// Last resort: the target variant's own namespace names
	if ( $displayName === false ) {
		$variantLang = $this->mLangObj->factory( $variant );
		$displayName = $variantLang->getFormattedNsText( $index );
	}

	$cache->set( $cacheKey, $displayName, 60 );

	return $displayName;
}
||
588 | |||
/**
 * Convert text to the preferred variant of a language.
 *
 * Automatic conversion happens in autoConvert(). This entry point also
 * handles text marked with -{}-, which specifies special conversions
 * that autoConvert() cannot accomplish.
 *
 * Syntax of the markup:
 * -{code1:text1;code2:text2;...}-  or
 * -{flags|code1:text1;code2:text2;...}-  or
 * -{text}- in which case no conversion should take place for text
 *
 * @param string $text Text to be converted
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
||
607 | |||
/**
 * Same as convert(), except that the target variant is given explicitly.
 *
 * Returns the text unchanged when $wgDisableLangConversion is set.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	if ( !$wgDisableLangConversion ) {
		// Reset converter state for a new converter run
		$this->mConvRuleTitle = false;
		$text = $this->recursiveConvertTopLevel( $text, $variant );
	}

	return $text;
}
||
624 | |||
/**
 * Recursively convert text on the outside of -{ }- markup. Nested
 * markup may be used to customise rules.
 *
 * Text between rules goes through autoConvert(), unless guessVariant()
 * reports that the text is already in the target variant. Each rule is
 * handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$startPos = 0;
	$result = '';
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	while ( $startPos < $length ) {
		$rulePos = strpos( $text, '-{', $startPos );

		// Plain text up to the next rule, or to the end if there is none
		$plain = $rulePos === false
			? substr( $text, $startPos )
			: substr( $text, $startPos, $rulePos - $startPos );
		$result .= $shouldConvert ? $this->autoConvert( $plain, $variant ) : $plain;

		if ( $rulePos === false ) {
			// No more markup: the final segment has been appended
			break;
		}

		// Hand the rule over; $startPos is passed by reference and is
		// moved past whatever the rule consumed.
		$startPos = $rulePos;
		$result .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	return $result;
}
||
664 | |||
/**
 * Recursively convert text on the inside of a -{ }- rule.
 *
 * On entry $startPos must point at the opening "-{". On return it
 * points just past the closing "}-", or at the end of the text if the
 * rule was never closed.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the rule; updated as text is consumed
 * @param int $depth Depth of recursion
 *
 * @throws MWException If $startPos does not point at "-{"
 * @return string Converted text
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// Unclosed rule
			break;
		}

		$token = $m[0][0];
		$pos = $m[0][1];

		// Markup found
		// Append initial segment
		$inner .= substr( $text, $startPos, $pos - $startPos );

		// Advance position
		$startPos = $pos;

		switch ( $token ) {
			case '-{':
				// Check max depth
				if ( $depth >= $this->mMaxDepth ) {
					$inner .= '-{';
					// Emit the depth warning only once per rule
					if ( !$warningDone ) {
						$inner .= '<span class="error">' .
							wfMessage( 'language-converter-depth-warning' )
								->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
							'</span>';
						$warningDone = true;
					}
					$startPos += 2;
					// "continue 2" targets the while loop explicitly. A bare
					// "continue" inside a switch acts like "break" and raises
					// a warning on PHP 7.3+.
					continue 2;
				}
				// Recursively parse another rule
				$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
				break;
			case '}-':
				// Apply the rule
				$startPos += 2;
				$rule = new ConverterRule( $inner, $this );
				$rule->parse( $variant );
				$this->applyManualConv( $rule );
				return $rule->getDisplay();
			default:
				throw new MWException( __METHOD__ . ': invalid regex match' );
		}
	}

	// Unclosed rule: keep the opening marker and convert the rest as plain text
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $inner, $variant );
}
||
742 | |||
/**
 * If a language supports multiple variants, a link that does not exist
 * in one variant may exist in another. This function tries to find it.
 * See e.g. LanguageZh.php.
 * The input parameters may be modified upon return.
 *
 * @param string &$link The name of the link
 * @param Title &$nt The title object of the link
 * @param bool $ignoreOtherCond To disable other conditions when
 *   we need to transclude a template or update a category's link
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# A page that already exists needs no variant lookup; checking
	# it again may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		// Editing via a red link counts as a view
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$linkBatch = new LinkBatch();

	$conversionDisabled = $wgDisableLangConversion || $wgDisableTitleConversion;
	$requestOptsOut = $isredir == 'no'
		|| $action == 'edit'
		|| $action == 'submit'
		|| $linkconvert == 'no';
	if ( $conversionDisabled || ( !$ignoreOtherCond && $requestOptsOut ) ) {
		return;
	}

	$ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) {
		// Give up
		return;
	}

	// Collect a candidate title for every variant text that differs
	// from the original link
	$candidates = [];
	foreach ( $variants as $variantText ) {
		if ( $variantText == $link ) {
			continue;
		}
		$candidate = Title::newFromText( $variantText, $ns );
		if ( $candidate !== null ) {
			$linkBatch->addObj( $candidate );
			$candidates[] = $candidate;
		}
	}

	// Fetch all variants in a single query
	$linkBatch->execute();

	// The first candidate with an article ID wins
	foreach ( $candidates as $candidate ) {
		if ( $candidate->getArticleID() > 0 ) {
			$nt = $candidate;
			$link = $candidate->getText();
			break;
		}
	}
}
||
815 | |||
/**
 * Returns language specific hash options: "!" followed by the
 * preferred variant.
 *
 * @return string
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
||
826 | |||
/**
 * Guess whether a text is written in a given variant.
 *
 * This base implementation always answers false; subclasses are
 * expected to override it.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool True if $text appears to be written in $variant, false if not
 *
 * @author Nikola Smolenski <[email protected]>
 * @since 1.19
 */
public function guessVariant( $text, $variant ) {
	// No detection at this level
	return false;
}
||
840 | |||
/**
 * Load the default conversion tables.
 *
 * Derived classes must override this method; the base version only
 * throws.
 *
 * @private
 * @throws MWException Always, naming the class that failed to override it
 */
function loadDefaultTables() {
	// Name the concrete class so that the message points at the culprit
	$name = get_class( $this );

	throw new MWException( "Must implement loadDefaultTables() method in class $name" );
}
||
853 | |||
/**
 * Load the conversion tables, either from the cache or by rebuilding them.
 *
 * Does nothing if the tables were already loaded. A rebuild happens when
 * the cache is bypassed, has no entry, or holds an entry without the
 * current CACHE_VERSION_KEY. The rebuilt tables are written back to the
 * cache.
 *
 * @private
 * @param bool $fromCache Load from memcached? Defaults to true.
 */
function loadTables( $fromCache = true ) {
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	// The flag is raised before any loading work starts
	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	if ( $fromCache ) {
		wfProfileIn( __METHOD__ . '-cache' );
		$this->mTables = $cache->get( $this->mCacheKey );
		wfProfileOut( __METHOD__ . '-cache' );
	}

	$cachedTablesUsable = $this->mTables
		&& array_key_exists( self::CACHE_VERSION_KEY, $this->mTables );
	if ( $cachedTablesUsable ) {
		return;
	}

	wfProfileIn( __METHOD__ . '-recache' );
	// Not in cache, or a fresh reload is needed. Load the default tables
	// first, then update them using things in MediaWiki:Conversiontable/*
	$this->loadDefaultTables();
	foreach ( $this->mVariants as $variantCode ) {
		$overrides = $this->parseCachedTable( $variantCode );
		$this->mTables[$variantCode]->mergeArray( $overrides );
	}

	$this->postLoadTables();
	$this->mTables[self::CACHE_VERSION_KEY] = true;

	// 43200 seconds = 12 hours
	$cache->set( $this->mCacheKey, $this->mTables, 43200 );
	wfProfileOut( __METHOD__ . '-recache' );
}
||
892 | |||
/**
 * Extension point invoked by loadTables() once the default tables and the
 * on-wiki customisations have been merged. Intentionally empty here;
 * subclasses may override it to post-process the tables.
 */
function postLoadTables() {
	// Nothing to do in the base class.
}
898 | |||
/**
 * Discard the in-memory conversion tables and rebuild them, bypassing
 * the cache read.
 *
 * @private
 */
function reloadTables() {
	// Clear the "loaded" flag so that loadTables() does not return early.
	$this->mTablesLoaded = false;

	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// false: do not read from the cache, force a fresh rebuild.
	$this->loadTables( false );
}
912 | |||
/**
 * Parse the conversion table stored on the wiki for one variant.
 *
 * The tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * To make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive == true.
 *
 * Each page is parsed at most once per request: a static registry of
 * visited keys makes repeated (or cyclic) references return an empty array.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 *
 * @return array Map of source word => converted word
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	// Decide once whether a subpage was requested. A plain truthiness test
	// would treat the subpage name "0" as "no subpage" when building the
	// key while still treating it as a subpage when fetching the text.
	$hasSubpage = (string)$subpage !== '';

	$key = 'Conversiontable/' . $code;
	if ( $hasSubpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( !$hasSubpage ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		// Drop the link label, then split "head/code/subpage".
		$target = explode( '|', $link[0], 2 )[0];
		$parts = explode( '/', trim( $target ), 3 );
		if ( count( $parts ) < 2 ) {
			// No variant segment at all (e.g. [[MediaWiki:Conversiontable]]);
			// skipping here avoids reading an undefined offset below.
			continue;
		}

		if ( $parts[0] === $linkhead && $parts[1] === $code ) {
			$sublinks[] = isset( $parts[2] ) ? $parts[2] : '';
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Limit 3 so that entries with more than one "=>" yield three
			// pieces and are rejected by the count check.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union keeps the left operand on key clashes, so
			// subpage entries take precedence over this page's entries.
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		// Add an upper-case-first twin for every mapping.
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1030 | |||
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * Text that already contains a conversion marker ("-{" or "}-") anywhere,
 * including at the very beginning, is returned unchanged.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused
 * @return string The tagged text
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// strpos() yields 0 for a match at offset 0, which is falsy; compare
	// against false explicitly so a leading "-{" is recognised as well.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1048 | |||
/**
 * Convert the sorting key for category links, so that keys which are
 * variants of each other can map to the same key.
 *
 * The base implementation returns the key untouched.
 *
 * @param string $key
 *
 * @return string
 */
function convertCategoryKey( $key ) {
	// Identity mapping at this level.
	return $key;
}
1060 | |||
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * Only pages in the MediaWiki namespace named "Conversiontable/<variant>"
 * (optionally followed by a subpage), where <variant> passes
 * validateVariant(), trigger a reload.
 *
 * @param Title $titleobj The Title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	// Split into at most: "Conversiontable", variant code, remainder.
	$segments = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $segments ) <= 1 || $segments[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $segments[1] ) ) {
		$this->reloadTables();
	}
}
1079 | |||
/**
 * Get the cached separator pattern for ConverterRule::parseRules()
 *
 * The pattern is built on first use from $this->mVariants and then
 * memoised in $this->mVarSeparatorPattern.
 *
 * @return string
 */
function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// Pattern for preg_split: the text should be split at ";" only when a
	// valid variant name follows the markup. For example
	//   -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
	//   <span style="font-size:120%;">yyy</span>;}-
	// should be split as:
	//   [
	//     [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
	//     [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
	//     [2] => ''
	//   ]
	$alternatives = [];
	foreach ( $this->mVariants as $code ) {
		// zh-hans:xxx;zh-hant:yyy
		$alternatives[] = $code . '\s*:';
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		$alternatives[] = '[^;]*?=>\s*' . $code . '\s*:';
	}
	// A trailing ";" followed only by whitespace also separates.
	$alternatives[] = '\s*$';

	$this->mVarSeparatorPattern = '/;\s*(?=' . implode( '|', $alternatives ) . ')/';

	return $this->mVarSeparatorPattern;
}
1109 | } |
||
1110 |
In PHP, under loose comparison (like `==`, or `!=`, or `switch` conditions), values of different types might be equal.

For `string` values, the empty string `''` is a special case; in particular, the following results might be unexpected: