This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License along |
||
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
||
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||
16 | * http://www.gnu.org/copyleft/gpl.html |
||
17 | * |
||
18 | * @file |
||
19 | * @ingroup Language |
||
20 | */ |
||
21 | use MediaWiki\MediaWikiServices; |
||
22 | |||
23 | /** |
||
24 | * Base class for language conversion. |
||
25 | * @ingroup Language |
||
26 | * |
||
27 | * @author Zhengzhu Feng <[email protected]> |
||
28 | * @author fdcn <[email protected]> |
||
29 | * @author shinjiman <[email protected]> |
||
30 | * @author PhiLiP <[email protected]> |
||
31 | */ |
||
32 | class LanguageConverter { |
||
/**
 * Codes of the languages that support variants.
 * @since 1.20
 * @var array
 */
public static $languagesWithVariants = [
	'gan',
	'iu',
	'kk',
	'ku',
	'shi',
	'sr',
	'tg',
	'uz',
	'zh',
];

// Main language code, as passed to the constructor
public $mMainLanguageCode;
// Supported variants, minus those listed in $wgDisabledVariants
public $mVariants;
// Fallback language(s) of each variant, as passed to the constructor
public $mVariantFallbacks;
// Result of Language::fetchLanguageNames()
public $mVariantNames;
// Set to true by loadTables() once it has run
public $mTablesLoaded = false;
// Conversion tables indexed by variant code
public $mTables;
// 'bidirectional' 'unidirectional' 'disable' for each variant
public $mManualLevel;

/**
 * @var string Memcached key name
 */
public $mCacheKey;

// Language object passed to the constructor
public $mLangObj;
// Default flags merged with the custom flags and the variant codes
public $mFlags;
public $mDescCodeSep = ':';
public $mDescVarSep = ';';
public $mUcfirst = false;
// Title set by a manual conversion rule, false when there is none
public $mConvRuleTitle = false;
// Memoized result of getURLVariant()
public $mURLVariant;
// Last result of getUserVariant()
public $mUserVariant;
// Memoized result of getHeaderVariant()
public $mHeaderVariant;
// Nesting depth at which recursiveConvertRule() stops parsing inner rules
public $mMaxDepth = 10;
public $mVarSeparatorPattern;

// Key that must be present in cached tables for loadTables() to accept them
const CACHE_VERSION_KEY = 'VERSION 7';
||
76 | |||
/**
 * Set up a converter for one language and its variants.
 *
 * @param Language $langobj Language object this converter works for
 * @param string $maincode The main language code of this language
 * @param array $variants The supported variants of this language
 * @param array $variantfallbacks The fallback language of each variant
 * @param array $flags Custom strings that map to the conversion flags
 * @param array $manualLevel Manual conversion level per variant; variants
 *   without an entry get 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = []
) {
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariantFallbacks = $variantfallbacks;
	// Variants disabled in the site configuration are dropped right away
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantNames = Language::fetchLanguageNames();
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

	// Reserved for the program itself and therefore not listed here:
	//   'S' show converted text
	//   '+' add rules for alltext
	//   'E' the given flags are in error
	$builtinFlags = [
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N' // current variant name
	];
	$this->mFlags = array_merge( $builtinFlags, $flags );

	foreach ( $this->mVariants as $code ) {
		$this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
			? $manualLevel[$code]
			: 'bidirectional';
		// Each variant code is also registered as a flag mapping to itself
		$this->mFlags[$code] = $code;
	}
}
||
120 | |||
/**
 * List the variants this converter accepts.
 * Prefer this accessor over reading $this->mVariants directly.
 *
 * @return array All valid variant codes
 */
public function getVariants() {
	$variants = $this->mVariants;

	return $variants;
}
||
130 | |||
/**
 * Look up the fallback for a variant that is not defined in the markup.
 * For example, in zh people normally define zh-hans and zh-hant, but
 * less so zh-sg or zh-hk; when zh-sg is preferred but not defined,
 * zh-hans is picked instead. Right now this is only used by zh.
 *
 * @param string $variant The language code of the variant
 * @return string|array The configured fallback(s) for $variant, or the
 *   main language code if no fallback is registered
 */
public function getVariantFallbacks( $variant ) {
	return isset( $this->mVariantFallbacks[$variant] )
		? $this->mVariantFallbacks[$variant]
		: $this->mMainLanguageCode;
}
||
148 | |||
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool The converted title text; false while no manual
 *   rule has set one (the property starts as false and convertTo()
 *   resets it to false)
 */
public function getConvRuleTitle() {
	$ruleTitle = $this->mConvRuleTitle;

	return $ruleTitle;
}
||
156 | |||
/**
 * Get preferred language variant.
 *
 * Sources are tried in this order: the URL, then the user preference
 * (logged-in users) or the Accept-Language header (everyone else), then
 * $wgDefaultLanguageVariant. The main language code is returned when
 * none of them yields a valid variant.
 *
 * Unlike the other get*Variant functions this one is not memoized,
 * since new information might appear during processing after the
 * first call.
 *
 * @return string The preferred language code
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$variant = $this->getURLVariant();

	// Evaluated unconditionally, exactly as the user checks always were
	$useUserPreference = $wgUser->isSafeToLoad() && $wgUser->isLoggedIn();
	if ( !$variant ) {
		$variant = $useUserPreference
			? $this->getUserVariant()
			: $this->getHeaderVariant();
	}

	if ( !$variant && $wgDefaultLanguageVariant ) {
		$variant = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	return $this->validateVariant( $variant )
		? $variant
		: $this->mMainLanguageCode;
}
||
185 | |||
/**
 * Get default variant.
 * The user's own settings are not consulted: only the URL, the
 * Accept-Language header and $wgDefaultLanguageVariant are, in that
 * order.
 *
 * @return string The default variant code, or the main language code
 *   when no source yields a variant
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$variant = $this->getURLVariant();

	if ( !$variant ) {
		$variant = $this->getHeaderVariant();
	}

	if ( !$variant && $wgDefaultLanguageVariant ) {
		$variant = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	return $variant ?: $this->mMainLanguageCode;
}
||
209 | |||
/**
 * Validate the variant
 *
 * @param string|null $variant The variant to validate
 * @return mixed Returns the variant if it is one of $this->mVariants,
 *   null otherwise
 */
public function validateVariant( $variant = null ) {
	// Strict comparison: with loose matching a non-string value such as
	// true (or 0 on PHP < 8) compares equal to a variant code and would be
	// handed back to the caller as if it were a valid variant.
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
||
221 | |||
/**
 * Get the variant specified in the URL
 *
 * The 'variant' request parameter is read first, then 'uselang'. A
 * valid result is memoized in $this->mURLVariant.
 *
 * @return mixed Variant if a valid one was requested, null otherwise
 */
public function getURLVariant() {
	global $wgRequest;

	if ( $this->mURLVariant ) {
		return $this->mURLVariant;
	}

	// see if the preference is set in the request
	$requested = $wgRequest->getText( 'variant' ) ?: $wgRequest->getVal( 'uselang' );

	$this->mURLVariant = $this->validateVariant( $requested );

	return $this->mURLVariant;
}
||
244 | |||
/**
 * Determine if the user has a variant set.
 *
 * Deliberately not memoized: memoizing this function wreaks havoc on
 * parserTest.php.
 *
 * @return mixed Variant if a valid one is found, null if the stored
 *   value is not a valid variant, false if $wgUser is not safe to load
 */
protected function getUserVariant() {
	global $wgUser, $wgContLang;

	// Get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}

	if ( $wgUser->isLoggedIn() ) {
		$optionName = $this->mMainLanguageCode == $wgContLang->getCode()
			? 'variant'
			: 'variant-' . $this->mMainLanguageCode;
	} else {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$optionName = 'language';
	}

	$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );

	return $this->mUserVariant;
}
||
281 | |||
/**
 * Determine the language variant from the Accept-Language header.
 *
 * Each accepted language is checked directly first. If none is a valid
 * variant, the fallbacks collected for those languages are tried. A
 * valid result is memoized in $this->mHeaderVariant.
 *
 * @return mixed Variant if one found, null otherwise
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// see if some supported language variant is set in the
	// HTTP header.
	$accepted = array_keys( $wgRequest->getAcceptLang() );
	if ( !$accepted ) {
		return null;
	}

	$candidates = [];
	foreach ( $accepted as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// Remember the fallbacks of this language; they are only
		// tried once every accepted language has been rejected.
		$fallbacks = $this->getVariantFallbacks( $code );
		if ( is_array( $fallbacks ) ) {
			$candidates = array_merge( $candidates, $fallbacks );
		} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$candidates[] = $fallbacks;
		}
	}

	// No accepted language matched, process fallback languages now
	foreach ( array_unique( $candidates ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
||
333 | |||
/**
 * Dictionary-based conversion.
 * The -{ }- conversion rules are not parsed here; use convert() or
 * convertTo() if you need that.
 *
 * The text is split on HTML markup, HTML entities and parser markers.
 * Those pieces are kept literally (apart from the title and alt
 * attributes of tags, which are converted), and everything in between
 * is sent through translate() in one batch.
 *
 * @param string $text The text to be converted
 * @param bool|string $toVariant The target language code; a falsy
 *   value selects the preferred variant
 * @return string The converted text
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}

	// Everything is converted except the pieces matched here:
	$skipPatterns = [
		// contents of <code> tags
		'<code>.+?<\/code>',
		// contents of <script> tags
		'<script.*?>.*?<\/script>',
		// contents of <pre> tags
		'<pre.*?>.*?<\/pre>',
		// HTML markup (anything between < and >)
		'<[^>]+>',
		// HTML entities
		'&[a-zA-Z#][a-z0-9]+;',
		// placeholders created by the parser
		Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+',
		// needed when the text itself sits inside an HTML markup
		'<[^>]+$',
		'^[^<>]*>',
	];
	$reg = '/' . implode( '|', $skipPatterns ) . '/s';

	// Guard against delimiter nulls in the input
	$text = str_replace( "\000", '', $text );
	$textLength = strlen( $text );

	$sourceBlob = '';
	$literalBlob = '';
	$markupMatches = null;
	$elementMatches = null;
	$cursor = 0;
	while ( $cursor < $textLength ) {
		if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $cursor ) ) {
			list( $element, $elementPos ) = $markupMatches[0];
		} else {
			$element = '';
			$elementPos = $textLength;
		}

		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $cursor, $elementPos - $cursor ) . "\000";

		// Advance to the next position
		$cursor = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
		) {
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( isset( $attrs[$attrName] ) ) {
					$value = $attrs[$attrName];
					// Don't convert URLs. A '://' at offset 0 is treated
					// like "not found" here, since strpos() yields 0.
					if ( !strpos( $value, '://' ) ) {
						$value = $this->recursiveConvertTopLevel( $value, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$value = preg_replace( '/<[^>]+>/', '', $value );
					if ( $value !== $attrs[$attrName] ) {
						$attrs[$attrName] = $value;
						$changed = true;
					}
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together, alternating translated text and
	// literal markup
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current() . $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
||
449 | |||
/**
 * Translate a string to a variant.
 * Conversion rules are not parsed here; use convert() or convertTo()
 * for that.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text; $text is returned unchanged when it
 *   trims down to a falsy value (empty or whitespace only)
 */
public function translate( $text, $variant ) {
	if ( !trim( $text ) ) {
		// Nothing worth translating
		return $text;
	}

	$this->loadTables();

	return $this->mTables[$variant]->replace( $text );
}
||
468 | |||
/**
 * Run translate() on the text once for every valid variant.
 *
 * @param string $text The text to be converted
 * @return array Variant => converted text
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	return $converted;
}
||
485 | |||
/**
 * Apply manual conversion rules.
 *
 * Takes over the rule's title, if it has one, and merges the rule's
 * conversion pairs into (or removes them from) the tables of every
 * valid variant.
 *
 * @param ConverterRule $convRule
 */
protected function applyManualConv( $convRule ) {
	// The syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises the
	// title conversion.
	// Bug 24072: $mConvRuleTitle was overwritten by other manual
	// rule(s) not for title, which broke the title conversion, hence
	// the check that getTitle() is non-empty.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$convTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $convTable as $variant => $pairs ) {
		if ( $this->validateVariant( $variant ) ) {
			switch ( $action ) {
				case 'add':
					// More efficient than array_merge(), about 2.5 times.
					foreach ( $pairs as $from => $to ) {
						$this->mTables[$variant]->setPair( $from, $to );
					}
					break;
				case 'remove':
					$this->mTables[$variant]->removeArray( $pairs );
					break;
			}
		}
	}
}
||
520 | |||
/**
 * Auto convert a Title object to a readable string in the
 * preferred variant.
 *
 * Outside the main namespace the converted namespace name and a colon
 * are put in front of the translated title text.
 *
 * @param Title $title A object of Title
 * @return string Converted title text
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$ns = $title->getNamespace();

	$prefix = $ns !== NS_MAIN
		? $this->convertNamespace( $ns, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $title->getText(), $variant );
}
||
539 | |||
/**
 * Get the namespace display name in the preferred variant.
 *
 * The result is kept in the local server object cache for 60 seconds.
 * On a cache miss the name comes from, in order: the 'conversion-ns<id>'
 * message in the target variant, the same message in the content
 * language translated to the variant, and finally the namespace names
 * of the variant's own Language object.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code or null for preferred variant
 * @return string Namespace name for display
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$nsText = $cache->get( $cacheKey );
	if ( $nsText !== false ) {
		return $nsText;
	}

	$msgKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $msgKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$nsText = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $nsText === false ) {
		$contentMsg = wfMessage( $msgKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$nsText = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// No message exists, retrieve it from the target variant's namespace names.
	if ( $nsText === false ) {
		$nsText = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	$cache->set( $cacheKey, $nsText, 60 );

	return $nsText;
}
||
588 | |||
/**
 * Convert text to the preferred variant of a language.
 *
 * The automatic conversion is done in autoConvert(). Text marked with
 * -{}- specifies special conversions that autoConvert() cannot
 * accomplish; that markup is parsed on the way through convertTo().
 *
 * Syntax of the markup:
 * -{code1:text1;code2:text2;...}- or
 * -{flags|code1:text1;code2:text2;...}- or
 * -{text}- in which case no conversion should take place for text
 *
 * @param string $text Text to be converted
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
||
607 | |||
/**
 * Same as convert(), but with the target variant given explicitly.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text; $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}

	// Each run starts without a rule-defined title.
	$this->mConvRuleTitle = false;

	return $this->recursiveConvertTopLevel( $text, $variant );
}
||
624 | |||
/**
 * Recursively convert text on the outside. Allow to use nested
 * markups to custom rules.
 *
 * Text outside -{ }- markup goes through autoConvert(), unless
 * guessVariant() reports that the whole text is already in the target
 * variant. Each -{ is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$converted = '';
	$cursor = 0;
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	while ( $cursor < $length ) {
		$markupPos = strpos( $text, '-{', $cursor );
		// Plain text runs up to the next markup, or to the end of the
		// text if there is no more markup
		$plainEnd = $markupPos === false ? $length : $markupPos;

		$plain = substr( $text, $cursor, $plainEnd - $cursor );
		if ( $shouldConvert ) {
			$converted .= $this->autoConvert( $plain, $variant );
		} else {
			$converted .= $plain;
		}

		if ( $markupPos === false ) {
			// That was the final segment
			break;
		}

		// Advance to the markup; recursiveConvertRule() moves the
		// position past the rule it consumes
		$cursor = $markupPos;
		$converted .= $this->recursiveConvertRule( $text, $variant, $cursor, $depth + 1 );
	}

	return $converted;
}
||
664 | |||
/**
 * Recursively convert text on the inside.
 *
 * Expects $text to hold '-{' at $startPos. Consumes the rule up to its
 * matching '}-', recursing into nested rules, and advances $startPos
 * past what was consumed. Nested rules beyond $this->mMaxDepth are left
 * in place and an error message is inserted once. An unclosed rule is
 * returned as '-{' followed by the auto-converted remainder of the text.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the '-{' to parse; updated to the
 *   offset just after the consumed rule
 * @param int $depth Depth of recursion
 *
 * @throws MWException If $text does not hold '-{' at $startPos
 * @return string Converted text
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// Unclosed rule
			break;
		}

		$token = $m[0][0];
		$pos = $m[0][1];

		// Markup found
		// Append initial segment
		$inner .= substr( $text, $startPos, $pos - $startPos );

		// Advance position
		$startPos = $pos;

		switch ( $token ) {
			case '-{':
				// Check max depth
				if ( $depth >= $this->mMaxDepth ) {
					$inner .= '-{';
					if ( !$warningDone ) {
						$inner .= '<span class="error">' .
							wfMessage( 'language-converter-depth-warning' )
								->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
							'</span>';
						$warningDone = true;
					}
					$startPos += 2;
					// Target the while loop explicitly: a bare "continue"
					// inside a switch acts like "break" and raises an
					// E_WARNING on PHP 7.3+.
					continue 2;
				}
				// Recursively parse another rule
				$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
				break;
			case '}-':
				// Apply the rule
				$startPos += 2;
				$rule = new ConverterRule( $inner, $this );
				$rule->parse( $variant );
				$this->applyManualConv( $rule );
				return $rule->getDisplay();
			default:
				throw new MWException( __METHOD__ . ': invalid regex match' );
		}
	}

	// Unclosed rule
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $inner, $variant );
}
||
742 | |||
/**
 * If a language supports multiple variants, it is possible that
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it. See e.g. LanguageZh.php
 * The input parameters may be modified upon return: when a variant of
 * the link is found to exist, $nt becomes that title and $link its text.
 *
 * @param string &$link The name of the link
 * @param Title &$nt The title object of the link
 * @param bool $ignoreOtherCond To disable other conditions when
 *   we need to transclude a template or update a category's link
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;

	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		// Editing a red link counts as viewing
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$linkBatch = new LinkBatch();

	$conversionDisabled = $wgDisableLangConversion || $wgDisableTitleConversion;
	$blockedByRequest = !$ignoreOtherCond && (
		$isredir == 'no'
		|| $action == 'edit'
		|| $action == 'submit'
		|| $linkconvert == 'no'
	);
	if ( $conversionDisabled || $blockedByRequest ) {
		return;
	}

	$ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	// Collect a title for every variant text that differs from the link
	$candidates = [];
	foreach ( $variants as $variantText ) {
		if ( $variantText != $link ) {
			$candidate = Title::newFromText( $variantText, $ns );
			if ( $candidate !== null ) {
				$linkBatch->addObj( $candidate );
				$candidates[] = $candidate;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// The first candidate with an article ID above zero wins
	foreach ( $candidates as $candidate ) {
		if ( $candidate->getArticleID() > 0 ) {
			$nt = $candidate;
			$link = $candidate->getText();
			break;
		}
	}
}
||
815 | |||
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
||
826 | |||
/**
 * Guess if a text is written in a variant. This should be implemented
 * in subclasses; the base implementation never recognises a variant.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool True if $text appears to be written in $variant, false
 *   if not (always false here)
 *
 * @author Nikola Smolenski <[email protected]>
 * @since 1.19
 */
public function guessVariant( $text, $variant ) {
	$looksLikeVariant = false;

	return $looksLikeVariant;
}
||
840 | |||
/**
 * Load default conversion tables.
 * Derived classes must override this method; the base implementation
 * only throws.
 *
 * @private
 * @throws MWException Always, naming the class that lacks the override
 */
public function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . get_class( $this )
	);
}
||
853 | |||
/**
 * Load conversion tables either from the cache or the disk.
 *
 * Does nothing if the tables were already loaded. Cached tables are
 * only accepted when they carry the CACHE_VERSION_KEY entry; otherwise
 * the tables are rebuilt and written back to the cache.
 *
 * @private
 * @param bool $fromCache Load from memcached? Defaults to true.
 */
public function loadTables( $fromCache = true ) {
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	if ( $fromCache ) {
		wfProfileIn( __METHOD__ . '-cache' );
		$this->mTables = $cache->get( $this->mCacheKey );
		wfProfileOut( __METHOD__ . '-cache' );
	}

	$usable = $this->mTables
		&& array_key_exists( self::CACHE_VERSION_KEY, $this->mTables );
	if ( $usable ) {
		return;
	}

	wfProfileIn( __METHOD__ . '-recache' );
	// not in cache, or we need a fresh reload.
	// We will first load the default tables
	// then update them using things in MediaWiki:Conversiontable/*
	$this->loadDefaultTables();
	foreach ( $this->mVariants as $code ) {
		$this->mTables[$code]->mergeArray( $this->parseCachedTable( $code ) );
	}

	$this->postLoadTables();
	$this->mTables[self::CACHE_VERSION_KEY] = true;

	$cache->set( $this->mCacheKey, $this->mTables, 43200 );
	wfProfileOut( __METHOD__ . '-recache' );
}
||
892 | |||
/**
 * Hook invoked by loadTables() once the conversion tables have been rebuilt,
 * just before they are stored in the cache.
 *
 * Intentionally empty here.
 */
function postLoadTables() {
}
||
898 | |||
/**
 * Throw away the in-memory conversion tables and rebuild them without
 * consulting the cache.
 *
 * @private
 */
function reloadTables() {
	// Drop the current tables, if there are any.
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// Clear the loaded flag so loadTables() does not return early, and pass
	// false so it skips the cache lookup and rebuilds.
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
||
912 | |||
/**
 * Parse the conversion table stored in the cache.
 *
 * The tables should be in blocks of the following form:
 *		-{
 *			word => word ;
 *			word => word ;
 *			...
 *		}-
 *
 * To make the tables more manageable, subpages are allowed
 * and will be parsed recursively if $recursive == true.
 *
 * Each page is parsed at most once: the set of pages already visited is kept
 * in a static variable that persists across calls, and a repeated request
 * for the same page yields an empty array.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 *
 * @return array Map of source word => replacement word
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	// Pages already visited; static, so it persists across calls.
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			// No closing "]]": not a link.
			continue;
		}
		// Drop the "|label" part, then split the target into
		// namespace:page / variant code / remaining subpage path.
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) == 3 ) {
			$sublink = $b[2];
		} else {
			$sublink = '';
		}

		// Require a variant segment before reading $b[1]: a bare
		// [[MediaWiki:Conversiontable]] link has only one segment and would
		// otherwise trigger an undefined-offset notice.
		if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		// Keep only what precedes the closing "}-", then strip quote and
		// list-markup characters before splitting into individual rules.
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				// Not exactly one "=>": ignore the entry.
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union: entries from the subpage win over existing ones.
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		// Also register a first-letter-uppercased copy of every rule.
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
||
1030 | |||
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * Text that already contains a conversion marker ("-{" or "}-") anywhere,
 * including at the very beginning, is returned unchanged.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused
 * @return string The tagged text
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked; strpos() yields 0 for a match at the
	# start of the string, so it must be compared against false explicitly
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
||
1048 | |||
/**
 * Convert the sorting key for category links, so that keys which are
 * variants of each other map to the same key.
 *
 * The base implementation hands the key back untouched.
 *
 * @param string $key
 *
 * @return string
 */
function convertCategoryKey( $key ) {
	return $key;
}
||
1060 | |||
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * Only pages in the MediaWiki namespace whose DB key has the form
 * "Conversiontable/<code>[/...]" with a <code> accepted by
 * validateVariant() trigger a reload; anything else is ignored.
 *
 * @param Title $titleobj The Title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	// Split into: page name / variant code / rest of the subpage path.
	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
||
1079 | |||
/**
 * Get the separator pattern for ConverterRule::parseRules(), building and
 * memoising it on first use.
 *
 * The pattern is meant for preg_split: text is split at ";" only when a
 * valid variant name follows the markup (or the string ends). For example
 *   -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
 *   <span style="font-size:120%;">yyy</span>;}-
 * should be split as
 *   [
 *     [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
 *     [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
 *     [2] => ''
 *   ]
 * so the ";" inside the style attributes is left alone.
 *
 * @return string
 */
function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// One pair of look-ahead alternatives per variant, each ending in "|".
	$alternatives = '';
	foreach ( $this->mVariants as $code ) {
		// zh-hans:xxx;zh-hant:yyy
		$alternatives .= $code . '\s*:|';
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		$alternatives .= '[^;]*?=>\s*' . $code . '\s*:|';
	}

	// The final alternative matches optional whitespace up to end of string.
	$this->mVarSeparatorPattern = '/;\s*(?=' . $alternatives . '\s*$)/';

	return $this->mVarSeparatorPattern;
}
||
1109 | } |
||
1110 |
This check looks at variables that have been passed in as parameters and are passed out again to other methods.
If the outgoing method call has stricter type requirements than the method itself, an issue is raised.
An additional type check may prevent trouble.