Issues (4122)

Security Analysis    not enabled

This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.

  Cross-Site Scripting
Cross-Site Scripting enables an attacker to inject code into the response of a web-request that is viewed by other users. It can for example be used to bypass access controls, or even to take over other users' accounts.
  File Exposure
File Exposure allows an attacker to gain access to local files that they should not be able to access. These files can, for example, include database credentials or other configuration files.
  File Manipulation
File Manipulation enables an attacker to write custom data to files. This potentially leads to injection of arbitrary code on the server.
  Object Injection
Object Injection enables an attacker to inject an object into PHP code, and can lead to arbitrary code execution, file exposure, or file manipulation attacks.
  Code Injection
Code Injection enables an attacker to execute arbitrary code on the server.
  Response Splitting
Response Splitting can be used to send arbitrary responses.
  File Inclusion
File Inclusion enables an attacker to inject custom files into PHP's file loading mechanism, either explicitly passed to include, or for example via PHP's auto-loading mechanism.
  Command Injection
Command Injection enables an attacker to inject a shell command that is executed with the privileges of the web-server. This can be used to expose sensitive data, or to gain access to your server.
  SQL Injection
SQL Injection enables an attacker to execute arbitrary SQL code on your database server gaining access to user data, or manipulating user data.
  XPath Injection
XPath Injection enables an attacker to modify the parts of an XML document that are read. If that XML document is, for example, used for authentication, this can lead to further vulnerabilities similar to SQL Injection.
  LDAP Injection
LDAP Injection enables an attacker to inject LDAP statements potentially granting permission to run unauthorized queries, or modify content inside the LDAP tree.
  Header Injection
  Other Vulnerability
This category comprises other attack vectors such as manipulating the PHP runtime, loading custom extensions, freezing the runtime, or similar.
  Regex Injection
Regex Injection enables an attacker to execute arbitrary code in your PHP process.
  XML Injection
XML Injection enables an attacker to read files on your local filesystem including configuration files, or can be abused to freeze your web-server process.
  Variable Injection
Variable Injection enables an attacker to overwrite program variables with custom data, and can lead to further vulnerabilities.
Unfortunately, the security analysis is currently not available for your project. If you are a non-commercial open-source project, please contact support to gain access.

languages/LanguageConverter.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * This program is free software; you can redistribute it and/or modify
4
 * it under the terms of the GNU General Public License as published by
5
 * the Free Software Foundation; either version 2 of the License, or
6
 * (at your option) any later version.
7
 *
8
 * This program is distributed in the hope that it will be useful,
9
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
 * GNU General Public License for more details.
12
 *
13
 * You should have received a copy of the GNU General Public License along
14
 * with this program; if not, write to the Free Software Foundation, Inc.,
15
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
 * http://www.gnu.org/copyleft/gpl.html
17
 *
18
 * @file
19
 * @ingroup Language
20
 */
21
use MediaWiki\MediaWikiServices;
22
23
/**
24
 * Base class for language conversion.
25
 * @ingroup Language
26
 *
27
 * @author Zhengzhu Feng <[email protected]>
28
 * @author fdcn <[email protected]>
29
 * @author shinjiman <[email protected]>
30
 * @author PhiLiP <[email protected]>
31
 */
32
class LanguageConverter {
33
	/**
	 * Codes of the languages that support variants.
	 * @since 1.20
	 * @var array
	 */
	public static $languagesWithVariants = [
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	];

	/** @var string Main language code, as passed to the constructor */
	public $mMainLanguageCode;
	/** @var array Supported variant codes with $wgDisabledVariants removed */
	public $mVariants;
	/** @var array Fallback(s) per variant code, as passed to the constructor */
	public $mVariantFallbacks;
	/** @var array Result of Language::fetchLanguageNames() */
	public $mVariantNames;
	/** @var bool Set to true once loadTables() has run */
	public $mTablesLoaded = false;
	/**
	 * @var array|bool Conversion tables keyed by variant code, plus a
	 *   CACHE_VERSION_KEY marker entry; false while nothing is loaded
	 */
	public $mTables;
	/** @var array 'bidirectional', 'unidirectional' or 'disable' for each variant */
	public $mManualLevel;

	/**
	 * @var string Cache key under which the conversion tables are stored
	 */
	public $mCacheKey;

	/** @var Language Language object passed to the constructor */
	public $mLangObj;
	/** @var array Flag map: default flags, custom flags and one entry per variant */
	public $mFlags;
	// NOTE(review): the next three fields are not used in the visible part of
	// this file; presumably they are read by ConverterRule -- confirm.
	public $mDescCodeSep = ':';
	public $mDescVarSep = ';';
	public $mUcfirst = false;
	/** @var string|bool Title set by a manual conversion rule, false if none */
	public $mConvRuleTitle = false;
	/** @var string|null Memoized result of getURLVariant() */
	public $mURLVariant;
	/** @var string|null Last result of getUserVariant() */
	public $mUserVariant;
	/** @var string|null Memoized result of getHeaderVariant() */
	public $mHeaderVariant;
	/** @var int Maximum nesting depth of -{ }- markup before a warning is emitted */
	public $mMaxDepth = 10;
	// NOTE(review): not referenced in the visible part of this file.
	public $mVarSeparatorPattern;

	/** Key of the marker entry that identifies the cached table format */
	const CACHE_VERSION_KEY = 'VERSION 7';
76
77
	/**
	 * Set up the converter for one language.
	 *
	 * Variants listed in $wgDisabledVariants are dropped from the supported
	 * set. Every remaining variant gets a manual conversion level
	 * ('bidirectional' unless $manualLevel says otherwise) and a flag entry
	 * that maps the variant code to itself.
	 *
	 * @param Language $langobj
	 * @param string $maincode The main language code of this language
	 * @param array $variants The supported variants of this language
	 * @param array $variantfallbacks The fallback language of each variant
	 * @param array $flags Defining the custom strings that maps to the flags
	 * @param array $manualLevel Limit for supported variants
	 */
	public function __construct( $langobj, $maincode, $variants = [],
								$variantfallbacks = [], $flags = [],
								$manualLevel = [] ) {
		global $wgDisabledVariants;

		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

		// Reserved for the program itself and therefore not listed here:
		//   'S' show converted text
		//   '+' add rules for alltext
		//   'E' the gave flags is error
		$builtinFlags = [
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code (but no display in placed code)
			'N' => 'N', // current variant name
		];
		$this->mFlags = array_merge( $builtinFlags, $flags );

		foreach ( $this->mVariants as $code ) {
			$this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
				? $manualLevel[$code]
				: 'bidirectional';
			$this->mFlags[$code] = $code;
		}
	}
120
121
	/**
	 * Return the list of variants this converter accepts.
	 *
	 * Prefer this accessor over reading $this->mVariants directly.
	 *
	 * @return array Contains all valid variants
	 */
	public function getVariants() {
		return $this->mVariants;
	}
130
131
	/**
	 * Look up the fallback(s) configured for a variant.
	 *
	 * Markup does not always define every variant: for zh, authors usually
	 * provide zh-hans and zh-hant but rarely zh-sg or zh-hk. When zh-sg is
	 * preferred but undefined, zh-hans is picked instead. Right now this is
	 * only used by zh.
	 *
	 * @param string $variant The language code of the variant
	 * @return string|array The code of the fallback language or the
	 *   main code if there is no fallback
	 */
	public function getVariantFallbacks( $variant ) {
		return isset( $this->mVariantFallbacks[$variant] )
			? $this->mVariantFallbacks[$variant]
			: $this->mMainLanguageCode;
	}
148
149
	/**
	 * Get the title produced by the conversion rule.
	 *
	 * @return string|bool The converted title text; false (the value
	 *   convertTo() resets it to) when no rule has set a title
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}
156
157
	/**
	 * Get preferred language variant.
	 *
	 * Sources are consulted in this order: the URL, then the user preference
	 * (logged-in users that are safe to load) or otherwise the
	 * Accept-Language header, then $wgDefaultLanguageVariant. If none of
	 * them yields a valid variant, the main language code is returned.
	 *
	 * @return string The preferred language code
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. its return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		// validateVariant() returns string|null, so compare against null
		// explicitly instead of relying on the truthiness of the string.
		if ( $this->validateVariant( $req ) !== null ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}
185
186
	/**
	 * Get default variant.
	 *
	 * Unlike getPreferredVariant(), the user's stored preference is never
	 * consulted: only the URL, the Accept-Language header and
	 * $wgDefaultLanguageVariant are, in that order.
	 *
	 * @return string The default variant code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$candidate = $this->getURLVariant() ?: $this->getHeaderVariant();

		if ( $wgDefaultLanguageVariant && !$candidate ) {
			$candidate = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		return $candidate ?: $this->mMainLanguageCode;
	}
209
210
	/**
	 * Validate the variant
	 *
	 * The lookup is strict: without it, loose comparison lets non-string
	 * values (for instance 0 or true) match a variant code and be returned
	 * as if they were valid.
	 *
	 * @param string $variant The variant to validate
	 * @return mixed Returns the variant if it is valid, null otherwise
	 */
	public function validateVariant( $variant = null ) {
		if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		return null;
	}
221
222
	/**
	 * Get the variant specified in the URL
	 *
	 * Reads the 'variant' request parameter and, if that is empty, the
	 * 'uselang' one. A valid result is remembered for later calls.
	 *
	 * @return mixed Variant if one found, null otherwise.
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$requested = $wgRequest->getText( 'variant' ) ?: $wgRequest->getVal( 'uselang' );

		$this->mURLVariant = $this->validateVariant( $requested );
		return $this->mURLVariant;
	}
244
245
	/**
	 * Determine if the user has a variant set.
	 *
	 * @return mixed Variant if one found; false when $wgUser is not safe
	 *   to load; null when the stored option is not a valid variant.
	 */
	protected function getUserVariant() {
		global $wgUser, $wgContLang;

		// Deliberately not memoized via $this->mUserVariant: memoizing this
		// function wreaks havoc on parserTest.php

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( !$wgUser->isSafeToLoad() ) {
			return false;
		}

		if ( !$wgUser->isLoggedIn() ) {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$optionName = 'language';
		} elseif ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
			$optionName = 'variant';
		} else {
			$optionName = 'variant-' . $this->mMainLanguageCode;
		}

		$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );
		return $this->mUserVariant;
	}
281
282
	/**
	 * Determine the language variant from the Accept-Language header.
	 *
	 * Each accepted language is tried as a variant first; if none matches,
	 * the fallbacks configured for those languages are tried in turn.
	 * A valid result is remembered for later calls.
	 *
	 * @return mixed Variant if one found, null otherwise.
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$acceptedCodes = array_keys( $wgRequest->getAcceptLang() );
		if ( !$acceptedCodes ) {
			return null;
		}

		$fallbackCandidates = [];
		foreach ( $acceptedCodes as $code ) {
			$this->mHeaderVariant = $this->validateVariant( $code );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// Collect the fallbacks of this language; they are only
			// examined once every accepted language has been rejected.
			$fallbacks = $this->getVariantFallbacks( $code );
			if ( is_array( $fallbacks ) ) {
				$fallbackCandidates = array_merge( $fallbackCandidates, $fallbacks );
			} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackCandidates[] = $fallbacks;
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			foreach ( array_unique( $fallbackCandidates ) as $code ) {
				$this->mHeaderVariant = $this->validateVariant( $code );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}
333
334
	/**
	 * Dictionary-based conversion.
	 * This function would not parse the conversion rules.
	 * If you want to parse rules, try to use convert() or
	 * convertTo().
	 *
	 * The text is split into translatable runs and literal elements (HTML
	 * tags, entities, parser markers). The runs are joined with NUL bytes,
	 * translated in one batch and then interleaved with the literals again.
	 * 'title' and 'alt' attributes of matched tags are converted separately.
	 *
	 * @param string $text The text to be converted
	 * @param bool|string $toVariant The target language code
	 * @return string The converted text
	 */
	public function autoConvert( $text, $toVariant = false ) {
		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			return $text;
		}

		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		*/
		$marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+';

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>]+$|^[^<>]*>';

		// disable convert to variants between <code> tags
		$codefix = '<code>.+?<\/code>|';
		// disable conversion of <script> tags
		$scriptfix = '<script.*?>.*?<\/script>|';
		// disable conversion of <pre> tags
		$prefix = '<pre.*?>.*?<\/pre>|';

		$reg = '/' . $codefix . $scriptfix . $prefix .
			'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		$text = str_replace( "\000", '', $text );
		// $text is not modified below, so its length is computed only once
		$textLength = strlen( $text );

		$markupMatches = null;
		$elementMatches = null;
		while ( $startPos < $textLength ) {
			if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
			} else {
				// No further markup: the remainder is one translatable run
				$elementPos = $textLength;
				$element = '';
			}

			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== ''
				&& preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
			) {
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				$changed = false;
				foreach ( [ 'title', 'alt' ] as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs
					if ( !strpos( $attr, '://' ) ) {
						$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$attr = preg_replace( '/<[^>]+>/', '', $attr );
					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		return $output;
	}
449
450
	/**
	 * Translate a string to a variant.
	 * Doesn't parse rules or do any of that other stuff, for that use
	 * convert() or convertTo().
	 *
	 * @param string $text Text to convert
	 * @param string $variant Variant language code
	 * @throws MWException If no conversion table is loaded for $variant
	 * @return string Translated text
	 */
	public function translate( $text, $variant ) {
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			// Fail with a diagnosable exception instead of a fatal
			// "call to a member function on null" when the table is missing.
			if ( !isset( $this->mTables[$variant] ) ) {
				throw new MWException(
					__METHOD__ . ": no conversion table loaded for variant '$variant'"
				);
			}
			$text = $this->mTables[$variant]->replace( $text );
		}
		return $text;
	}
468
469
	/**
	 * Run translate() once for every valid variant.
	 *
	 * @param string $text The text to be converted
	 * @return array Variant => converted text
	 */
	public function autoConvertToAllVariants( $text ) {
		$this->loadTables();

		$converted = [];
		foreach ( $this->mVariants as $code ) {
			$converted[$code] = $this->translate( $text, $code );
		}

		return $converted;
	}
485
486
	/**
	 * Apply manual conversion rules.
	 *
	 * Records the rule's title (if it has one) and adds the rule's pairs
	 * to, or removes them from, the loaded table of each valid variant.
	 *
	 * @param ConverterRule $convRule
	 */
	protected function applyManualConv( $convRule ) {
		// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
		// title conversion.
		// Bug 24072: $mConvRuleTitle was overwritten by other manual
		// rule(s) not for title, this breaks the title conversion.
		// Hence the title is only taken over when the rule provides one.
		$ruleTitle = $convRule->getTitle();
		if ( $ruleTitle ) {
			$this->mConvRuleTitle = $ruleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$ruleTable = $convRule->getConvTable();
		$action = $convRule->getRulesAction();
		foreach ( $ruleTable as $variant => $pairs ) {
			if ( !$this->validateVariant( $variant ) ) {
				continue;
			}

			switch ( $action ) {
				case 'add':
					// More efficient than array_merge(), about 2.5 times.
					foreach ( $pairs as $from => $to ) {
						$this->mTables[$variant]->setPair( $from, $to );
					}
					break;
				case 'remove':
					$this->mTables[$variant]->removeArray( $pairs );
					break;
			}
		}
	}
520
521
	/**
	 * Auto convert a Title object to a readable string in the
	 * preferred variant.
	 *
	 * Outside the main namespace the converted namespace name and a colon
	 * are prepended to the converted title text.
	 *
	 * @param Title $title A object of Title
	 * @return string Converted title text
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$nsIndex = $title->getNamespace();

		$nsPrefix = $nsIndex !== NS_MAIN
			? $this->convertNamespace( $nsIndex, $variant ) . ':'
			: '';

		return $nsPrefix . $this->translate( $title->getText(), $variant );
	}
539
540
	/**
	 * Get the namespace display name in the preferred variant.
	 *
	 * The name is looked up, in order, in the local server cache, in the
	 * 'conversion-ns<index>' message in the target variant, in the same
	 * message in the content language (translated to the variant), and
	 * finally in the namespace names of the variant's Language object.
	 * The result is cached for 60 seconds.
	 *
	 * @param int $index Namespace id
	 * @param string|null $variant Variant code or null for preferred variant
	 * @return string Namespace name for display
	 */
	public function convertNamespace( $index, $variant = null ) {
		if ( $index === NS_MAIN ) {
			return '';
		}

		if ( $variant === null ) {
			$variant = $this->getPreferredVariant();
		}

		$localCache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
		$cacheKey = $localCache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
		$nsText = $localCache->get( $cacheKey );
		if ( $nsText !== false ) {
			return $nsText;
		}

		$msgKey = 'conversion-ns' . $index;

		// First check if a message gives a converted name in the target variant.
		$variantMsg = wfMessage( $msgKey )->inLanguage( $variant );
		if ( $variantMsg->exists() ) {
			$nsText = $variantMsg->plain();
		}

		// Then check if a message gives a converted name in content language
		// which needs extra translation to the target variant.
		if ( $nsText === false ) {
			$contentMsg = wfMessage( $msgKey )->inContentLanguage();
			if ( $contentMsg->exists() ) {
				$nsText = $this->translate( $contentMsg->plain(), $variant );
			}
		}

		if ( $nsText === false ) {
			// No message exists, retrieve it from the target variant's namespace names.
			$nsText = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
		}

		$localCache->set( $cacheKey, $nsText, 60 );

		return $nsText;
	}
588
589
	/**
	 * Convert text to the preferred variant, honouring -{}- markup.
	 *
	 * Plain dictionary conversion is done in autoConvert(). This entry
	 * point additionally parses the text marked with -{}-, which specifies
	 * special conversions that can not be accomplished in autoConvert().
	 *
	 * Syntax of the markup:
	 * -{code1:text1;code2:text2;...}-  or
	 * -{flags|code1:text1;code2:text2;...}-  or
	 * -{text}- in which case no conversion should take place for text
	 *
	 * @param string $text Text to be converted
	 * @return string Converted text
	 */
	public function convert( $text ) {
		return $this->convertTo( $text, $this->getPreferredVariant() );
	}
607
608
	/**
	 * Same as convert(), but with an explicit target variant.
	 *
	 * Returns the text untouched when $wgDisableLangConversion is set.
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @return string Converted text
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;

		if ( !$wgDisableLangConversion ) {
			// Reset converter state for a new converter run.
			$this->mConvRuleTitle = false;
			return $this->recursiveConvertTopLevel( $text, $variant );
		}

		return $text;
	}
624
625
	/**
	 * Recursively convert text on the outside. Allow to use nested
	 * markups to custom rules.
	 *
	 * Text between -{ }- rules is passed through autoConvert() unless
	 * guessVariant() reports that the text is already in the target
	 * variant; the rules themselves go to recursiveConvertRule().
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @param int $depth Depth of recursion
	 * @return string Converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$cursor = 0;
		$result = '';
		$textLength = strlen( $text );
		$needsConversion = !$this->guessVariant( $text, $variant );

		while ( $cursor < $textLength ) {
			$rulePos = strpos( $text, '-{', $cursor );

			// Plain text up to the next rule, or up to the end of the input
			$plain = $rulePos === false
				? substr( $text, $cursor )
				: substr( $text, $cursor, $rulePos - $cursor );
			if ( $needsConversion ) {
				$plain = $this->autoConvert( $plain, $variant );
			}
			$result .= $plain;

			if ( $rulePos === false ) {
				// No more markup, the final segment has been appended
				return $result;
			}

			// Hand over to the rule parser, which advances $cursor by reference
			$cursor = $rulePos;
			$result .= $this->recursiveConvertRule( $text, $variant, $cursor, $depth + 1 );
		}

		return $result;
	}
664
665
	/**
	 * Recursively convert text on the inside.
	 *
	 * Parses one -{ ... }- rule that starts at $startPos, descending into
	 * nested rules until $this->mMaxDepth is reached. On return $startPos
	 * points just behind the consumed input.
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @param int &$startPos Offset of the opening "-{"; updated in place
	 * @param int $depth Depth of recursion
	 *
	 * @throws MWException
	 * @return string Converted text
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						// "continue 2" targets the while loop. A bare "continue"
						// inside a switch acts like "break" and raises a
						// warning since PHP 7.3.
						continue 2;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule: emit the opening marker literally and convert the rest
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}
742
743
	/**
	 * If a language supports multiple variants, it is possible that
	 * non-existing link in one variant actually exists in another variant.
	 * This function tries to find it. See e.g. LanguageZh.php
	 * The input parameters may be modified upon return
	 *
	 * Nothing is done when the title already exists, when conversion is
	 * disabled by configuration, or (unless $ignoreOtherCond is set) when
	 * the request is an edit/submit or carries redirect=no or linkconvert=no.
	 *
	 * @param string &$link The name of the link
	 * @param Title &$nt The title object of the link
	 * @param bool $ignoreOtherCond To disable other conditions when
	 *   we need to transclude a template or update a category's link
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
			// Editing through a red link counts as viewing
			$action = 'view';
		}
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$disableLinkConversion = $wgDisableLangConversion
			|| $wgDisableTitleConversion;
		$linkBatch = new LinkBatch();

		$blockedByRequest = $isredir == 'no'
			|| $action == 'edit'
			|| $action == 'submit'
			|| $linkconvert == 'no';
		if ( $disableLinkConversion || ( !$ignoreOtherCond && $blockedByRequest ) ) {
			return;
		}

		$ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		$titles = [];

		foreach ( $variants as $converted ) {
			if ( $converted == $link ) {
				continue;
			}
			$candidate = Title::newFromText( $converted, $ns );
			if ( $candidate !== null ) {
				$linkBatch->addObj( $candidate );
				$titles[] = $candidate;
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		// The first candidate that exists replaces the caller's link and title
		foreach ( $titles as $candidate ) {
			if ( $candidate->getArticleID() > 0 ) {
				$nt = $candidate;
				$link = $candidate->getText();
				break;
			}
		}
	}
815
816
	/**
	 * Returns language specific hash options: an exclamation mark
	 * followed by the preferred variant code.
	 *
	 * @return string
	 */
	public function getExtraHashOptions() {
		return '!' . $this->getPreferredVariant();
	}
826
827
	/**
	 * Guess if a text is written in a variant. This should be implemented in subclasses.
	 *
	 * This base implementation never recognises a variant and always
	 * answers false.
	 *
	 * @param string $text The text to be checked
	 * @param string $variant Language code of the variant to be checked for
	 * @return bool True if $text appears to be written in $variant, false if not
	 *
	 * @author Nikola Smolenski <[email protected]>
	 * @since 1.19
	 */
	public function guessVariant( $text, $variant ) {
		return false;
	}
840
841
	/**
	 * Load default conversion tables.
	 * This method must be implemented in derived class; the base
	 * implementation only throws.
	 *
	 * @private
	 * @throws MWException
	 */
	function loadDefaultTables() {
		throw new MWException(
			'Must implement loadDefaultTables() method in class ' . get_class( $this )
		);
	}
853
854
	/**
	 * Load conversion tables either from the cache or the disk.
	 *
	 * Does nothing when the tables are already loaded. When the cache has
	 * no entry, or the entry lacks the CACHE_VERSION_KEY marker, the tables
	 * are rebuilt and written back to the cache.
	 *
	 * @private
	 * @param bool $fromCache Load from memcached? Defaults to true.
	 */
	function loadTables( $fromCache = true ) {
		global $wgLanguageConverterCacheType;

		if ( $this->mTablesLoaded ) {
			return;
		}

		$this->mTablesLoaded = true;
		$this->mTables = false;
		$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
		if ( $fromCache ) {
			wfProfileIn( __METHOD__ . '-cache' );
			$this->mTables = $cache->get( $this->mCacheKey );
			wfProfileOut( __METHOD__ . '-cache' );
		}

		$cacheUsable = $this->mTables
			&& array_key_exists( self::CACHE_VERSION_KEY, $this->mTables );
		if ( $cacheUsable ) {
			return;
		}

		wfProfileIn( __METHOD__ . '-recache' );
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $code ) {
			$overrides = $this->parseCachedTable( $code );
			$this->mTables[$code]->mergeArray( $overrides );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $this->mCacheKey, $this->mTables, 43200 );
		wfProfileOut( __METHOD__ . '-recache' );
	}
892
893
	/**
894
	 * Hook for post processing after conversion tables are loaded.
895
	 */
896
	function postLoadTables() {
		// Intentionally empty: loadTables() calls this after merging the
		// tables and before writing them to the cache.
	}
898
899
	/**
900
	 * Reload the conversion tables.
901
	 *
902
	 * @private
903
	 */
904
	function reloadTables() {
		// Drop the tables currently held, if there are any.
		$hasTables = (bool)$this->mTables;
		if ( $hasTables ) {
			unset( $this->mTables );
		}

		// Clear the loaded flag so loadTables() runs again, and pass false so
		// that it skips the cache read and rebuilds the tables.
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}
912
913
	/**
914
	 * Parse the conversion table stored in the cache.
915
	 *
916
	 * The tables should be in blocks of the following form:
917
	 *		-{
918
	 *			word => word ;
919
	 *			word => word ;
920
	 *			...
921
	 *		}-
922
	 *
923
	 * To make the tables more manageable, subpages are allowed
924
	 * and will be parsed recursively if $recursive == true.
925
	 *
926
	 * @param string $code Language code
927
	 * @param string $subpage Subpage name
928
	 * @param bool $recursive Parse subpages recursively? Defaults to true.
929
	 *
930
	 * @return array
931
	 */
932
	function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		// Keys already handled. Each page is parsed at most once, which also
		// stops pages that link to each other from recursing without end.
		static $parsed = [];

		// Normalise once so that building the key and choosing the lookup
		// path below agree on what "no subpage" means. A plain truthiness
		// test would treat a subpage literally named "0" as absent.
		$subpage = (string)$subpage;

		$key = 'Conversiontable/' . $code;
		if ( $subpage !== '' ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return [];
		}

		$parsed[$key] = true;

		if ( $subpage === '' ) {
			// The top-level table is fetched through the message cache
			$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
		} else {
			// Subpages are read from the page's revision, wikitext only
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision && $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return [];
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = [];
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				// No closing ]] after this [[, so it is not a link
				continue;
			}
			// Drop the "|label" part, then split the target into
			// <linkhead> / <variant code> / <subpage>
			$target = explode( '|', $link[0], 2 );
			$parts = explode( '/', trim( $target[0] ), 3 );
			if ( count( $parts ) < 2 ) {
				// No variant segment at all: cannot be a table for $code.
				// Skipping here also avoids reading a missing $parts[1].
				continue;
			}

			if ( $parts[0] == $linkhead && $parts[1] == $code ) {
				$sublinks[] = isset( $parts[2] ) ? $parts[2] : '';
			}
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = [];
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			// Only the text up to the closing }- belongs to this block
			$mappings = explode( '}-', $block, 2 )[0];
			$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					// Entries without exactly one '=>' are ignored
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				// With the array union operator the left operand wins, so
				// entries from the subpage take precedence over existing ones
				$ret = $s + $ret;
			}
		}

		if ( $this->mUcfirst ) {
			// Add an entry with both sides passed through ucfirst() for
			// every mapping; the existing entries stay in place
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}
1030
1031
	/**
1032
	 * Enclose a string with the "no conversion" tag. This is used by
1033
	 * various functions in the Parser.
1034
	 *
1035
	 * @param string $text Text to be tagged for no conversion
1036
	 * @param bool $noParse Unused
1037
	 * @return string The tagged text
1038
	 */
1039
	public function markNoConversion( $text, $noParse = false ) {
		# don't mark if already marked
		// strpos() returns 0 for a match at the very start of the string, so
		// the result has to be compared against false; a bare truthiness test
		// would treat a leading "-{" or "}-" as "not found".
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		// $noParse is accepted but not used by this implementation.
		return "-{R|$text}-";
	}
1048
1049
	/**
1050
	 * Convert the sorting key for category links. This should make different
1051
	 * keys that are variants of each other map to the same key.
1052
	 *
1053
	 * @param string $key
1054
	 *
1055
	 * @return string
1056
	 */
1057
	function convertCategoryKey( $key ) {
		// Identity mapping: the key is handed back unchanged.
		return $key;
	}
1060
1061
	/**
1062
	 * Refresh the cache of conversion tables when
1063
	 * MediaWiki:Conversiontable* is updated.
1064
	 *
1065
	 * @param Title $titleobj The Title of the page being updated
1066
	 */
1067
	public function updateConversionTable( Title $titleobj ) {
		// Only pages in the MediaWiki namespace are of interest.
		if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
			return;
		}

		// Split the DB key on '/' into at most three pieces; the first must be
		// 'Conversiontable' and the second is checked as a variant code.
		$segments = explode( '/', $titleobj->getDBkey(), 3 );
		if ( count( $segments ) <= 1 || $segments[0] != 'Conversiontable' ) {
			return;
		}

		if ( $this->validateVariant( $segments[1] ) ) {
			$this->reloadTables();
		}
	}
1079
1080
	/**
1081
	 * Get the cached separator pattern for ConverterRule::parseRules()
1082
	 * @return string
1083
	 */
1084
	function getVarSeparatorPattern() {
		// Built on first use and then kept in mVarSeparatorPattern.
		if ( $this->mVarSeparatorPattern === null ) {
			// Separator pattern for preg_split: the text is split on ";" only
			// when a valid variant name follows the markup. For example
			//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
			// 	<span style="font-size:120%;">yyy</span>;}-
			// should be split as:
			//  [
			// 	  [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
			// 	  [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
			// 	  [2] => ''
			//  ]
			$alternatives = [];
			foreach ( $this->mVariants as $variant ) {
				// zh-hans:xxx;zh-hant:yyy
				$alternatives[] = $variant . '\s*:';
				// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
				$alternatives[] = '[^;]*?=>\s*' . $variant . '\s*:';
			}
			// Last alternative: only whitespace remains up to the end
			$alternatives[] = '\s*$';

			// All alternatives sit inside one lookahead after the ";"
			$this->mVarSeparatorPattern =
				'/;\s*(?=' . implode( '|', $alternatives ) . ')/';
		}
		return $this->mVarSeparatorPattern;
	}
1109
}
1110