Completed
Branch master (5cbada)
by
unknown
28:59
created

LanguageConverter::parseCachedTable()   F

Complexity

Conditions 24
Paths 1212

Size

Total Lines 98
Code Lines 60

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 24
eloc 60
nc 1212
nop 3
dl 0
loc 98
rs 2
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * This program is free software; you can redistribute it and/or modify
4
 * it under the terms of the GNU General Public License as published by
5
 * the Free Software Foundation; either version 2 of the License, or
6
 * (at your option) any later version.
7
 *
8
 * This program is distributed in the hope that it will be useful,
9
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
 * GNU General Public License for more details.
12
 *
13
 * You should have received a copy of the GNU General Public License along
14
 * with this program; if not, write to the Free Software Foundation, Inc.,
15
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16
 * http://www.gnu.org/copyleft/gpl.html
17
 *
18
 * @file
19
 * @ingroup Language
20
 */
21
22
/**
23
 * Base class for language conversion.
24
 * @ingroup Language
25
 *
26
 * @author Zhengzhu Feng <[email protected]>
27
 * @author fdcn <[email protected]>
28
 * @author shinjiman <[email protected]>
29
 * @author PhiLiP <[email protected]>
30
 */
31
class LanguageConverter {
32
	/**
	 * Codes of the languages that support variants.
	 * @since 1.20
	 * @var array
	 */
	public static $languagesWithVariants = [
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	];

	/** @var string Main language code handed to the constructor */
	public $mMainLanguageCode;

	/** @var array Supported variant codes, with $wgDisabledVariants removed */
	public $mVariants;

	/** @var array Fallback code(s), keyed by variant code */
	public $mVariantFallbacks;

	/** @var array Value returned by Language::fetchLanguageNames() */
	public $mVariantNames;

	/** @var bool Set to true as soon as loadTables() has been entered */
	public $mTablesLoaded = false;

	/** @var array|bool Conversion tables keyed by variant code */
	public $mTables;

	/** @var array 'bidirectional', 'unidirectional' or 'disable' for each variant */
	public $mManualLevel;

	/**
	 * @var string Memcached key name
	 */
	public $mCacheKey;

	/** @var Language Language object handed to the constructor */
	public $mLangObj;

	/** @var array Flag map: built-in flags, custom flags and one entry per variant */
	public $mFlags;

	public $mDescCodeSep = ':';

	public $mDescVarSep = ';';

	public $mUcfirst = false;

	/** @var string|bool Title set by a manual conversion rule, false when none */
	public $mConvRuleTitle = false;

	/** @var string|null Memoized result of getURLVariant() */
	public $mURLVariant;

	/** @var string|null Last result computed by getUserVariant() */
	public $mUserVariant;

	/** @var string|null Memoized result of getHeaderVariant() */
	public $mHeaderVariant;

	/** @var int Nesting limit for -{ ... }- rules in recursiveConvertRule() */
	public $mMaxDepth = 10;

	public $mVarSeparatorPattern;

	const CACHE_VERSION_KEY = 'VERSION 7';
73
74
	/**
	 * Set up a converter for one language.
	 *
	 * Variants listed in $wgDisabledVariants are dropped. Every remaining
	 * variant gets a manual level ('bidirectional' unless $manualLevel says
	 * otherwise) and is registered as a flag that maps to itself.
	 *
	 * @param Language $langobj
	 * @param string $maincode The main language code of this language
	 * @param array $variants The supported variants of this language
	 * @param array $variantfallbacks The fallback language of each variant
	 * @param array $flags Defining the custom strings that maps to the flags
	 * @param array $manualLevel Limit for supported variants
	 */
	public function __construct( $langobj, $maincode, $variants = [],
								$variantfallbacks = [], $flags = [],
								$manualLevel = [] ) {
		global $wgDisabledVariants;

		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );

		// Reserved for the program itself and therefore not listed here:
		//   'S' show converted text
		//   '+' add rules for alltext
		//   'E' the given flags are erroneous
		$builtinFlags = [
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code (but no display in placed code)
			'N' => 'N', // current variant name
		];
		$this->mFlags = array_merge( $builtinFlags, $flags );

		foreach ( $this->mVariants as $code ) {
			$this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
				? $manualLevel[$code]
				: 'bidirectional';
			$this->mFlags[$code] = $code;
		}
	}
117
118
	/**
	 * Accessor for the list of valid variants.
	 * Prefer this over reading $this->mVariants directly.
	 *
	 * @return array All valid variant codes
	 */
	public function getVariants() {
		$variants = $this->mVariants;
		return $variants;
	}
127
128
	/**
	 * Look up the fallback for a variant that the markup does not define.
	 *
	 * For example, in zh people usually define zh-hans and zh-hant but
	 * rarely zh-sg or zh-hk; when zh-sg is preferred but undefined, zh-hans
	 * is picked instead. Right now this is only used by zh.
	 *
	 * @param string $variant The language code of the variant
	 * @return string|array The configured fallback(s), or the main language
	 *   code when no fallback is configured for $variant
	 */
	public function getVariantFallbacks( $variant ) {
		return isset( $this->mVariantFallbacks[$variant] )
			? $this->mVariantFallbacks[$variant]
			: $this->mMainLanguageCode;
	}
145
146
	/**
	 * Accessor for the title produced by the conversion rules.
	 *
	 * @return string|bool The converted title text; false while no rule has
	 *   set one (the property's initial value and what convertTo() resets it to)
	 */
	public function getConvRuleTitle() {
		$title = $this->mConvRuleTitle;
		return $title;
	}
153
154
	/**
	 * Get preferred language variant.
	 *
	 * Sources are consulted in this order: the URL, then the user's
	 * preference (only when the user object is safe to load and logged in)
	 * or otherwise the Accept-Language header, then
	 * $wgDefaultLanguageVariant. If the result is not a valid variant the
	 * main language code is returned.
	 *
	 * @return string The preferred language code
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. its return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		// validateVariant() returns null for an invalid variant, so test for
		// exactly that instead of relying on the truthiness of a string.
		if ( $this->validateVariant( $req ) !== null ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}
182
183
	/**
	 * Get default variant.
	 *
	 * Works like getPreferredVariant() but never looks at the user's
	 * settings: only the URL, the Accept-Language header and
	 * $wgDefaultLanguageVariant are consulted, in that order.
	 *
	 * @return string The default variant code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$candidate = $this->getURLVariant();
		if ( !$candidate ) {
			$candidate = $this->getHeaderVariant();
		}
		if ( $wgDefaultLanguageVariant && !$candidate ) {
			$candidate = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		// Nothing usable found: fall back to the main language code.
		return $candidate ?: $this->mMainLanguageCode;
	}
206
207
	/**
	 * Validate the variant
	 *
	 * The membership test is strict. With PHP's loose comparison a
	 * non-string argument such as true (or 0 on PHP < 8) would match an
	 * arbitrary variant code and be returned as if it were valid.
	 *
	 * @param string $variant The variant to validate
	 * @return mixed Returns the variant if it is valid, null otherwise
	 */
	public function validateVariant( $variant = null ) {
		if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		return null;
	}
218
219
	/**
	 * Get the variant specified in the URL
	 *
	 * Reads the 'variant' request parameter, falling back to 'uselang'. A
	 * valid result is memoized in $this->mURLVariant.
	 *
	 * @return mixed Variant if a valid one was requested, null otherwise
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( !$this->mURLVariant ) {
			// see if the preference is set in the request
			$requested = $wgRequest->getText( 'variant' );
			if ( !$requested ) {
				$requested = $wgRequest->getVal( 'uselang' );
			}
			$this->mURLVariant = $this->validateVariant( $requested );
		}

		return $this->mURLVariant;
	}
241
242
	/**
	 * Determine if the user has a variant set.
	 *
	 * Deliberately not memoized: returning a cached $this->mUserVariant
	 * wreaks havoc on parserTest.php.
	 *
	 * @return mixed Variant if a valid one is found; false when the user
	 *   object is not safe to load; null when the stored value is not a
	 *   valid variant
	 */
	protected function getUserVariant() {
		global $wgUser, $wgContLang;

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( !$wgUser->isSafeToLoad() ) {
			return false;
		}

		if ( !$wgUser->isLoggedIn() ) {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$optionName = 'language';
		} elseif ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
			$optionName = 'variant';
		} else {
			$optionName = 'variant-' . $this->mMainLanguageCode;
		}

		$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );
		return $this->mUserVariant;
	}
278
279
	/**
	 * Determine the language variant from the Accept-Language header.
	 *
	 * The accepted languages are tried in the order the request reports
	 * them. If none is a valid variant, the fallbacks collected for those
	 * languages are tried next. A valid result is memoized in
	 * $this->mHeaderVariant.
	 *
	 * @return mixed Variant if one found, null otherwise
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// see if some supported language variant is set in the
		// HTTP header.
		$accepted = array_keys( $wgRequest->getAcceptLang() );
		if ( !$accepted ) {
			return null;
		}

		$candidates = [];
		foreach ( $accepted as $code ) {
			$this->mHeaderVariant = $this->validateVariant( $code );
			if ( $this->mHeaderVariant ) {
				return $this->mHeaderVariant;
			}

			// Remember the fallbacks of this language; they are only
			// examined once every accepted language has been rejected.
			$fallbacks = $this->getVariantFallbacks( $code );
			if ( is_array( $fallbacks ) ) {
				$candidates = array_merge( $candidates, $fallbacks );
			} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$candidates[] = $fallbacks;
			}
		}

		// process fallback languages now
		foreach ( array_unique( $candidates ) as $code ) {
			$this->mHeaderVariant = $this->validateVariant( $code );
			if ( $this->mHeaderVariant ) {
				break;
			}
		}

		return $this->mHeaderVariant;
	}
330
331
	/**
	 * Dictionary-based conversion.
	 * This function would not parse the conversion rules.
	 * If you want to parse rules, try to use convert() or
	 * convertTo().
	 *
	 * The text is split into markup (left as is, except for 'title' and
	 * 'alt' attributes) and the text between markup. The latter is joined
	 * with NUL separators and translated in a single batch.
	 *
	 * @param string $text The text to be converted
	 * @param bool|string $toVariant The target language code; a falsy value
	 *   selects the preferred variant
	 * @return string The converted text
	 */
	public function autoConvert( $text, $toVariant = false ) {
		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			return $text;
		}

		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		*/
		$marker = '|' . Parser::MARKER_PREFIX . '[\-a-zA-Z0-9]+';

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>]+$|^[^<>]*>';

		// disable convert to variants between <code> tags
		$codefix = '<code>.+?<\/code>|';
		// disable conversion of <script> tags
		$scriptfix = '<script.*?>.*?<\/script>|';
		// disable conversion of <pre> tags
		$prefix = '<pre.*?>.*?<\/pre>|';

		$reg = '/' . $codefix . $scriptfix . $prefix .
			'<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix . '/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		$text = str_replace( "\000", '', $text );

		// $text is not modified below, so its length is measured only once.
		$textLength = strlen( $text );

		$markupMatches = null;
		$elementMatches = null;
		while ( $startPos < $textLength ) {
			if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
			} else {
				$elementPos = $textLength;
				$element = '';
			}

			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== ''
				&& preg_match( '/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
			) {
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				$changed = false;
				foreach ( [ 'title', 'alt' ] as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs. Compare against false explicitly:
					// strpos() returns 0 for a match at the very start, which
					// a plain negation would mistake for "not found".
					if ( strpos( $attr, '://' ) === false ) {
						$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
					}

					// Remove HTML tags to avoid disrupting the layout
					$attr = preg_replace( '/<[^>]+>/', '', $attr );
					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		return $output;
	}
446
447
	/**
	 * Translate a string to a variant.
	 * Doesn't parse rules or do any of that other stuff, for that use
	 * convert() or convertTo().
	 *
	 * @param string $text Text to convert
	 * @param string $variant Variant language code
	 * @throws MWException If no conversion table is loaded for $variant
	 * @return string Translated text
	 */
	public function translate( $text, $variant ) {
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			// Without this guard an unknown variant ends in a method call on
			// null, which gives no hint about what actually went wrong.
			if ( !isset( $this->mTables[$variant] ) ) {
				throw new MWException( __METHOD__ . ": no conversion table for variant '$variant'" );
			}
			$text = $this->mTables[$variant]->replace( $text );
		}
		return $text;
	}
465
466
	/**
	 * Run translate() on the text once for every valid variant.
	 *
	 * @param string $text The text to be converted
	 * @return array Variant => converted text
	 */
	public function autoConvertToAllVariants( $text ) {
		$this->loadTables();

		$converted = [];
		foreach ( $this->mVariants as $code ) {
			$converted[$code] = $this->translate( $text, $code );
		}

		return $converted;
	}
482
483
	/**
	 * Apply manual conversion rules.
	 *
	 * Records the rule's title (if it has one) and merges the rule's
	 * conversion pairs into, or removes them from, the loaded tables.
	 *
	 * @param ConverterRule $convRule
	 */
	protected function applyManualConv( $convRule ) {
		// -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customizes the title conversion.
		// Bug 24072: only overwrite $mConvRuleTitle when the rule really
		// carries a title; rules that are not about the title used to wipe
		// it out and break the title conversion.
		$ruleTitle = $convRule->getTitle();
		if ( $ruleTitle ) {
			$this->mConvRuleTitle = $ruleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$ruleTable = $convRule->getConvTable();
		$ruleAction = $convRule->getRulesAction();
		foreach ( $ruleTable as $code => $pairs ) {
			if ( !$this->validateVariant( $code ) ) {
				continue;
			}

			if ( $ruleAction == 'remove' ) {
				$this->mTables[$code]->removeArray( $pairs );
			} elseif ( $ruleAction == 'add' ) {
				// More efficient than array_merge(), about 2.5 times.
				foreach ( $pairs as $source => $target ) {
					$this->mTables[$code]->setPair( $source, $target );
				}
			}
		}
	}
517
518
	/**
	 * Render a Title object as a readable string in the preferred variant.
	 *
	 * Outside the main namespace the converted namespace name and a colon
	 * are put in front of the translated title text.
	 *
	 * @param Title $title A object of Title
	 * @return string Converted title text
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$nsIndex = $title->getNamespace();

		$nsPrefix = ( $nsIndex !== NS_MAIN )
			? $this->convertNamespace( $nsIndex, $variant ) . ':'
			: '';

		return $nsPrefix . $this->translate( $title->getText(), $variant );
	}
536
537
	/**
	 * Get the namespace display name in the preferred variant.
	 *
	 * The name is looked up in this order: the cache, the message
	 * 'conversion-ns<index>' in the target variant, the same message in the
	 * content language (translated to the variant), and finally the
	 * formatted namespace text of the variant's own language object.
	 *
	 * @param int $index Namespace id
	 * @param string|null $variant Variant code or null for preferred variant
	 * @return string Namespace name for display
	 */
	public function convertNamespace( $index, $variant = null ) {
		if ( $index === NS_MAIN ) {
			return '';
		}

		if ( $variant === null ) {
			$variant = $this->getPreferredVariant();
		}

		// ObjectCache::newAccelerator() is deprecated since 1.27;
		// getLocalServerInstance() takes the same fallback argument.
		$cache = ObjectCache::getLocalServerInstance( CACHE_NONE );
		$key = wfMemcKey( 'languageconverter', 'namespace-text', $index, $variant );
		$nsVariantText = $cache->get( $key );
		if ( $nsVariantText !== false ) {
			return $nsVariantText;
		}

		// First check if a message gives a converted name in the target variant.
		$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
		if ( $nsConvMsg->exists() ) {
			$nsVariantText = $nsConvMsg->plain();
		}

		// Then check if a message gives a converted name in content language
		// which needs extra translation to the target variant.
		if ( $nsVariantText === false ) {
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
			if ( $nsConvMsg->exists() ) {
				$nsVariantText = $this->translate( $nsConvMsg->plain(), $variant );
			}
		}

		if ( $nsVariantText === false ) {
			// No message exists, retrieve it from the target variant's namespace names.
			$langObj = $this->mLangObj->factory( $variant );
			$nsVariantText = $langObj->getFormattedNsText( $index );
		}

		$cache->set( $key, $nsVariantText, 60 );

		return $nsVariantText;
	}
585
586
	/**
	 * Convert text to the preferred variant, honouring -{}- markup.
	 *
	 * Plain dictionary conversion happens in autoConvert(). The -{}- markup
	 * handled on this path specifies conversions autoConvert() cannot do.
	 *
	 * Syntax of the markup:
	 * -{code1:text1;code2:text2;...}-  or
	 * -{flags|code1:text1;code2:text2;...}-  or
	 * -{text}- in which case no conversion should take place for text
	 *
	 * @param string $text Text to be converted
	 * @return string Converted text
	 */
	public function convert( $text ) {
		return $this->convertTo( $text, $this->getPreferredVariant() );
	}
604
605
	/**
	 * Like convert(), but the target variant is chosen by the caller.
	 *
	 * Returns the text untouched when $wgDisableLangConversion is set.
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @return string Converted text
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;

		if ( !$wgDisableLangConversion ) {
			// Reset converter state for a new converter run.
			$this->mConvRuleTitle = false;
			return $this->recursiveConvertTopLevel( $text, $variant );
		}

		return $text;
	}
621
622
	/**
	 * Recursively convert text on the outside. Allow to use nested
	 * markups to custom rules.
	 *
	 * Text outside -{ ... }- goes through autoConvert(), unless
	 * guessVariant() reports that the whole text is already in $variant.
	 * Each -{ ... }- rule is handed to recursiveConvertRule().
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @param int $depth Depth of recursion
	 * @return string Converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$out = '';
		$length = strlen( $text );
		$shouldConvert = !$this->guessVariant( $text, $variant );

		for ( $startPos = 0; $startPos < $length; ) {
			$markupPos = strpos( $text, '-{', $startPos );
			$isLastSegment = ( $markupPos === false );

			// Plain text up to the next rule, or up to the end of the text
			$segmentEnd = $isLastSegment ? $length : $markupPos;
			$fragment = substr( $text, $startPos, $segmentEnd - $startPos );
			if ( $shouldConvert ) {
				$fragment = $this->autoConvert( $fragment, $variant );
			}
			$out .= $fragment;

			if ( $isLastSegment ) {
				// No more markup
				break;
			}

			// recursiveConvertRule() moves $startPos past the rule it consumed
			$startPos = $markupPos;
			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
		}

		return $out;
	}
661
662
	/**
	 * Recursively convert text on the inside.
	 *
	 * Parses one -{ ... }- rule that starts at $startPos, descending into
	 * nested rules until $this->mMaxDepth is reached. An unclosed rule is
	 * returned as '-{' followed by its auto-converted content.
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @param int &$startPos Offset of the opening '-{'; on return it points
	 *   just past the consumed rule
	 * @param int $depth Depth of recursion
	 *
	 * @throws MWException
	 * @return string Converted text
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						// Target the enclosing while loop explicitly. A bare
						// "continue" inside switch acts like "break" and
						// raises a warning on PHP >= 7.3.
						continue 2;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}
739
740
	/**
	 * Look for a link target that exists under a different variant.
	 *
	 * If a language supports multiple variants, a link that does not exist
	 * in one variant may exist in another. See e.g. LanguageZh.php.
	 * Both $link and $nt are overwritten when such a page is found.
	 *
	 * @param string &$link The name of the link
	 * @param Title &$nt The title object of the link
	 * @param bool $ignoreOtherCond To disable other conditions when
	 *   we need to transclude a template or update a category's link
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;

		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
			$action = 'view';
		}
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$linkBatch = new LinkBatch();

		if ( $wgDisableLangConversion || $wgDisableTitleConversion ) {
			return;
		}

		$blockedByRequest = $isredir == 'no'
			|| $action == 'edit'
			|| $action == 'submit'
			|| $linkconvert == 'no';
		if ( !$ignoreOtherCond && $blockedByRequest ) {
			return;
		}

		$ns = is_object( $nt ) ? $nt->getNamespace() : NS_MAIN;

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		$titles = [];
		foreach ( $variants as $variantText ) {
			if ( $variantText == $link ) {
				continue;
			}
			$candidate = Title::newFromText( $variantText, $ns );
			if ( $candidate !== null ) {
				$linkBatch->addObj( $candidate );
				$titles[] = $candidate;
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		// The first candidate with an article ID above zero wins
		foreach ( $titles as $candidate ) {
			if ( $candidate->getArticleID() > 0 ) {
				$nt = $candidate;
				$link = $candidate->getText();
				break;
			}
		}
	}
812
813
	/**
814
	 * Returns language specific hash options.
815
	 *
816
	 * @return string
817
	 */
818
	public function getExtraHashOptions() {
		// The hash option is the preferred variant code prefixed with "!".
		return '!' . $this->getPreferredVariant();
	}
823
824
	/**
825
	 * Guess if a text is written in a variant. This should be implemented in subclasses.
826
	 *
827
	 * @param string $text The text to be checked
828
	 * @param string $variant Language code of the variant to be checked for
829
	 * @return bool True if $text appears to be written in $variant, false if not
830
	 *
831
	 * @author Nikola Smolenski <[email protected]>
832
	 * @since 1.19
833
	 */
834
	public function guessVariant( $text, $variant ) {
		// The base class makes no guess: both arguments are ignored and the
		// answer is always false.
		return false;
	}
837
838
	/**
839
	 * Load default conversion tables.
840
	 * This method must be implemented in derived class.
841
	 *
842
	 * @private
843
	 * @throws MWException
844
	 */
845
	function loadDefaultTables() {
		// Always throws here, naming the concrete class that was
		// instantiated so the missing override is easy to locate.
		throw new MWException(
			'Must implement loadDefaultTables() method in class ' . get_class( $this )
		);
	}
850
851
	/**
852
	 * Load conversion tables either from the cache or the disk.
853
	 * @private
854
	 * @param bool $fromCache Load from memcached? Defaults to true.
855
	 */
856
	function loadTables( $fromCache = true ) {
		global $wgLanguageConverterCacheType;

		if ( $this->mTablesLoaded ) {
			return;
		}

		// Set the flag before doing any work: a re-entrant call made while
		// the tables are being built returns at the guard above.
		$this->mTablesLoaded = true;
		$this->mTables = false;
		$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
		if ( $fromCache ) {
			$this->mTables = $cache->get( $this->mCacheKey );
		}

		// A cached value is only usable when it carries the version marker.
		if ( $this->mTables && array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
			return;
		}

		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// Expiry of 43200 — presumably seconds, i.e. 12 hours; confirm
		// against the cache backend's set() contract.
		$cache->set( $this->mCacheKey, $this->mTables, 43200 );
	}
889
890
	/**
891
	 * Hook for post processing after conversion tables are loaded.
892
	 */
893
	function postLoadTables() {
		// Intentionally empty in the base class.
	}
895
896
	/**
897
	 * Reload the conversion tables.
898
	 *
899
	 * @private
900
	 */
901
	function reloadTables() {
		// Discard whatever tables are currently held.
		if ( $this->mTables ) {
			unset( $this->mTables );
		}

		// Clear the "already loaded" guard, then rebuild without reading
		// the cache.
		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}
909
910
	/**
911
	 * Parse the conversion table stored in the cache.
912
	 *
913
	 * The tables should be in blocks of the following form:
914
	 *		-{
915
	 *			word => word ;
916
	 *			word => word ;
917
	 *			...
918
	 *		}-
919
	 *
920
	 * To make the tables more manageable, subpages are allowed
921
	 * and will be parsed recursively if $recursive == true.
922
	 *
923
	 * @param string $code Language code
924
	 * @param string $subpage Subpage name
925
	 * @param bool $recursive Parse subpages recursively? Defaults to true.
926
	 *
927
	 * @return array
928
	 */
929
	function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		// Names of the pages already handled. The variable is static, so it
		// persists across calls; a page seen before yields an empty result,
		// which also stops pages that link to each other from recursing
		// forever.
		static $parsed = [];

		// Decide once whether a subpage was requested, and use the same test
		// for building the key and for choosing the loader. A plain
		// truthiness test would mistake a subpage named "0" for the root
		// page and silently skip it.
		$isSubpage = ( (string)$subpage !== '' );

		$key = 'Conversiontable/' . $code;
		if ( $isSubpage ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return [];
		}

		$parsed[$key] = true;

		if ( !$isSubpage ) {
			$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
		} else {
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision ) {
					if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
						$txt = $revision->getContent( Revision::RAW )->getNativeData();
					}

					// @todo in the future, use a specialized content model, perhaps based on json!
				}
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return [];
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = [];
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				// no closing "]]": not a link
				continue;
			}
			// Drop the "|label" part, then split the target into
			// <head>/<code>/<rest>.
			$b = explode( '|', $link[0], 2 );
			$b = explode( '/', trim( $b[0] ), 3 );

			// A target without a language segment cannot belong to this
			// table; checking the count first avoids reading a missing
			// offset.
			if ( count( $b ) < 2 || $b[0] != $linkhead || $b[1] != $code ) {
				continue;
			}

			$sublinks[] = isset( $b[2] ) ? $b[2] : '';
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = [];
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			// Keep only what precedes the closing }- and strip quote and
			// list markup characters before splitting into rules.
			$mappings = explode( '}-', $block, 2 )[0];
			$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				// A rule must contain exactly one "=>".
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				// Array union: on a duplicate key the subpage's entry wins.
				$ret = $s + $ret;
			}
		}

		if ( $this->mUcfirst ) {
			// Add a first-letter-capitalised copy of every rule; the
			// original entries stay in place.
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}
1027
1028
	/**
1029
	 * Enclose a string with the "no conversion" tag. This is used by
1030
	 * various functions in the Parser.
1031
	 *
1032
	 * @param string $text Text to be tagged for no conversion
1033
	 * @param bool $noParse Unused
1034
	 * @return string The tagged text
1035
	 */
1036
	public function markNoConversion( $text, $noParse = false ) {
		# Don't mark if already marked. The result of strpos() must be
		# compared against false explicitly: a marker at the very start of
		# the text is found at offset 0, which is falsy.
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		// "R" flag block around the untouched text.
		return "-{R|$text}-";
	}
1045
1046
	/**
1047
	 * Convert the sorting key for category links. This should make different
1048
	 * keys that are variants of each other map to the same key.
1049
	 *
1050
	 * @param string $key
1051
	 *
1052
	 * @return string
1053
	 */
1054
	function convertCategoryKey( $key ) {
		// The base class performs no conversion: the key comes back as given.
		return $key;
	}
1057
1058
	/**
1059
	 * Refresh the cache of conversion tables when
1060
	 * MediaWiki:Conversiontable* is updated.
1061
	 *
1062
	 * @param Title $titleobj The Title of the page being updated
1063
	 */
1064
	public function updateConversionTable( Title $titleobj ) {
		// Only pages in the MediaWiki namespace can hold conversion tables.
		if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
			return;
		}

		// Expected key form: Conversiontable/<variant>[/<subpage>]
		$parts = explode( '/', $titleobj->getDBkey(), 3 );
		if ( count( $parts ) < 2 || $parts[0] !== 'Conversiontable' ) {
			return;
		}

		// validateVariant() is reported as returning string|null, so test
		// for null explicitly instead of relying on the truthiness of a
		// string.
		if ( $this->validateVariant( $parts[1] ) !== null ) {
			$this->reloadTables();
		}
	}
1076
1077
	/**
1078
	 * Get the cached separator pattern for ConverterRule::parseRules()
1079
	 * @return string
1080
	 */
1081
	function getVarSeparatorPattern() {
		// Built once, then served from the member variable.
		if ( $this->mVarSeparatorPattern !== null ) {
			return $this->mVarSeparatorPattern;
		}

		// varsep_pattern for preg_split:
		// text should be split on ";" only if a valid variant
		// name exists after the markup, for example:
		//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		// 	<span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		//  array(
		// 	  [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		// 	  [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		// 	  [2] => ''
		// 	 )
		$alternatives = [];
		foreach ( $this->mVariants as $code ) {
			// zh-hans:xxx;zh-hant:yyy
			$alternatives[] = $code . '\s*:';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$alternatives[] = '[^;]*?=>\s*' . $code . '\s*:';
		}
		// a ";" followed only by whitespace up to the end also splits
		$alternatives[] = '\s*$';

		$this->mVarSeparatorPattern = '/;\s*(?=' . implode( '|', $alternatives ) . ')/';

		return $this->mVarSeparatorPattern;
	}
1106
}
1107