GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 841b7d...3ed1d3 )
by
unknown
13:44 queued 04:45
created

src/LingoParser.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/**
4
 * File holding the Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2018, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Lingo;
30
31
use DOMDocument;
32
use DOMXPath;
33
use ObjectCache;
34
use Parser;
35
use Title;
36
37
/**
38
 * This class parses the given text and enriches it with definitions for defined
39
 * terms.
40
 *
41
 * Contains a static function to initiate the parsing.
42
 *
43
 * @ingroup Lingo
44
 */
45
class LingoParser {
46
47
	/** Index of the matched string in a word descriptor (an entry of a PREG_OFFSET_CAPTURE match) */
	const WORD_VALUE = 0;

	/** Index of the byte offset in a word descriptor (an entry of a PREG_OFFSET_CAPTURE match) */
	const WORD_OFFSET = 1;

	/**
	 * @var Tree|null the glossary tree; stays null until getLingoTree() has fetched or built it
	 */
	private $mLingoTree;

	/**
	 * @var Backend|null the backend delivering the glossary elements; assigned by setBackend()
	 */
	private $mLingoBackend;

	/**
	 * @var LingoParser|null the shared instance handed out by getInstance()
	 */
	private static $parserSingleton;

	/**
	 * @var string|null the RegEx to split a chunk of text into words; assigned by the constructor
	 */
	public $regex;
60
61
	/**
	 * Lingo\LingoParser constructor.
	 *
	 * Assembles the RegEx that is used to split a chunk of text into words.
	 *
	 * @param MessageLog|null $messages not used by the constructor
	 */
	public function __construct( MessageLog &$messages = null ) {
		// A word is one of the following, tried in this order:
		// - a placeholder for a stripped item,
		// - a sequence of letters and numbers,
		// - a single character that is neither letter nor number
		$placeholder = preg_quote( Parser::MARKER_PREFIX, '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' );
		$lettersAndNumbers = '[\p{L}\p{N}]+';
		$anyOtherCharacter = '[^\p{L}\p{N}]';

		$this->regex = '/' . implode( '|', [ $placeholder, $lettersAndNumbers, $anyOtherCharacter ] ) . '/u';
	}
70
71
	/**
	 * Enriches the output of the global parser with glossary definitions.
	 *
	 * The method takes no arguments; the parser is read from $GLOBALS['wgParser'].
	 * The text is only processed if shouldParse() approves of the parser.
	 *
	 * @return bool always true
	 */
	public function parse() {

		/** @var \Parser $parser */
		$parser = $GLOBALS[ 'wgParser' ];

		if ( $this->shouldParse( $parser ) ) {
			$this->realParse( $parser );
		}

		return true;
	}
85
86
	/**
	 * Provides the shared LingoParser instance, creating it on the first call.
	 *
	 * @return LingoParser
	 * @since 2.0.1
	 */
	public static function getInstance() {
		if ( self::$parserSingleton ) {
			return self::$parserSingleton;
		}

		self::$parserSingleton = new LingoParser();

		return self::$parserSingleton;
	}
98
99
	/**
	 * Builds the key under which the glossary tree is stored in the cache.
	 *
	 * The key contains the tree version and the class name of the backend of
	 * the shared parser instance.
	 *
	 * @return string
	 */
	private function getCacheKey() {
		// FIXME: If Lingo ever stores the glossary tree per user, then the cache key also needs to include the user id (see T163608)
		$keyMaker = ObjectCache::getLocalClusterInstance();
		$backendClass = get_class( self::getInstance()->getBackend() );

		return $keyMaker->makeKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, $backendClass );
	}
106
107
	/**
	 * Hands out the backend that was assigned to this parser.
	 *
	 * @return Backend the backend used by the parser
	 * @throws \MWException if no backend has been assigned
	 */
	public function getBackend() {
		if ( $this->mLingoBackend !== null ) {
			return $this->mLingoBackend;
		}

		throw new \MWException( 'No Lingo backend available!' );
	}
119
120
	/**
	 * Returns the list of terms in the glossary
	 *
	 * The list is taken from the glossary tree, see getLingoTree().
	 *
	 * @return array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		$tree = $this->getLingoTree();

		return $tree->getTermList();
	}
128
129
	/**
	 * Returns the list of terms in the glossary as a Lingo\Tree
	 *
	 * The tree is fetched or built only once per parser instance. If the backend
	 * allows caching, the tree is looked up in the cache selected by
	 * $wgexLingoCacheType (the main cache if that setting is null), built on a
	 * cache miss, and in both cases written to the cache with a lifetime of one
	 * month. Without caching the tree is built directly from the backend.
	 *
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
	 * @throws \MWException if the tree has to be set up and no backend is available
	 */
	public function getLingoTree() {
		global $wgexLingoCacheType;

		// build glossary array only once per request
		if ( $this->mLingoTree ) {
			return $this->mLingoTree;
		}

		// Ask for the backend via getBackend(): a missing backend is then reported
		// by an exception instead of a fatal "member function on null" error
		$backend = $this->getBackend();

		if ( !$backend->useCache() ) {
			wfDebug( "Caching of lingo tree disabled.\n" );
			$this->mLingoTree = $this->buildLingo();

			return $this->mLingoTree;
		}

		// Try cache first
		$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
		$cachekey = $this->getCacheKey();
		$cachedLingoTree = $cache->get( $cachekey );

		// cache hit?
		if ( $cachedLingoTree !== false && $cachedLingoTree !== null ) {

			wfDebug( "Cache hit: Got lingo tree from cache.\n" );
			$this->mLingoTree = $cachedLingoTree;

			wfDebug( "Re-cached lingo tree.\n" );
		} else {

			wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
			$this->mLingoTree = $this->buildLingo();
			wfDebug( "Cached lingo tree.\n" );
		}

		// Keep for one month
		// Limiting the cache validity will allow to purge stale cache
		// entries inserted by older versions after one month
		$cache->set( $cachekey, $this->mLingoTree, 60 * 60 * 24 * 30 );

		return $this->mLingoTree;
	}
176
177
	/**
	 * Creates a new glossary tree and fills it with the elements delivered by
	 * the backend.
	 *
	 * @return Tree
	 */
	protected function &buildLingo() {

		$tree = new Tree();
		$source = $this->mLingoBackend;

		// fetch elements until the backend delivers a falsy value
		while ( true ) {
			$entry = $source->next();

			if ( !$entry ) {
				break;
			}

			$tree->addTerm( $entry[ Element::ELEMENT_TERM ], $entry );
		}

		return $tree;
	}
192
193
	/**
	 * Parses the text of the parser output and enriches applicable terms
	 *
	 * The text is loaded into a DOM document. Every text node selected by the
	 * XPath query below (text outside of elements with the class 'noglossary'
	 * and outside of links) is split into words and searched for glossary terms.
	 * Each term found is replaced by its formatted term node. The formatted
	 * definitions of all terms found are appended to the text.
	 *
	 * The parser output is left untouched if the text is empty, if no term was
	 * found or if the body of the processed document could not be extracted.
	 *
	 * @param Parser $parser
	 *
	 * @return bool always true
	 */
	protected function realParse( &$parser ) {

		$text = $parser->getOutput()->getText();

		if ( $text === null || $text === '' ) {
			return true;
		}

		// Get array of terms
		$glossary = $this->getLingoTree();

		if ( $glossary == null ) {
			return true;
		}

		// Parse HTML from page
		\MediaWiki\suppressWarnings();

		$doc = new DOMDocument( '1.0', 'utf-8' );
		$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );

		\MediaWiki\restoreWarnings();

		// Find all text in HTML.
		$xpath = new DOMXPath( $doc );
		$textElements = $xpath->query(
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
		);

		// Iterate all HTML text matches
		$numberOfTextElements = $textElements->length;

		// formatted definitions of the terms found, keyed by definition id
		$definitions = [];

		for ( $textElementIndex = 0; $textElementIndex < $numberOfTextElements; $textElementIndex++ ) {
			$textElement = $textElements->item( $textElementIndex );

			if ( strlen( $textElement->nodeValue ) < $glossary->getMinTermLength() ) {
				continue;
			}

			$matches = [];
			preg_match_all(
				$this->regex,
				$textElement->nodeValue,
				$matches,
				PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
			);

			if ( count( $matches ) === 0 || count( $matches[ 0 ] ) === 0 ) {
				continue;
			}

			$wordDescriptors = &$matches[ 0 ];  // See __construct() for definition of "word"
			$numberOfWordDescriptors = count( $wordDescriptors );

			// Plain assignment: objects are handles, no reference to the DOM property is needed
			$parentNode = $textElement->parentNode;

			$wordDescriptorIndex = 0;
			$changedElem = false;

			while ( $wordDescriptorIndex < $numberOfWordDescriptors ) {

				/** @var \Lingo\Element $definition */
				list( $skippedWords, $usedWords, $definition ) =
					$glossary->findNextTerm( $wordDescriptors, $wordDescriptorIndex, $numberOfWordDescriptors );

				if ( $usedWords > 0 ) { // found a term

					if ( $skippedWords > 0 ) { // skipped some text, insert it as is

						// The offsets captured by preg_match_all() are byte offsets, so
						// the byte-wise substr() is the matching function here
						$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];
						$length = $wordDescriptors[ $wordDescriptorIndex + $skippedWords ][ self::WORD_OFFSET ] - $start;

						$parentNode->insertBefore(
							$doc->createTextNode(
								substr( $textElement->nodeValue, $start, $length )
							),
							$textElement
						);
					}

					$parentNode->insertBefore( $definition->getFormattedTerm( $doc ), $textElement );

					$definitions[ $definition->getId() ] = $definition->getFormattedDefinitions();

					$changedElem = true;

				} else { // did not find any term, just use the rest of the text

					// If we found no term now and no term before, there was no
					// term in the whole element. Might as well not change the
					// element at all.

					// Only change element if found term before
					if ( $changedElem === true ) {

						$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];

						$parentNode->insertBefore(
							$doc->createTextNode(
								substr( $textElement->nodeValue, $start )
							),
							$textElement
						);

					}

					// In principle superfluous, the loop would run out anyway. Might save a bit of time.
					break;
				}

				$wordDescriptorIndex += $usedWords + $skippedWords;
			}

			// The pieces of the text node were inserted before it, so the original node has to go
			if ( $changedElem ) {
				$parentNode->removeChild( $textElement );
			}
		}

		if ( count( $definitions ) > 0 ) {

			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
			$text = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );

			// preg_replace() returns null on failure. Keep the original output in
			// that case instead of replacing the page text by the definitions alone.
			if ( $text === null ) {
				wfDebug( "Lingo: Could not extract the body of the processed document.\n" );
				return true;
			}

			$this->loadModules( $parser );

			$text .= $parser->recursiveTagParseFully( join( $definitions ) );

			$parser->getOutput()->setText( $text );
		}

		return true;
	}
335
336
	/**
	 * Adds the Lingo scripts and styles to the parser output and, if
	 * $wgOut->isArticle() is false, to $wgOut as well.
	 *
	 * @param Parser $parser
	 */
	protected function loadModules( &$parser ) {
		global $wgOut;

		$output = $parser->getOutput();
		$alsoAddToPage = !$wgOut->isArticle();

		// load scripts
		$output->addModules( 'ext.Lingo.Scripts' );

		if ( $alsoAddToPage ) {
			$wgOut->addModules( 'ext.Lingo.Scripts' );
		}

		// load styles
		$output->addModuleStyles( 'ext.Lingo.Styles' );

		if ( $alsoAddToPage ) {
			$wgOut->addModuleStyles( 'ext.Lingo.Styles' );
		}
	}
358
359
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Static shortcut that forwards to purgeGlossaryFromCache() of the shared
	 * parser instance.
	 *
	 * @deprecated 2.0.2
	 */
	public static function purgeCache() {
		$sharedParser = self::getInstance();
		$sharedParser->purgeGlossaryFromCache();
	}
368
369
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * The entry is deleted from the cache selected by $wgexLingoCacheType, or
	 * from the main cache if that setting is null.
	 *
	 * @since 2.0.2
	 */
	public function purgeGlossaryFromCache() {
		global $wgexLingoCacheType;

		if ( $wgexLingoCacheType === null ) {
			$cache = wfGetMainCache();
		} else {
			$cache = wfGetCache( $wgexLingoCacheType );
		}

		$cache->delete( $this->getCacheKey() );
	}
380
381
	/**
	 * Assigns the backend to this parser and registers this parser with the
	 * backend.
	 *
	 * @since 2.0.1
	 * @param Backend $backend
	 */
	public function setBackend( Backend $backend ) {
		// store the backend first, then let it know its parser
		$this->mLingoBackend = $backend;
		$this->mLingoBackend->setLingoParser( $this );
	}
389
390
	/**
	 * Decides whether the text of the given parser should be enriched.
	 *
	 * The answer is no if the argument is not a Parser, if __NOGLOSSARY__ was
	 * found in the wikitext, if the parser has no valid title or if the
	 * namespace of the title is explicitly set to false in
	 * $wgexLingoUseNamespaces.
	 *
	 * @param Parser $parser
	 * @return bool
	 */
	protected function shouldParse( &$parser ) {
		global $wgexLingoUseNamespaces;

		// not a parser, or __NOGLOSSARY__ found in wikitext
		if ( !( $parser instanceof Parser ) || isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) {
			return false;
		}

		$title = $parser->getTitle();

		if ( !( $title instanceof Title ) ) {
			return false;
		}

		$namespace = $title->getNamespace();

		// only an explicit false switches a namespace off
		$namespaceDisabled = isset( $wgexLingoUseNamespaces[ $namespace ] )
			&& $wgexLingoUseNamespaces[ $namespace ] === false;

		return !$namespaceDisabled;
	}
419
}
420
421