GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( 841b7d...3ed1d3 )
by
unknown
13:44 queued 04:45
created

src/LingoParser.php (3 issues)

Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/**
4
 * File holding the Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2018, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Lingo;
30
31
use DOMDocument;
32
use DOMXPath;
33
use ObjectCache;
34
use Parser;
35
use Title;
36
37
/**
38
 * This class parses the given text and enriches it with definitions for defined
39
 * terms.
40
 *
41
 * Contains a static function to initiate the parsing.
42
 *
43
 * @ingroup Lingo
44
 */
45
class LingoParser {
46
47
	const WORD_VALUE = 0;
48
	const WORD_OFFSET = 1;
49
50
	private $mLingoTree = null;
51
52
	/**
53
	 * @var Backend
54
	 */
55
	private $mLingoBackend = null;
56
	private static $parserSingleton = null;
57
58
	// The RegEx to split a chunk of text into words
59 1
	public $regex = null;
60
61
	/**
	 * Lingo\LingoParser constructor.
	 *
	 * Assembles the regular expression that splits a chunk of text into
	 * "words". A word is one of:
	 *  - a placeholder for a stripped item (delimited by Parser::MARKER_PREFIX
	 *    and Parser::MARKER_SUFFIX),
	 *  - a sequence of letters and numbers,
	 *  - a single character that is neither letter nor number.
	 *
	 * @param MessageLog|null $messages not read by the constructor body
	 */
	public function __construct( MessageLog &$messages = null ) {
		$markerStart = preg_quote( Parser::MARKER_PREFIX, '/' );
		$markerEnd = preg_quote( Parser::MARKER_SUFFIX, '/' );

		// Alternatives are tried in this order, so strip markers win over plain words
		$wordPatterns = [
			$markerStart . '.*?' . $markerEnd,
			'[\p{L}\p{N}]+',
			'[^\p{L}\p{N}]',
		];

		$this->regex = '/' . implode( '|', $wordPatterns ) . '/u';
	}
70
71
	/**
	 * Runs the glossary annotation on the output of the global parser.
	 *
	 * None of the three parameters is read by the method body (static analysis
	 * flags $content, $title and $po as unused); the parser is taken from
	 * $GLOBALS['wgParser'] instead.
	 * NOTE(review): presumably the parameters are dictated by the signature of
	 * the hook this method is registered for - confirm before removing them.
	 *
	 * @param \AbstractContent $content
	 * @param \Title $title
	 * @param \ParserOutput $po
	 *
	 * @return bool always true
	 */
	public function parse( $content, $title, $po ) {

		/** @var \Parser $parser */
		$parser = $GLOBALS[ 'wgParser' ];

		if ( !$this->shouldParse( $parser ) ) {
			return true;
		}

		$this->realParse( $parser );

		return true;
	}
90
91
	/**
	 * Returns the shared LingoParser instance, creating it on first use.
	 *
	 * @return LingoParser
	 * @since 2.0.1
	 */
	public static function getInstance() {
		if ( self::$parserSingleton === null ) {
			self::$parserSingleton = new self();
		}

		return self::$parserSingleton;
	}
103
104
	/**
	 * Builds the cache key under which the lingo tree is stored.
	 *
	 * The key includes the tree version and the class name of the backend of
	 * the shared parser instance (see getInstance()).
	 *
	 * @return string
	 */
	private function getCacheKey() {
		// FIXME: If Lingo ever stores the glossary tree per user, then the cache key also needs to include the user id (see T163608)
		$keyFactory = ObjectCache::getLocalClusterInstance();
		$backendClass = get_class( self::getInstance()->getBackend() );

		return $keyFactory->makeKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, $backendClass );
	}
111
112
	/**
	 * Returns the backend previously registered with this parser.
	 *
	 * @return Backend the backend used by the parser
	 * @throws \MWException if no backend has been set
	 */
	public function getBackend() {
		if ( $this->mLingoBackend !== null ) {
			return $this->mLingoBackend;
		}

		throw new \MWException( 'No Lingo backend available!' );
	}
124
125
	/**
	 * Returns the list of terms in the glossary
	 *
	 * The list is obtained from the lingo tree (see getLingoTree()).
	 *
	 * @return array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		$tree = $this->getLingoTree();

		return $tree->getTermList();
	}
133
134
	/**
	 * Returns the list of terms in the glossary as a Lingo\Tree
	 *
	 * The tree is built at most once per parser instance and then kept in
	 * $this->mLingoTree. If the backend allows caching, the tree is looked up
	 * in the configured cache first and (re-)stored there afterwards.
	 *
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
	 * @throws \MWException if no backend has been set
	 */
	public function getLingoTree() {
		global $wgexLingoCacheType;

		// build glossary array only once per request
		if ( $this->mLingoTree ) {
			return $this->mLingoTree;
		}

		// Go through getBackend() instead of touching the property directly:
		// a missing backend then raises the documented MWException rather
		// than a fatal "call to a member function on null".
		$backend = $this->getBackend();

		if ( !$backend->useCache() ) {
			wfDebug( "Caching of lingo tree disabled.\n" );
			$this->mLingoTree = $this->buildLingo();

			return $this->mLingoTree;
		}

		// Try cache first
		$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
		$cachekey = $this->getCacheKey();
		$cachedLingoTree = $cache->get( $cachekey );

		if ( $cachedLingoTree !== false && $cachedLingoTree !== null ) {

			wfDebug( "Cache hit: Got lingo tree from cache.\n" );
			$this->mLingoTree = $cachedLingoTree;
			wfDebug( "Re-cached lingo tree.\n" );

		} else {

			wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
			$this->mLingoTree = $this->buildLingo();
			wfDebug( "Cached lingo tree.\n" );
		}

		// Keep for one month
		// Limiting the cache validity will allow to purge stale cache
		// entries inserted by older versions after one month
		// The entry is written on cache hits as well, not only on misses.
		$cache->set( $cachekey, $this->mLingoTree, 60 * 60 * 24 * 30 );

		return $this->mLingoTree;
	}
181
182
	/**
	 * Builds a fresh lingo tree from the elements delivered by the backend.
	 *
	 * Elements are pulled from the backend one by one until it returns a
	 * falsy value; each one is added to the tree under its term.
	 *
	 * @return Tree
	 */
	protected function &buildLingo() {

		$tree = new Tree();

		// assemble the result array
		while ( true ) {
			$entry = $this->mLingoBackend->next();

			if ( !$entry ) {
				break;
			}

			$tree->addTerm( $entry[ Element::ELEMENT_TERM ], $entry );
		}

		return $tree;
	}
197
198
	/**
	 * Parses the output text of the given parser and enriches applicable terms
	 *
	 * The text is loaded into a DOM document, every eligible text node is
	 * scanned for glossary terms, and - if at least one term was found - the
	 * modified HTML plus the parsed definitions are written back to the
	 * parser output.
	 *
	 * NOTE(review): the previous comment stated that only terms of at most one
	 * word are recognized; the scanning loop advances by the number of used
	 * words reported by the tree, so that statement should be verified.
	 *
	 * @param Parser $parser
	 *
	 * @return bool always true
	 */
	protected function realParse( &$parser ) {

		$text = $parser->getOutput()->getText();

		if ( $text === null || $text === '' ) {
			return true;
		}

		// Get tree of terms; anything that is not a Tree (e.g. null) cannot
		// be searched, so leave the text untouched in that case
		$glossary = $this->getLingoTree();

		if ( !( $glossary instanceof Tree ) ) {
			return true;
		}

		// Parse HTML from page
		\MediaWiki\suppressWarnings();

		$doc = new DOMDocument( '1.0', 'utf-8' );
		$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );

		\MediaWiki\restoreWarnings();

		// Find all text in HTML.
		$xpath = new DOMXPath( $doc );
		$textElements = $xpath->query(
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
		);

		// Iterate all HTML text matches
		$numberOfTextElements = $textElements->length;
		$definitions = [];

		for ( $textElementIndex = 0; $textElementIndex < $numberOfTextElements; $textElementIndex++ ) {
			$this->annotateTextElement( $doc, $textElements->item( $textElementIndex ), $glossary, $definitions );
		}

		if ( count( $definitions ) > 0 ) {

			$this->loadModules( $parser );

			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
			$text = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );
			$text .= $parser->recursiveTagParseFully( implode( '', $definitions ) );

			$parser->getOutput()->setText( $text );
		}

		return true;
	}

	/**
	 * Replaces the glossary terms found in one text node by their formatted
	 * term nodes.
	 *
	 * The text node is split into words (see __construct() for the definition
	 * of "word"). Text before, between and after terms is re-inserted as plain
	 * text nodes in front of the original node, which is removed at the end.
	 * If no term is found the node is left untouched.
	 *
	 * @param DOMDocument $doc the document owning the text node
	 * @param \DOMNode $textElement the text node to scan
	 * @param Tree $glossary the tree of known terms
	 * @param array &$definitions collects the formatted definitions of all
	 *  terms found, keyed by the id of the definition
	 */
	private function annotateTextElement( DOMDocument $doc, $textElement, Tree $glossary, array &$definitions ) {

		// The node itself is not modified while scanning, so its text can be read once
		$nodeText = $textElement->nodeValue;

		if ( strlen( $nodeText ) < $glossary->getMinTermLength() ) {
			return;
		}

		$matches = [];
		preg_match_all(
			$this->regex,
			$nodeText,
			$matches,
			PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
		);

		if ( empty( $matches[ 0 ] ) ) {
			return;
		}

		$wordDescriptors = $matches[ 0 ];
		$numberOfWordDescriptors = count( $wordDescriptors );

		$parentNode = $textElement->parentNode;

		$wordDescriptorIndex = 0;
		$changedElem = false;

		while ( $wordDescriptorIndex < $numberOfWordDescriptors ) {

			/** @var \Lingo\Element $definition */
			list( $skippedWords, $usedWords, $definition ) =
				$glossary->findNextTerm( $wordDescriptors, $wordDescriptorIndex, $numberOfWordDescriptors );

			if ( $usedWords <= 0 ) { // did not find any term, just use the rest of the text

				// If no term was found now and none before, there was no term
				// in the whole element, so it is not changed at all.
				// Only re-insert the remaining text if a term was found before.
				if ( $changedElem ) {
					// Offsets from PREG_OFFSET_CAPTURE are byte offsets, hence substr()
					$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];
					$parentNode->insertBefore(
						$doc->createTextNode( substr( $nodeText, $start ) ),
						$textElement
					);
				}

				break;
			}

			if ( $skippedWords > 0 ) { // skipped some text, insert it as is

				$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];
				$length = $wordDescriptors[ $wordDescriptorIndex + $skippedWords ][ self::WORD_OFFSET ] - $start;

				$parentNode->insertBefore(
					$doc->createTextNode( substr( $nodeText, $start, $length ) ),
					$textElement
				);
			}

			$parentNode->insertBefore( $definition->getFormattedTerm( $doc ), $textElement );

			$definitions[ $definition->getId() ] = $definition->getFormattedDefinitions();

			$changedElem = true;

			$wordDescriptorIndex += $usedWords + $skippedWords;
		}

		// The original text is now represented by the nodes inserted before it
		if ( $changedElem ) {
			$parentNode->removeChild( $textElement );
		}
	}
340
341
	/**
	 * Registers the Lingo script and style modules.
	 *
	 * The modules are always added to the parser output; they are added to the
	 * global output page as well whenever $wgOut->isArticle() is false.
	 *
	 * @param Parser $parser
	 */
	protected function loadModules( &$parser ) {
		global $wgOut;

		$output = $parser->getOutput();

		// load scripts
		$output->addModules( 'ext.Lingo.Scripts' );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModules( 'ext.Lingo.Scripts' );
		}

		// load styles
		$output->addModuleStyles( 'ext.Lingo.Styles' );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModuleStyles( 'ext.Lingo.Styles' );
		}
	}
363
364
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Static shortcut that calls purgeGlossaryFromCache() on the shared
	 * parser instance.
	 *
	 * @deprecated 2.0.2
	 */
	public static function purgeCache() {
		$lingoParser = self::getInstance();
		$lingoParser->purgeGlossaryFromCache();
	}
373
374
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Uses the cache type configured in $wgexLingoCacheType, or the main cache
	 * if that setting is null.
	 *
	 * @since 2.0.2
	 */
	public function purgeGlossaryFromCache() {
		global $wgexLingoCacheType;

		if ( $wgexLingoCacheType !== null ) {
			$cache = wfGetCache( $wgexLingoCacheType );
		} else {
			$cache = wfGetMainCache();
		}

		$cache->delete( $this->getCacheKey() );
	}
385
386
	/**
	 * Registers the backend with this parser and hands the parser to the
	 * backend in turn.
	 *
	 * @since 2.0.1
	 * @param Backend $backend
	 */
	public function setBackend( Backend $backend ) {
		// Store first, then let the backend know which parser it belongs to
		$this->mLingoBackend = $backend;

		$backend->setLingoParser( $this );
	}
394
395
	/**
	 * Decides whether the output of the given parser should be annotated.
	 *
	 * Annotation is skipped if the argument is not a Parser, if the
	 * 'noglossary' double underscore is set, if the parser has no Title, or
	 * if $wgexLingoUseNamespaces explicitly maps the title's namespace to
	 * false.
	 *
	 * @param Parser $parser
	 * @return bool
	 */
	protected function shouldParse( &$parser ) {
		global $wgexLingoUseNamespaces;

		// Second condition: __NOGLOSSARY__ found in wikitext
		if ( !( $parser instanceof Parser ) || isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) {
			return false;
		}

		$title = $parser->getTitle();

		if ( !( $title instanceof Title ) ) {
			return false;
		}

		$namespace = $title->getNamespace();

		// Namespaces that are not listed at all count as enabled
		$disabledForNamespace = isset( $wgexLingoUseNamespaces[ $namespace ] )
			&& $wgexLingoUseNamespaces[ $namespace ] === false;

		return !$disabledForNamespace;
	}
424
}
425
426