GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( bead6a...6dc797 )
by
unknown
04:30
created

src/LingoParser.php (2 issues)

Labels
Severity
1
<?php
2
3
/**
4
 * File holding the Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2018, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Lingo;
30
31
use DOMDocument;
32
use DOMXPath;
33
use ObjectCache;
34
use Parser;
35
use Title;
36
37
/**
38
 * This class parses the given text and enriches it with definitions for defined
39
 * terms.
40
 *
41
 * Contains a static function to initiate the parsing.
42
 *
43
 * @ingroup Lingo
44
 */
45
class LingoParser {
46
47
	const WORD_VALUE = 0;
48
	const WORD_OFFSET = 1;
49
50
	private $mLingoTree = null;
51
52
	/**
53
	 * @var Backend
54
	 */
55
	private $mLingoBackend = null;
56
	private static $parserSingleton = null;
57
58
	// The RegEx to split a chunk of text into words
59
	public $regex = null;
60
61
	/**
	 * Lingo\LingoParser constructor.
	 *
	 * Assembles the regular expression used to split a chunk of text into
	 * "words". A word is one of:
	 * - a placeholder for a stripped item (parser marker prefix up to the
	 *   nearest marker suffix),
	 * - a sequence of letters and numbers,
	 * - a single character that is neither letter nor number.
	 *
	 * @param MessageLog|null &$messages not used by the constructor
	 */
	public function __construct( MessageLog &$messages = null ) {
		// Non-greedy, so that a match ends at the first marker suffix
		$stripMarker = preg_quote( Parser::MARKER_PREFIX, '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' );
		$lettersAndNumbers = '[\p{L}\p{N}]+';
		$anyOtherCharacter = '[^\p{L}\p{N}]';

		// u - pattern and subject are treated as UTF-8
		$this->regex = '/' . $stripMarker . '|' . $lettersAndNumbers . '|' . $anyOtherCharacter . '/u';
	}
70
71
	/**
	 * Enriches the output of the given parser with glossary annotations,
	 * provided that shouldParse() allows it.
	 *
	 * @param Parser $mwParser
	 *
	 * @return bool always true
	 */
	public function parse( $mwParser ) {
		if ( !$this->shouldParse( $mwParser ) ) {
			return true;
		}

		$this->realParse( $mwParser );

		return true;
	}
84
85
	/**
	 * Returns the shared parser instance, creating it on first use.
	 *
	 * @return LingoParser
	 * @since 2.0.1
	 */
	public static function getInstance() {
		if ( self::$parserSingleton ) {
			return self::$parserSingleton;
		}

		self::$parserSingleton = new LingoParser();

		return self::$parserSingleton;
	}
97
98
	/**
	 * Builds the cache key under which the glossary tree is stored.
	 *
	 * The key contains the tree version and the class name of the backend in use.
	 *
	 * @return string
	 * @throws \MWException if no backend is set (raised by getBackend())
	 */
	private function getCacheKey() {
		// FIXME: If Lingo ever stores the glossary tree per user, then the cache key also needs to include the user id (see T163608)
		$keyFactory = ObjectCache::getLocalClusterInstance();
		$backendClass = get_class( $this->getBackend() );

		return $keyFactory->makeKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, $backendClass );
	}
105
106
	/**
	 * Returns the backend previously registered via setBackend().
	 *
	 * @return Backend the backend used by the parser
	 * @throws \MWException if no backend has been set
	 */
	public function getBackend() {
		if ( $this->mLingoBackend !== null ) {
			return $this->mLingoBackend;
		}

		throw new \MWException( 'No Lingo backend available!' );
	}
118
119
	/**
	 * Returns the list of terms in the glossary, as reported by the term list
	 * of the glossary tree.
	 *
	 * @return array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		$tree = $this->getLingoTree();

		return $tree->getTermList();
	}
127
128
	/**
	 * Returns the list of terms in the glossary as a Lingo\Tree
	 *
	 * The tree is kept on this parser instance once it has been obtained. If
	 * the backend allows caching, the tree is looked up in the cache first and
	 * only built from the backend on a cache miss; in both cases it is
	 * (re-)written to the cache afterwards.
	 *
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
	 * @throws \MWException if no backend is set (raised by getBackend())
	 */
	public function getLingoTree() {
		global $wgexLingoCacheType;

		// Nothing to do if the tree was already obtained by this instance
		if ( $this->mLingoTree ) {
			return $this->mLingoTree;
		}

		if ( !$this->getBackend()->useCache() ) {
			wfDebug( "Caching of lingo tree disabled.\n" );
			$this->mLingoTree =& $this->buildLingo();

			return $this->mLingoTree;
		}

		// Use the configured cache type, or the main cache if none is configured
		if ( $wgexLingoCacheType !== null ) {
			$treeCache = wfGetCache( $wgexLingoCacheType );
		} else {
			$treeCache = wfGetMainCache();
		}

		$treeKey = $this->getCacheKey();
		$treeFromCache = $treeCache->get( $treeKey );

		if ( $treeFromCache === false || $treeFromCache === null ) {
			wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
			$this->mLingoTree =& $this->buildLingo();
			wfDebug( "Cached lingo tree.\n" );
		} else {
			wfDebug( "Cache hit: Got lingo tree from cache.\n" );
			$this->mLingoTree = &$treeFromCache;
			wfDebug( "Re-cached lingo tree.\n" );
		}

		// Keep for one month
		// Limiting the cache validity will allow to purge stale cache
		// entries inserted by older versions after one month
		$treeCache->set( $treeKey, $this->mLingoTree, 60 * 60 * 24 * 30 );

		return $this->mLingoTree;
	}
175
176
	/**
	 * Builds a new glossary tree from all elements delivered by the backend.
	 *
	 * Elements are pulled from the backend until it returns a falsy value.
	 * Each element is added to the tree under its ELEMENT_TERM entry.
	 *
	 * @return Tree
	 */
	protected function &buildLingo() {
		$tree = new Tree();

		for ( $entry = $this->mLingoBackend->next(); $entry; $entry = $this->mLingoBackend->next() ) {
			$tree->addTerm( $entry[ Element::ELEMENT_TERM ], $entry );
		}

		return $tree;
	}
191
192
	/**
	 * Parses the output text of the given parser and enriches applicable terms
	 *
	 * The text is loaded into a DOM document. Every text node that is neither
	 * inside a link nor inside an element of class "noglossary" is split into
	 * words (see __construct() for the definition of "word") and searched for
	 * glossary terms. A text node containing at least one term is replaced by
	 * a sequence of plain text nodes and formatted term nodes. If any term was
	 * found, the required modules are loaded, the parsed definitions are
	 * appended once to the text and the result is written back to the parser
	 * output.
	 *
	 * @param Parser &$parser
	 *
	 * @return bool always true
	 */
	protected function realParse( &$parser ) {
		$text = $parser->getOutput()->getText();

		if ( $text === null || $text === '' ) {
			return true;
		}

		// Get the tree of glossary terms
		$glossary = $this->getLingoTree();

		if ( $glossary == null ) {
			return true;
		}

		// Parse HTML from page; warnings raised while loading it are suppressed
		\MediaWiki\suppressWarnings();

		$doc = new DOMDocument( '1.0', 'utf-8' );
		$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );

		\MediaWiki\restoreWarnings();

		// Find all text in HTML.
		$xpath = new DOMXPath( $doc );
		$textElements = $xpath->query(
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
		);

		// Iterate all HTML text matches
		$numberOfTextElements = $textElements->length;

		// Formatted definitions of all terms found, keyed by definition id so
		// that each definition is emitted only once
		$definitions = [];

		for ( $textElementIndex = 0; $textElementIndex < $numberOfTextElements; $textElementIndex++ ) {
			$textElement = $textElements->item( $textElementIndex );

			// Text shorter than the shortest term can not contain a term
			if ( strlen( $textElement->nodeValue ) < $glossary->getMinTermLength() ) {
				continue;
			}

			// Split the text into words; PREG_OFFSET_CAPTURE additionally
			// records the byte offset of every word within the text
			$matches = [];
			preg_match_all(
				$this->regex,
				$textElement->nodeValue,
				$matches,
				PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
			);

			if ( empty( $matches[ 0 ] ) ) {
				continue;
			}

			$wordDescriptors = $matches[ 0 ]; // See __construct() for definition of "word"
			$numberOfWordDescriptors = count( $wordDescriptors );

			$parentNode = $textElement->parentNode;

			$wordDescriptorIndex = 0;
			$changedElem = false;

			while ( $wordDescriptorIndex < $numberOfWordDescriptors ) {

				/** @var \Lingo\Element $definition */
				list( $skippedWords, $usedWords, $definition ) =
					$glossary->findNextTerm( $wordDescriptors, $wordDescriptorIndex, $numberOfWordDescriptors );

				if ( $usedWords > 0 ) { // found a term

					if ( $skippedWords > 0 ) { // skipped some text, insert it as is

						// Offsets are byte offsets, hence the byte-wise substr()
						$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];
						$length = $wordDescriptors[ $wordDescriptorIndex + $skippedWords ][ self::WORD_OFFSET ] - $start;

						$parentNode->insertBefore(
							$doc->createTextNode(
								substr( $textElement->nodeValue, $start, $length )
							),
							$textElement
						);
					}

					$parentNode->insertBefore( $definition->getFormattedTerm( $doc ), $textElement );

					$definitions[ $definition->getId() ] = $definition->getFormattedDefinitions();

					$changedElem = true;

				} else { // did not find any term, just use the rest of the text

					// If we found no term now and no term before, there was no
					// term in the whole element. Might as well not change the
					// element at all.

					// Only change element if found term before
					if ( $changedElem === true ) {

						$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];

						$parentNode->insertBefore(
							$doc->createTextNode(
								substr( $textElement->nodeValue, $start )
							),
							$textElement
						);
					}

					// In principle superfluous, the loop would run out anyway. Might save a bit of time.
					break;
				}

				$wordDescriptorIndex += $usedWords + $skippedWords;
			}

			// The original text node has been re-inserted piece by piece
			// in front of itself, so it can go now
			if ( $changedElem ) {
				$parentNode->removeChild( $textElement );
			}
		}

		if ( count( $definitions ) > 0 ) {

			$this->loadModules( $parser );

			// Strip the HTML scaffolding that was wrapped around the text for loadHTML()
			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
			$text = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );

			// Append the definitions exactly once; explicit empty glue
			$text .= $parser->recursiveTagParseFully( implode( '', $definitions ) );

			$parser->getOutput()->setText( $text );
		}

		return true;
	}
334
335
	/**
	 * Registers the Lingo script and style modules.
	 *
	 * The modules are always added to the parser output. If the global
	 * output page does not report to be an article, they are additionally
	 * added to the output page directly.
	 *
	 * @param Parser &$parser
	 */
	protected function loadModules( &$parser ) {
		global $wgOut;

		$parserOutput = $parser->getOutput();

		// registration method => module name; scripts first, then styles
		$registrations = [
			'addModules' => 'ext.Lingo.Scripts',
			'addModuleStyles' => 'ext.Lingo.Styles',
		];

		foreach ( $registrations as $method => $module ) {
			$parserOutput->$method( $module );

			if ( !$wgOut->isArticle() ) {
				$wgOut->$method( $module );
			}
		}
	}
357
358
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Static shortcut that calls purgeGlossaryFromCache() on the shared
	 * parser instance.
	 *
	 * @deprecated 2.0.2
	 */
	public static function purgeCache() {
		$sharedParser = self::getInstance();
		$sharedParser->purgeGlossaryFromCache();
	}
367
368
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Deletes the entry stored under this parser's cache key from the
	 * configured cache, or from the main cache if no cache type is configured.
	 *
	 * @since 2.0.2
	 */
	public function purgeGlossaryFromCache() {
		global $wgexLingoCacheType;

		if ( $wgexLingoCacheType !== null ) {
			$treeCache = wfGetCache( $wgexLingoCacheType );
		} else {
			$treeCache = wfGetMainCache();
		}

		$treeCache->delete( $this->getCacheKey() );
	}
379
380
	/**
	 * Registers the backend with this parser and this parser with the backend.
	 *
	 * @since 2.0.1
	 * @param Backend $backend
	 */
	public function setBackend( Backend $backend ) {
		$this->mLingoBackend = $backend;
		$this->mLingoBackend->setLingoParser( $this );
	}
388
389
	/**
	 * Decides whether the output of the given parser should be annotated.
	 *
	 * Parsing is skipped if
	 * - the argument is not a Parser,
	 * - the 'noglossary' double underscore is set on the parser,
	 * - the parser has no Title, or
	 * - the namespace of the title is explicitly set to false in
	 *   $wgexLingoUseNamespaces.
	 *
	 * @param Parser &$parser
	 * @return bool
	 */
	protected function shouldParse( &$parser ) {
		global $wgexLingoUseNamespaces;

		// Not a parser at all, or __NOGLOSSARY__ found in wikitext
		if ( !( $parser instanceof Parser ) || isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) {
			return false;
		}

		$title = $parser->getTitle();

		if ( !( $title instanceof Title ) ) {
			return false;
		}

		$namespace = $title->getNamespace();

		// Namespaces without an entry are parsed; only an explicit false disables them
		return !isset( $wgexLingoUseNamespaces[ $namespace ] ) || $wgexLingoUseNamespaces[ $namespace ] !== false;
	}
418
}
419
420