Passed
Push — master ( bead6a...6dc797 )
by
unknown
04:30
created

LingoParser::shouldParse()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 24
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 6

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
eloc 12
nc 5
nop 1
dl 0
loc 24
ccs 13
cts 13
cp 1
crap 6
rs 8.5125
c 1
b 0
f 0
1
<?php
2
3
/**
4
 * File holding the Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2018, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Lingo;
30
31
use DOMDocument;
32
use DOMXPath;
33
use ObjectCache;
0 ignored issues
show
Bug introduced by
The type ObjectCache was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
34
use Parser;
0 ignored issues
show
Bug introduced by
The type Parser was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
35
use Title;
0 ignored issues
show
Bug introduced by
The type Title was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
36
37
/**
38
 * This class parses the given text and enriches it with definitions for defined
39
 * terms.
40
 *
41
 * Contains a static function to initiate the parsing.
42
 *
43
 * @ingroup Lingo
44
 */
45
class LingoParser {
46
47
	const WORD_VALUE = 0;
48
	const WORD_OFFSET = 1;
49
50
	private $mLingoTree = null;
51
52
	/**
53
	 * @var Backend
54
	 */
55
	private $mLingoBackend = null;
56
	private static $parserSingleton = null;
57
58
	// The RegEx to split a chunk of text into words
59
	public $regex = null;
60
61
	/**
	 * Lingo\LingoParser constructor.
	 *
	 * Prepares the regular expression that is used to split a chunk of text
	 * into words.
	 *
	 * @param MessageLog|null &$messages not used by the constructor body
	 */
	public function __construct( MessageLog &$messages = null ) {
		// A "word" is one of:
		// - a placeholder for a stripped item (delimited by the parser's marker prefix and suffix),
		// - a sequence of letters and numbers,
		// - a single character that is neither letter nor number.
		$stripMarker = preg_quote( Parser::MARKER_PREFIX, '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' );

		$this->regex = '/' . $stripMarker . '|[\p{L}\p{N}]+|[^\p{L}\p{N}]/u';
	}
70
71
	/**
	 * Enriches the output of the given parser with glossary definitions, but
	 * only if shouldParse() accepts the parser.
	 *
	 * @param Parser $mwParser
	 *
	 * @return bool always true
	 */
	public function parse( $mwParser ) {

		$isEligible = $this->shouldParse( $mwParser );

		if ( $isEligible ) {
			$this->realParse( $mwParser );
		}

		return true;
	}
84
85
	/**
	 * Returns the shared parser instance, creating it on the first call.
	 *
	 * @return LingoParser
	 * @since 2.0.1
	 */
	public static function getInstance() {

		// Reuse the instance created by an earlier call
		if ( self::$parserSingleton ) {
			return self::$parserSingleton;
		}

		self::$parserSingleton = new self();

		return self::$parserSingleton;
	}
97
98
	/**
	 * Builds the key under which the glossary tree is stored in the cache.
	 *
	 * The key contains the tree version and the class name of the current
	 * backend.
	 *
	 * @return string
	 */
	private function getCacheKey() {
		// FIXME: If Lingo ever stores the glossary tree per user, then the cache key also needs to include the user id (see T163608)
		$localCluster = ObjectCache::getLocalClusterInstance();
		$backendClass = get_class( $this->getBackend() );

		return $localCluster->makeKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, $backendClass );
	}
105
106
	/**
	 * Returns the backend that was registered via setBackend().
	 *
	 * @return Backend the backend used by the parser
	 * @throws \MWException if no backend has been set
	 */
	public function getBackend() {

		if ( $this->mLingoBackend !== null ) {
			return $this->mLingoBackend;
		}

		throw new \MWException( 'No Lingo backend available!' );
	}
118
119
	/**
	 * Returns the list of terms in the glossary
	 *
	 * The list is taken from the glossary tree (see getLingoTree()).
	 *
	 * @return array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		$tree = $this->getLingoTree();

		return $tree->getTermList();
	}
127
128
	/**
	 * Returns the list of terms in the glossary as a Lingo\Tree
	 *
	 * The tree is kept in this object once it is available, so later calls
	 * return it directly. If the backend allows caching, the tree is looked up
	 * in the cache first and only built on a cache miss; in both cases it is
	 * then written to the cache. Without caching the tree is built from the
	 * backend.
	 *
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
	 * @throws \MWException if no backend is available
	 */
	public function getLingoTree() {

		// build glossary tree only once per request
		if ( $this->mLingoTree ) {
			return $this->mLingoTree;
		}

		// caching not wanted by the backend: build the tree and be done
		if ( !$this->getBackend()->useCache() ) {
			wfDebug( "Caching of lingo tree disabled.\n" );
			$this->mLingoTree =& $this->buildLingo();

			return $this->mLingoTree;
		}

		// Use the configured cache type if there is one, the main cache otherwise
		global $wgexLingoCacheType;

		if ( $wgexLingoCacheType !== null ) {
			$cache = wfGetCache( $wgexLingoCacheType );
		} else {
			$cache = wfGetMainCache();
		}

		$cachekey = $this->getCacheKey();
		$treeFromCache = $cache->get( $cachekey );

		if ( $treeFromCache === false || $treeFromCache === null ) {

			// cache miss
			wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
			$this->mLingoTree =& $this->buildLingo();
			wfDebug( "Cached lingo tree.\n" );

		} else {

			// cache hit
			wfDebug( "Cache hit: Got lingo tree from cache.\n" );
			$this->mLingoTree = &$treeFromCache;

			wfDebug( "Re-cached lingo tree.\n" );
		}

		// Keep for one month
		// Limiting the cache validity will allow to purge stale cache
		// entries inserted by older versions after one month
		$cache->set( $cachekey, $this->mLingoTree, 60 * 60 * 24 * 30 );

		return $this->mLingoTree;
	}
175
176
	/**
	 * Builds a new glossary tree from the elements delivered by the backend.
	 *
	 * Elements are fetched with next() until a falsy value is returned; each
	 * one is added to the tree under its term.
	 *
	 * @return Tree
	 */
	protected function &buildLingo() {

		$tree = new Tree();
		$source = &$this->mLingoBackend;

		// assemble the tree, one backend element at a time
		for ( $entry = $source->next(); $entry; $entry = $source->next() ) {
			$tree->addTerm( $entry[ Element::ELEMENT_TERM ], $entry );
		}

		return $tree;
	}
191
192
	/**
	 * Parses the given text and enriches applicable terms
	 *
	 * The output text of the parser is loaded into a DOM document. Every
	 * selected text node is split into words (see __construct()) and searched
	 * for glossary terms; a text node containing terms is replaced by a
	 * sequence of plain text nodes and formatted terms. If at least one term
	 * was found, the Lingo modules are loaded and the modified HTML, followed
	 * by the parsed definitions, is set as the new output text.
	 *
	 * @param Parser &$parser
	 *
	 * @return bool always true
	 */
	protected function realParse( &$parser ) {

		$text = $parser->getOutput()->getText();

		// Nothing to enrich in an empty page
		if ( $text === null || $text === '' ) {
			return true;
		}

		// Get tree of terms
		$glossary = $this->getLingoTree();

		if ( $glossary == null ) {
			return true;
		}

		// Parse HTML from page; warnings are suppressed while the document is loaded
		\MediaWiki\suppressWarnings();

		$doc = new DOMDocument( '1.0', 'utf-8' );
		$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );

		\MediaWiki\restoreWarnings();

		// Find all text in HTML, leaving out links and everything marked with class 'noglossary'
		$xpath = new DOMXPath( $doc );
		$textNodes = $xpath->query(
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
		);

		$textNodeCount = $textNodes->length;

		// Formatted definitions of all terms found, keyed by definition id
		$definitions = [];

		for ( $nodeIndex = 0; $nodeIndex < $textNodeCount; $nodeIndex++ ) {
			$textNode = $textNodes->item( $nodeIndex );

			// Skip text that is shorter than the minimum term length
			if ( strlen( $textNode->nodeValue ) < $glossary->getMinTermLength() ) {
				continue;
			}

			// Split the text into words, recording the offset of each word
			$matches = [];
			preg_match_all( $this->regex, $textNode->nodeValue, $matches, PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER );

			if ( count( $matches ) === 0 || count( $matches[ 0 ] ) === 0 ) {
				continue;
			}

			$words = &$matches[ 0 ]; // See __construct() for definition of "word"
			$wordCount = count( $words );

			$parent = &$textNode->parentNode;

			$cursor = 0;
			$replaced = false;

			while ( $cursor < $wordCount ) {

				/** @var \Lingo\Element $definition */
				list( $skipped, $used, $definition ) = $glossary->findNextTerm( $words, $cursor, $wordCount );

				if ( !( $used > 0 ) ) {

					// No further term in this text node. If no term was found
					// before either, the node is left completely untouched;
					// otherwise the remaining text is appended as is.
					if ( $replaced === true ) {
						$restOffset = $words[ $cursor ][ self::WORD_OFFSET ];
						$restNode = $doc->createTextNode( substr( $textNode->nodeValue, $restOffset ) );

						$parent->insertBefore( $restNode, $textNode );
					}

					// In principle superfluous, the loop would run out anyway. Might save a bit of time.
					break;
				}

				// Found a term. Text skipped on the way to the term is inserted as is.
				if ( $skipped > 0 ) {
					$skippedOffset = $words[ $cursor ][ self::WORD_OFFSET ];
					$skippedLength = $words[ $cursor + $skipped ][ self::WORD_OFFSET ] - $skippedOffset;
					$skippedNode = $doc->createTextNode( substr( $textNode->nodeValue, $skippedOffset, $skippedLength ) );

					$parent->insertBefore( $skippedNode, $textNode );
				}

				$parent->insertBefore( $definition->getFormattedTerm( $doc ), $textNode );

				$definitions[ $definition->getId() ] = $definition->getFormattedDefinitions();

				$replaced = true;

				$cursor += $used + $skipped;
			}

			// The original text node has been replaced by the nodes inserted before it
			if ( $replaced ) {
				$parent->removeChild( $textNode );
			}
		}

		// No term found anywhere: leave the parser output as it is
		if ( count( $definitions ) === 0 ) {
			return true;
		}

		$this->loadModules( $parser );

		// Strip everything up to <body> and from </body> on
		// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
		$html = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );
		$html .= $parser->recursiveTagParseFully( implode( '', $definitions ) );

		$parser->getOutput()->setText( $html );

		return true;
	}
334
335
	/**
	 * Registers the Lingo scripts and styles with the output of the given
	 * parser. As long as the global output page does not report an article,
	 * the modules are also added to the global output page.
	 *
	 * @param Parser &$parser
	 */
	protected function loadModules( &$parser ) {
		global $wgOut;

		$output = $parser->getOutput();

		// scripts
		$output->addModules( 'ext.Lingo.Scripts' );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModules( 'ext.Lingo.Scripts' );
		}

		// styles
		$output->addModuleStyles( 'ext.Lingo.Styles' );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModuleStyles( 'ext.Lingo.Styles' );
		}
	}
357
358
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Static shortcut that calls purgeGlossaryFromCache() on the shared parser
	 * instance.
	 *
	 * @deprecated 2.0.2
	 */
	public static function purgeCache() {
		$parser = self::getInstance();

		$parser->purgeGlossaryFromCache();
	}
367
368
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * The entry is deleted from the cache of the configured type if there is
	 * one, from the main cache otherwise.
	 *
	 * @since 2.0.2
	 */
	public function purgeGlossaryFromCache() {

		global $wgexLingoCacheType;

		if ( $wgexLingoCacheType !== null ) {
			$cache = wfGetCache( $wgexLingoCacheType );
		} else {
			$cache = wfGetMainCache();
		}

		$cachekey = $this->getCacheKey();

		$cache->delete( $cachekey );
	}
379
380
	/**
	 * Registers the backend used by this parser and makes this parser known to
	 * the backend.
	 *
	 * @since 2.0.1
	 * @param Backend $backend
	 */
	public function setBackend( Backend $backend ) {
		$this->mLingoBackend = $backend;

		// Link the backend back to this parser
		$this->mLingoBackend->setLingoParser( $this );
	}
388
389
	/**
	 * Decides whether the output of the given parser should be enriched.
	 *
	 * The answer is no if the argument is not a Parser, if the page contains
	 * __NOGLOSSARY__, if the parser has no Title, or if the namespace of the
	 * title is explicitly set to false in $wgexLingoUseNamespaces.
	 *
	 * @param Parser &$parser
	 * @return bool
	 */
	protected function shouldParse( &$parser ) {
		global $wgexLingoUseNamespaces;

		// Need an actual parser, and no __NOGLOSSARY__ in the wikitext
		if ( !( $parser instanceof Parser ) || isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) {
			return false;
		}

		$title = $parser->getTitle();

		if ( !( $title instanceof Title ) ) {
			return false;
		}

		$namespace = $title->getNamespace();

		// Only an explicit false switches a namespace off; namespaces that are not listed are parsed
		$disabledForNamespace = isset( $wgexLingoUseNamespaces[ $namespace ] ) && $wgexLingoUseNamespaces[ $namespace ] === false;

		return !$disabledForNamespace;
	}
418
}
419
420