Completed
Push — master ( c48858...841b7d )
by
unknown
33:26
created

LingoParser::shouldParse()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 25
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 42

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 0
loc 25
ccs 0
cts 4
cp 0
rs 8.439
c 1
b 0
f 0
cc 6
eloc 13
nc 5
nop 1
crap 42
1
<?php
2
3
/**
4
 * File holding the Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2018, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Lingo;
30
31
use DOMDocument;
32
use DOMXPath;
33
use ObjectCache;
34
use Parser;
35
use Title;
36
37
/**
38
 * This class parses the given text and enriches it with definitions for defined
39
 * terms.
40
 *
41
 * Contains a static function to initiate the parsing.
42
 *
43
 * @ingroup Lingo
44
 */
45
class LingoParser {
46
47
	const WORD_VALUE = 0;
48
	const WORD_OFFSET = 1;
49
50
	private $mLingoTree = null;
51
52
	/**
53
	 * @var Backend
54
	 */
55
	private $mLingoBackend = null;
56
	private static $parserSingleton = null;
57
58
	// The RegEx to split a chunk of text into words
59 1
	public $regex = null;
60
61
	/**
	 * Lingo\LingoParser constructor.
	 *
	 * Assembles the regular expression stored in $this->regex, which is used
	 * to split a chunk of text into words.
	 *
	 * @param MessageLog|null $messages Not used by the constructor body.
	 */
	public function __construct( MessageLog &$messages = null ) {
		$markerStart = preg_quote( Parser::MARKER_PREFIX, '/' );
		$markerEnd = preg_quote( Parser::MARKER_SUFFIX, '/' );

		// A "word" is one of the following (first matching alternative wins):
		$wordPatterns = [
			// a placeholder for a stripped item
			$markerStart . '.*?' . $markerEnd,
			// a sequence of letters and numbers
			'[\p{L}\p{N}]+',
			// a single character that is neither letter nor number
			'[^\p{L}\p{N}]',
		];

		$this->regex = '/' . implode( '|', $wordPatterns ) . '/u';
	}
70
71
	/**
	 * Runs the glossary annotation on the output of the global parser.
	 *
	 * None of the three parameters is read by this method; the parser is taken
	 * from $GLOBALS['wgParser'] and is only processed if shouldParse() agrees.
	 *
	 * @param \AbstractContent $content
	 * @param \Title $title
	 * @param \ParserOutput $po
	 *
	 * @return bool always true
	 */
	public function parse( $content, $title, $po ) {
		/** @var \Parser $parser */
		$parser = $GLOBALS[ 'wgParser' ];

		if ( !$this->shouldParse( $parser ) ) {
			return true;
		}

		$this->realParse( $parser );

		return true;
	}
90
91
	/**
	 * Returns the shared LingoParser instance, creating it on first use.
	 *
	 * @return LingoParser
	 * @since 2.0.1
	 */
	public static function getInstance() {
		if ( self::$parserSingleton === null ) {
			self::$parserSingleton = new self();
		}

		return self::$parserSingleton;
	}
103
104
	/**
	 * Builds the cache key under which the glossary tree is stored.
	 *
	 * The key is composed of the tree version and the class name of the
	 * backend of the shared parser instance (see getInstance()).
	 *
	 * @return string
	 */
	private function getCacheKey() {
		// FIXME: If Lingo ever stores the glossary tree per user, then the cache key also needs to include the user id (see T163608)
		$keyFactory = ObjectCache::getLocalClusterInstance();
		$backendClass = get_class( self::getInstance()->getBackend() );

		return $keyFactory->makeKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, $backendClass );
	}
111
112
	/**
	 * Returns the backend previously registered via setBackend().
	 *
	 * @return Backend the backend used by the parser
	 * @throws \MWException if no backend has been set
	 */
	public function getBackend() {
		if ( $this->mLingoBackend !== null ) {
			return $this->mLingoBackend;
		}

		throw new \MWException( 'No Lingo backend available!' );
	}
124
125
	/**
	 * Returns the list of terms in the glossary, as reported by the
	 * glossary tree's getTermList().
	 *
	 * @return array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		$tree = $this->getLingoTree();

		return $tree->getTermList();
	}
133
134
	/**
	 * Returns the list of terms in the glossary as a Lingo\Tree
	 *
	 * The tree is built at most once per LingoParser instance. If the backend
	 * allows caching, the tree is first looked up in the configured cache
	 * ($wgexLingoCacheType, falling back to the main cache) and written back
	 * to it with a validity of 30 days, both after a cache hit and after a
	 * rebuild.
	 *
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
	 * @throws \MWException if no backend has been set
	 */
	public function getLingoTree() {
		// build glossary tree only once per request
		if ( $this->mLingoTree ) {
			return $this->mLingoTree;
		}

		// Go through getBackend() rather than the raw property: a missing
		// backend then raises the descriptive MWException instead of a fatal
		// "member function on null" error.
		if ( !$this->getBackend()->useCache() ) {
			wfDebug( "Caching of lingo tree disabled.\n" );
			$this->mLingoTree = $this->buildLingo();

			return $this->mLingoTree;
		}

		// Try cache first
		global $wgexLingoCacheType;
		$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
		$cachekey = $this->getCacheKey();
		$cachedLingoTree = $cache->get( $cachekey );

		if ( $cachedLingoTree !== false && $cachedLingoTree !== null ) {
			// cache hit
			wfDebug( "Cache hit: Got lingo tree from cache.\n" );
			$this->mLingoTree = $cachedLingoTree;
			wfDebug( "Re-cached lingo tree.\n" );
		} else {
			// cache miss
			wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
			$this->mLingoTree = $this->buildLingo();
			wfDebug( "Cached lingo tree.\n" );
		}

		// Keep for one month
		// Limiting the cache validity will allow to purge stale cache
		// entries inserted by older versions after one month
		$cache->set( $cachekey, $this->mLingoTree, 60 * 60 * 24 * 30 );

		return $this->mLingoTree;
	}
181
182
	/**
	 * Builds a fresh glossary tree from the entries delivered by the backend.
	 *
	 * Entries are fetched with the backend's next() until it returns a falsy
	 * value; each entry is added to the tree under its ELEMENT_TERM value.
	 *
	 * @return Tree
	 */
	protected function &buildLingo() {
		$tree = new Tree();

		// assemble the result tree, one backend entry at a time
		for ( $entry = $this->mLingoBackend->next(); $entry; $entry = $this->mLingoBackend->next() ) {
			$tree->addTerm( $entry[ Element::ELEMENT_TERM ], $entry );
		}

		return $tree;
	}
197
198
	/**
	 * Parses the given text and enriches applicable terms
	 *
	 * The text of the parser output is loaded into a DOM document. Every text
	 * node that is neither inside a link nor inside an element of class
	 * 'noglossary' is split into words (see __construct() for the definition
	 * of "word") and searched for glossary terms. Each term found is replaced
	 * by its formatted term node; the formatted definitions of all terms found
	 * are parsed and appended to the output text.
	 *
	 * @param Parser $parser
	 *
	 * @return bool always true
	 */
	protected function realParse( &$parser ) {
		$html = $parser->getOutput()->getText();

		// Nothing to annotate
		if ( $html === null || $html === '' ) {
			return true;
		}

		// Get tree of terms
		$tree = $this->getLingoTree();

		if ( $tree == null ) {
			return true;
		}

		// Parse HTML from page; warnings are suppressed while loading the markup
		\MediaWiki\suppressWarnings();

		$dom = new DOMDocument( '1.0', 'utf-8' );
		$dom->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $html . '</body></html>' );

		\MediaWiki\restoreWarnings();

		// Find all text in HTML.
		$finder = new DOMXPath( $dom );
		$textNodes = $finder->query(
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
		);

		$nodeCount = $textNodes->length;
		$collectedDefinitions = [];

		// Iterate all HTML text matches
		for ( $nodeIndex = 0; $nodeIndex < $nodeCount; $nodeIndex++ ) {
			$node = $textNodes->item( $nodeIndex );
			$nodeText = $node->nodeValue;

			// Too short to contain any term
			if ( strlen( $nodeText ) < $tree->getMinTermLength() ) {
				continue;
			}

			$matches = [];
			preg_match_all(
				$this->regex,
				$nodeText,
				$matches,
				PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
			);

			if ( empty( $matches[ 0 ] ) ) {
				continue;
			}

			// Each entry is a pair of word value and byte offset, see WORD_VALUE and WORD_OFFSET
			$words = $matches[ 0 ];
			$wordCount = count( $words );

			$parent = $node->parentNode;

			$cursor = 0;
			$nodeChanged = false;

			while ( $cursor < $wordCount ) {

				/** @var \Lingo\Element $definition */
				list( $skipped, $used, $definition ) =
					$tree->findNextTerm( $words, $cursor, $wordCount );

				if ( !( $used > 0 ) ) {
					// Did not find any further term. If no term was found before
					// either, the element is left untouched; otherwise the rest
					// of the text is carried over as is.
					if ( $nodeChanged === true ) {
						$tailStart = $words[ $cursor ][ self::WORD_OFFSET ];

						$parent->insertBefore(
							$dom->createTextNode( substr( $nodeText, $tailStart ) ),
							$node
						);
					}

					// In principle superfluous, the loop would run out anyway. Might save a bit of time.
					break;
				}

				// Found a term. If some text was skipped on the way, insert it as is.
				if ( $skipped > 0 ) {
					$start = $words[ $cursor ][ self::WORD_OFFSET ];
					$length = $words[ $cursor + $skipped ][ self::WORD_OFFSET ] - $start;

					$parent->insertBefore(
						$dom->createTextNode( substr( $nodeText, $start, $length ) ),
						$node
					);
				}

				$parent->insertBefore( $definition->getFormattedTerm( $dom ), $node );

				$collectedDefinitions[ $definition->getId() ] = $definition->getFormattedDefinitions();

				$nodeChanged = true;

				$cursor += $used + $skipped;
			}

			// The original text node has been replaced by the nodes inserted before it
			if ( $nodeChanged ) {
				$parent->removeChild( $node );
			}
		}

		if ( count( $collectedDefinitions ) > 0 ) {

			$this->loadModules( $parser );

			// Strip the document wrapper added above.
			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
			$html = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $dom->saveHTML() );
			$html .= $parser->recursiveTagParseFully( implode( '', $collectedDefinitions ) );

			$parser->getOutput()->setText( $html );
		}

		return true;
	}
340
341
	/**
	 * Registers the Lingo script and style modules.
	 *
	 * The modules are always added to the parser output. They are additionally
	 * added to the global output page ($wgOut) whenever its isArticle() check
	 * fails.
	 *
	 * @param Parser $parser
	 */
	protected function loadModules( &$parser ) {
		global $wgOut;

		$scriptModule = 'ext.Lingo.Scripts';
		$styleModule = 'ext.Lingo.Styles';

		$output = $parser->getOutput();

		// load scripts
		$output->addModules( $scriptModule );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModules( $scriptModule );
		}

		// load styles
		$output->addModuleStyles( $styleModule );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModuleStyles( $styleModule );
		}
	}
363
364
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Static shortcut that calls purgeGlossaryFromCache() on the shared
	 * parser instance.
	 *
	 * @deprecated 2.0.2
	 */
	public static function purgeCache() {
		$instance = self::getInstance();
		$instance->purgeGlossaryFromCache();
	}
373
374
	/**
	 * Purges the lingo tree from the cache.
	 *
	 * Deletes the entry stored under getCacheKey() from the cache selected by
	 * $wgexLingoCacheType, or from the main cache if that setting is null.
	 *
	 * @since 2.0.2
	 */
	public function purgeGlossaryFromCache() {
		global $wgexLingoCacheType;

		if ( $wgexLingoCacheType !== null ) {
			$cache = wfGetCache( $wgexLingoCacheType );
		} else {
			$cache = wfGetMainCache();
		}

		$cache->delete( $this->getCacheKey() );
	}
385
386
	/**
	 * Registers the backend with this parser and tells the backend which
	 * parser it now belongs to.
	 *
	 * @since 2.0.1
	 * @param Backend $backend
	 */
	public function setBackend( Backend $backend ) {
		$this->mLingoBackend = $backend;
		$this->mLingoBackend->setLingoParser( $this );
	}
394
395
	/**
	 * Decides whether the output of the given parser should be annotated.
	 *
	 * Annotation is skipped if the argument is not a Parser, if the
	 * 'noglossary' double underscore is set on it, if it has no Title, or if
	 * the title's namespace is explicitly set to false in
	 * $wgexLingoUseNamespaces.
	 *
	 * @param Parser $parser
	 * @return bool
	 */
	protected function shouldParse( &$parser ) {
		global $wgexLingoUseNamespaces;

		// Not a parser, or __NOGLOSSARY__ found in wikitext
		if ( !( $parser instanceof Parser ) || isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) {
			return false;
		}

		$title = $parser->getTitle();

		if ( !( $title instanceof Title ) ) {
			return false;
		}

		$namespace = $title->getNamespace();

		// Only an explicit false disables a namespace; unlisted namespaces are parsed
		$namespaceDisabled = isset( $wgexLingoUseNamespaces[ $namespace ] )
			&& $wgexLingoUseNamespaces[ $namespace ] === false;

		return !$namespaceDisabled;
	}
424
}
425
426