Completed
Branch master (fa787a)
by Stephan
02:24
created

LingoParser   B

Complexity

Total Complexity 46

Size/Duplication

Total Lines 359
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 3

Test Coverage

Coverage 2.24%

Importance

Changes 0
Metric Value
wmc 46
lcom 1
cbo 3
dl 0
loc 359
ccs 3
cts 134
cp 0.0224
rs 8.3999
c 0
b 0
f 0

14 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 5 1
A parse() 0 14 2
A getInstance() 0 8 2
A getCacheKey() 0 3 1
A getBackend() 0 8 2
A getLingoArray() 0 9 2
B getLingoTree() 0 42 6
A buildLingo() 0 12 2
D realParse() 0 121 17
A loadModules() 0 19 3
A purgeCache() 0 4 1
A purgeGlossaryFromCache() 0 6 2
A setBackend() 0 4 1
A shouldParse() 0 13 4

How to fix   Complexity   

Complex Class

Complex classes like LingoParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use LingoParser, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * File holding the Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2016, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Lingo;
30
31
use DOMXPath;
32
use Parser;
33
34
/**
35
 * This class parses the given text and enriches it with definitions for defined
36
 * terms.
37
 *
38
 * Contains a static function to initiate the parsing.
39
 *
40
 * @ingroup Lingo
41
 */
42
class LingoParser {
43
44
	private $mLingoTree = null;
45
46
	/**
47
	 * @var Backend
48
	 */
49
	private $mLingoBackend = null;
50
	private static $parserSingleton = null;
51
52
	// The RegEx to split a chunk of text into words
53
	public $regex = null;
54
55
	/**
56
	 * Lingo\LingoParser constructor.
57
	 * @param MessageLog|null $messages
58
	 */
59 1
	public function __construct( MessageLog &$messages = null ) {
0 ignored issues
show
Unused Code introduced by
The parameter $messages is not used and could be removed.

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
60
		// The RegEx to split a chunk of text into words
61
		// Words are: placeholders for stripped items, sequences of letters and numbers, single characters that are neither letter nor number
62 1
		$this->regex = '/' . preg_quote( Parser::MARKER_PREFIX, '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' ) . '|[\p{L}\p{N}]+|[^\p{L}\p{N}]/u';
63 1
	}
64
65
	/**
66
	 *
67
	 * @param Parser $parser
68
	 * @param string $text
69
	 * @return Boolean
70
	 */
71
	public function parse( Parser &$parser, &$text ) {

		// Leave the text untouched when this parser run is not eligible for
		// glossary processing. The method reports success either way.
		if ( !$this->shouldParse( $parser ) ) {
			return true;
		}

		// Unstrip strip items of the 'general' group before looking for terms.
		// The calling parse will do this again once the hook returns; doing it
		// twice should not hurt. The only caveat is that other hook handlers
		// might not expect strip items to be unstripped already.
		$text = $parser->mStripState->unstripGeneral( $text );
		$this->realParse( $parser, $text );

		return true;
	}
85
86
	/**
87
	 * @return LingoParser
88
	 * @since 2.0.1
89
	 */
90
	public static function getInstance() {
		// Create the shared parser lazily on first use; every later call
		// hands back that same object.
		if ( self::$parserSingleton === null ) {
			self::$parserSingleton = new self();
		}

		return self::$parserSingleton;
	}
98
99
	/**
100
	 * @return string
101
	 */
102
	private static function getCacheKey() {
		// The key includes the tree format version and the class of the
		// backend held by the shared parser instance.
		$backendClass = get_class( self::getInstance()->getBackend() );

		return wfMemcKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, $backendClass );
	}
105
106
	/**
107
	 * @return Backend the backend used by the parser
108
	 * @throws \MWException
109
	 */
110
	public function getBackend() {

		// Hand out the backend if one has been set.
		if ( $this->mLingoBackend !== null ) {
			return $this->mLingoBackend;
		}

		// No backend has been set on this parser.
		throw new \MWException( 'No Lingo backend available!' );
	}
118
119
	/**
120
	 * Returns the list of terms in the glossary
121
	 *
122
	 * @return Array an array mapping terms (keys) to descriptions (values)
123
	 */
124
	public function getLingoArray() {

		// Obtain the tree through getLingoTree(), which builds it at most once
		// per request and stores it in $this->mLingoTree.
		//
		// Previously this method called buildLingo() and discarded its return
		// value, so $this->mLingoTree was still null when getTermList() was
		// invoked on it, causing a fatal error on first use.
		return $this->getLingoTree()->getTermList();
	}
133
134
	/**
135
	 * Returns the list of terms in the glossary as a Lingo\Tree
136
	 *
137
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
138
	 */
139
	public function getLingoTree() {
140
141
		// build glossary array only once per request
142
		if ( !$this->mLingoTree ) {
143
144
			// use cache if enabled
145
			if ( $this->mLingoBackend->useCache() ) {
146
147
				// Try cache first
148
				global $wgexLingoCacheType;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
149
				$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
150
				$cachekey = $this->getCacheKey();
151
				$cachedLingoTree = $cache->get( $cachekey );
152
153
				// cache hit?
154
				if ( $cachedLingoTree !== false && $cachedLingoTree !== null ) {
155
156
					wfDebug( "Cache hit: Got lingo tree from cache.\n" );
157
					$this->mLingoTree = &$cachedLingoTree;
158
159
					wfDebug( "Re-cached lingo tree.\n" );
160
				} else {
161
162
					wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
163
					$this->mLingoTree =& $this->buildLingo();
164
					wfDebug( "Cached lingo tree.\n" );
165
				}
166
167
				// Keep for one month
168
				// Limiting the cache validity will allow to purge stale cache
169
				// entries inserted by older versions after one month
170
				$cache->set( $cachekey, $this->mLingoTree, 60 * 60 * 24 * 30 );
171
172
			} else {
173
				wfDebug( "Caching of lingo tree disabled.\n" );
174
				$this->mLingoTree =& $this->buildLingo();
175
			}
176
177
		}
178
179
		return $this->mLingoTree;
180
	}
181
182
	/**
183
	 * @return Tree
184
	 */
185
	protected function &buildLingo() {

		$tree = new Tree();

		// Keep pulling element data from the backend until next() yields a
		// falsy value, registering each element under its term.
		for ( $data = $this->mLingoBackend->next(); $data; $data = $this->mLingoBackend->next() ) {
			$tree->addTerm( $data[ Element::ELEMENT_TERM ], $data );
		}

		return $tree;
	}
197
198
	/**
199
	 * Parses the given text and enriches applicable terms
200
	 *
201
	 * This method currently only recognizes terms consisting of max one word
202
	 *
203
	 * @param $parser
204
	 * @param $text
205
	 * @return Boolean
206
	 */
207
	protected function realParse( &$parser, &$text ) {
0 ignored issues
show
Coding Style introduced by
realParse uses the super-global variable $_POST which is generally not recommended.

Instead of super-globals, we recommend to explicitly inject the dependencies of your class. This makes your code less dependent on global state and it becomes generally more testable:

// Bad
class Router
{
    public function generate($path)
    {
        return $_SERVER['HOST'].$path;
    }
}

// Better
class Router
{
    private $host;

    public function __construct($host)
    {
        $this->host = $host;
    }

    public function generate($path)
    {
        return $this->host.$path;
    }
}

class Controller
{
    public function myAction(Request $request)
    {
        // Instead of
        $page = isset($_GET['page']) ? intval($_GET['page']) : 1;

        // Better (assuming you use the Symfony2 request)
        $page = $request->query->get('page', 1);
    }
}
Loading history...
208
		global $wgRequest;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
209
210
		$action = $wgRequest->getVal( 'action', 'view' );
211
212
		if ( $text === null ||
213
			$text === '' ||
214
			$action === 'edit' ||
215
			$action === 'ajax' ||
216
			isset( $_POST[ 'wpPreview' ] )
217
		) {
218
219
			return true;
220
		}
221
222
		// Get array of terms
223
		$glossary = $this->getLingoTree();
224
225
		if ( $glossary == null ) {
226
			return true;
227
		}
228
229
		// Parse HTML from page
230
		\MediaWiki\suppressWarnings();
231
232
		$doc = new StashingDOMDocument( '1.0', 'utf-8' );
233
		$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );
234
235
		\MediaWiki\restoreWarnings();
236
237
		// Find all text in HTML.
238
		$xpath = new DOMXpath( $doc );
239
		$elements = $xpath->query(
240
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
241
		);
242
243
		// Iterate all HTML text matches
244
		$nb = $elements->length;
245
		$changedDoc = false;
246
247
		for ( $pos = 0; $pos < $nb; $pos++ ) {
248
			$el = $elements->item( $pos );
249
250
			if ( strlen( $el->nodeValue ) < $glossary->getMinTermLength() ) {
251
				continue;
252
			}
253
254
			$matches = array();
255
			preg_match_all(
256
				$this->regex,
257
				$el->nodeValue,
258
				$matches,
259
				PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
260
			);
261
262
			if ( count( $matches ) == 0 || count( $matches[ 0 ] ) == 0 ) {
263
				continue;
264
			}
265
266
			$lexemes = &$matches[ 0 ];
267
			$countLexemes = count( $lexemes );
268
			$parent = &$el->parentNode;
269
			$index = 0;
270
			$changedElem = false;
271
272
			while ( $index < $countLexemes ) {
273
				list( $skipped, $used, $definition ) =
274
					$glossary->findNextTerm( $lexemes, $index, $countLexemes );
275
276
				if ( $used > 0 ) { // found a term
277
					if ( $skipped > 0 ) { // skipped some text, insert it as is
278
						$parent->insertBefore(
279
							$doc->createTextNode(
280
								substr( $el->nodeValue,
281
									$currLexIndex = $lexemes[ $index ][ 1 ],
282
									$lexemes[ $index + $skipped ][ 1 ] - $currLexIndex )
283
							),
284
							$el
285
						);
286
					}
287
288
					$parent->insertBefore( $definition->getFullDefinition( $doc ), $el );
289
290
					$changedElem = true;
291
				} else { // did not find term, just use the rest of the text
292
					// If we found no term now and no term before, there was no
293
					// term in the whole element. Might as well not change the
294
					// element at all.
295
					// Only change element if found term before
296
					if ( $changedElem ) {
297
						$parent->insertBefore(
298
							$doc->createTextNode(
299
								substr( $el->nodeValue, $lexemes[ $index ][ 1 ] )
300
							),
301
							$el
302
						);
303
					} else {
304
						// In principle superfluous, the loop would run out
305
						// anyway. Might save a bit of time.
306
						break;
307
					}
308
				}
309
310
				$index += $used + $skipped;
311
			}
312
313
			if ( $changedElem ) {
314
				$parent->removeChild( $el );
315
				$changedDoc = true;
316
			}
317
		}
318
319
		if ( $changedDoc ) {
320
			$this->loadModules( $parser );
321
322
			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
323
			$text = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );
324
		}
325
326
		return true;
327
	}
328
329
	/**
330
	 * @param Parser $parser
331
	 */
332
	protected function loadModules( &$parser ) {
333
		global $wgOut, $wgScriptPath;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
334
335
		$parserOutput = $parser->getOutput();
336
337
		// load scripts
338
		$parserOutput->addModules( 'ext.Lingo.Scripts' );
339
340
		if ( !$wgOut->isArticle() ) {
341
			$wgOut->addModules( 'ext.Lingo.Scripts' );
342
		}
343
344
		// load styles
345
		$parserOutput->addModuleStyles( 'ext.Lingo.Styles' );
346
347
		if ( !$wgOut->isArticle() ) {
348
			$wgOut->addModuleStyles( 'ext.Lingo.Styles' );
349
		}
350
	}
351
352
	/**
353
	 * Purges the lingo tree from the cache.
354
	 *
355
	 * @deprecated 2.0.2
356
	 */
357
	public static function purgeCache() {

		// Static entry point: forwards to purgeGlossaryFromCache() on the
		// shared parser instance.
		$sharedParser = self::getInstance();
		$sharedParser->purgeGlossaryFromCache();
	}
361
362
	/**
363
	 * Purges the lingo tree from the cache.
364
	 *
365
	 * @since 2.0.2
366
	 */
367
	public function purgeGlossaryFromCache() {
368
369
		global $wgexLingoCacheType;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
370
		$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
371
		$cache->delete( self::getCacheKey() );
372
	}
373
374
	/**
375
	 * @since 2.0.1
376
	 * @param Backend $backend
377
	 */
378
	public function setBackend( Backend $backend ) {
		// Store the backend first, then hand this parser to it via
		// setLingoParser().
		$this->mLingoBackend = $backend;
		$this->mLingoBackend->setLingoParser( $this );
	}
382
383
	/**
384
	 * @param Parser $parser
385
	 * @return bool
386
	 */
387
	protected function shouldParse( Parser &$parser ) {
		global $wgexLingoUseNamespaces;

		// __NOGLOSSARY__ present on the page: never parse.
		if ( isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) {
			return false;
		}

		$title = $parser->getTitle();

		// Title not set (i.e. when text is outside the page content): parse.
		// This check has to come before any method call on $title; the
		// namespace used to be read first, which failed on a missing title.
		if ( !$title ) {
			return true;
		}

		$namespace = $title->getNamespace();

		// Parse unless the namespace is explicitly forbidden, i.e. it is listed
		// in $wgexLingoUseNamespaces with a falsy value.
		return !isset( $wgexLingoUseNamespaces[ $namespace ] ) || $wgexLingoUseNamespaces[ $namespace ];
	}
400
}
401
402