Completed
Push — master ( aae2fd...925a43 )
by
unknown
06:06
created

LingoParser::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1
Metric Value
dl 0
loc 5
ccs 4
cts 4
cp 1
rs 9.4285
cc 1
eloc 3
nc 1
nop 1
crap 1
1
<?php
2
3
/**
4
 * File holding the Extensions\Lingo\LingoParser class.
5
 *
6
 * This file is part of the MediaWiki extension Lingo.
7
 *
8
 * @copyright 2011 - 2016, Stephan Gambke
9
 * @license   GNU General Public License, version 2 (or any later version)
10
 *
11
 * The Lingo extension is free software: you can redistribute it and/or modify
12
 * it under the terms of the GNU General Public License as published by the Free
13
 * Software Foundation; either version 2 of the License, or (at your option) any
14
 * later version.
15
 *
16
 * The Lingo extension is distributed in the hope that it will be useful, but
17
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19
 * details.
20
 *
21
 * You should have received a copy of the GNU General Public License along
22
 * with this program. If not, see <http://www.gnu.org/licenses/>.
23
 *
24
 * @author Stephan Gambke
25
 *
26
 * @file
27
 * @ingroup Lingo
28
 */
29
namespace Extensions\Lingo;
30
31
use DOMDocument;
32
use DOMXPath;
33
use Parser;
34
35
/**
36
 * This class parses the given text and enriches it with definitions for defined
37
 * terms.
38
 *
39
 * Contains a static function to initiate the parsing.
40
 *
41
 * @ingroup Lingo
42
 */
43
class LingoParser {
44
45
	private $mLingoTree = null;
46
47
	/**
48
	 * @var LingoBackend
49
	 */
50
	private $mLingoBackend = null;
51
	private static $parserSingleton = null;
52
53
	// The RegEx to split a chunk of text into words
54
	public static $regex = null;
55
56
	/**
57
	 * Extensions\Lingo\LingoParser constructor.
58
	 * @param LingoMessageLog|null $messages
59
	 */
60 1
	public function __construct( LingoMessageLog &$messages = null ) {
61 1
		global $wgexLingoBackend;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
62
63 1
		$this->mLingoBackend = new $wgexLingoBackend( $messages );
64 1
	}
65
66
	/**
67
	 * @param Parser $parser
68
	 * @return string
69
	 */
70
	private static function uniqPrefix( Parser &$parser ) {
71
		if ( defined( "Parser::MARKER_PREFIX" ) ) {
0 ignored issues
show
Coding Style Comprehensibility introduced by
The string literal Parser::MARKER_PREFIX does not require double quotes; as per the coding style, please use single quotes.

PHP provides two ways to mark string literals: either with single quotes 'literal' or with double quotes "literal". The difference between these is that string literals in double quotes may contain variables, which are evaluated at run-time, as well as escape sequences.

String literals in single quotes, on the other hand, are evaluated very literally, and the only two characters that need escaping in the literal are the single quote itself (\') and the backslash (\\). Every other character is displayed as is.

Double quoted string literals may contain other variables or more complex escape sequences.

<?php

$singleQuoted = 'Value';
$doubleQuoted = "\tSingle is $singleQuoted";

print $doubleQuoted;

will print an indented: Single is Value

If your string literal does not contain variables or escape sequences, it should be defined using single quotes to make that fact clear.

For more information on PHP string literals and available escape sequences see the PHP core documentation.

Loading history...
72
			return Parser::MARKER_PREFIX;
73
		} else {
74
			return $parser->uniqPrefix();
75
		}
76
	}
77
78
	/**
	 * Static entry point: enriches the given text with glossary definitions.
	 *
	 * On the first call the shared LingoParser instance is created and the
	 * word-splitting regular expression is assembled; every call then hands
	 * the text over to realParse() of that shared instance.
	 *
	 * @param Parser $parser
	 * @param string $text the text to process; passed by reference on to realParse()
	 * @return Boolean always true
	 */
	public static function parse( Parser &$parser, &$text ) {

		if ( self::$parserSingleton === null ) {
			self::$parserSingleton = new self();

			// A "word" is one of:
			// - a placeholder for a stripped item (marker prefix ... marker suffix),
			// - a run of letters and numbers,
			// - a single character that is neither letter nor number.
			$markerStart = preg_quote( self::uniqPrefix( $parser ), '/' );
			$markerEnd = preg_quote( Parser::MARKER_SUFFIX, '/' );

			self::$regex = '/' . $markerStart . '.*?' . $markerEnd . '|[\p{L}\p{N}]+|[^\p{L}\p{N}]/u';
		}

		self::$parserSingleton->realParse( $parser, $text );

		return true;
	}
98
99
	/**
	 * Gives access to the backend object this parser was constructed with.
	 *
	 * @return LingoBackend the backend used by the parser
	 */
	public function getBackend() {
		$backend = $this->mLingoBackend;

		return $backend;
	}
106
107
	/**
	 * Returns the list of terms in the glossary
	 *
	 * The glossary tree is obtained through getLingoTree(), which builds (or
	 * loads) the tree on first use and stores it on this instance. Calling
	 * buildLingo() directly here would discard its return value and leave the
	 * tree property unset, so the subsequent getTermList() call would be made
	 * on null.
	 *
	 * @return Array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		return $this->getLingoTree()->getTermList();
	}
121
122
	/**
123
	 * Returns the list of terms in the glossary as a Extensions\Lingo\LingoTree
124
	 *
125
	 * @return LingoTree a Extensions\Lingo\LingoTree mapping terms (keys) to descriptions (values)
126
	 */
127
	public function getLingoTree() {
128
129
		// build glossary array only once per request
130
		if ( !$this->mLingoTree ) {
131
132
			// use cache if enabled
133
			if ( $this->mLingoBackend->useCache() ) {
134
135
				// Try cache first
136
				global $wgexLingoCacheType;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
137
				$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
138
				$cachekey = wfMemcKey( 'ext', 'lingo', 'lingotree' );
139
				$cachedLingoTree = $cache->get( $cachekey );
140
141
				// cache hit?
142
				if ( $cachedLingoTree !== false && $cachedLingoTree !== null ) {
143
144
					wfDebug( "Cache hit: Got lingo tree from cache.\n" );
145
					$this->mLingoTree = &$cachedLingoTree;
146
147
				} else {
148
149
					wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
150
					$this->mLingoTree =& $this->buildLingo();
151
					$cache->set( $cachekey, $this->mLingoTree );
152
					wfDebug( "Cached lingo tree.\n" );
153
				}
154
			} else {
155
				wfDebug( "Caching of lingo tree disabled.\n" );
156
				$this->mLingoTree =& $this->buildLingo();
157
			}
158
159
		}
160
161
		return $this->mLingoTree;
162
	}
163
164
	/**
	 * Assembles a fresh LingoTree from the entries delivered by the backend.
	 *
	 * Entries are pulled from the backend one at a time until next() yields a
	 * falsy value; each entry is registered in the tree under its term.
	 *
	 * @return LingoTree the newly built tree (returned by reference)
	 */
	protected function &buildLingo() {

		$tree = new LingoTree();

		// drain the backend, adding every delivered element to the tree
		for ( $entry = $this->mLingoBackend->next(); $entry; $entry = $this->mLingoBackend->next() ) {
			$tree->addTerm( $entry[ LingoElement::ELEMENT_TERM ], $entry );
		}

		return $tree;
	}
179
180
	/**
181
	 * Parses the given text and enriches applicable terms
182
	 *
183
	 * This method currently only recognizes terms consisting of max one word
184
	 *
185
	 * @param $parser
186
	 * @param $text
187
	 * @return Boolean
188
	 */
189
	protected function realParse( &$parser, &$text ) {
0 ignored issues
show
Coding Style introduced by
realParse uses the super-global variable $_POST which is generally not recommended.

Instead of super-globals, we recommend explicitly injecting the dependencies of your class. This makes your code less dependent on global state and generally more testable:

// Bad
class Router
{
    public function generate($path)
    {
        return $_SERVER['HOST'].$path;
    }
}

// Better
class Router
{
    private $host;

    public function __construct($host)
    {
        $this->host = $host;
    }

    public function generate($path)
    {
        return $this->host.$path;
    }
}

class Controller
{
    public function myAction(Request $request)
    {
        // Instead of
        $page = isset($_GET['page']) ? intval($_GET['page']) : 1;

        // Better (assuming you use the Symfony2 request)
        $page = $request->query->get('page', 1);
    }
}
Loading history...
190
		global $wgRequest;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
191
192
		$action = $wgRequest->getVal( 'action', 'view' );
193
194
		if ( $text === null ||
195
			$text === '' ||
196
			$action === 'edit' ||
197
			$action === 'ajax' ||
198
			isset( $_POST[ 'wpPreview' ] )
199
		) {
200
201
			return true;
202
		}
203
204
		// Get array of terms
205
		$glossary = $this->getLingoTree();
206
207
		if ( $glossary == null ) {
208
			return true;
209
		}
210
211
		// Parse HTML from page
212
		wfSuppressWarnings();
213
214
		$doc = new DOMDocument( '1.0', 'utf-8' );
215
		$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );
216
217
		wfRestoreWarnings();
218
219
		// Find all text in HTML.
220
		$xpath = new DOMXpath( $doc );
221
		$elements = $xpath->query(
222
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
223
		);
224
225
		// Iterate all HTML text matches
226
		$nb = $elements->length;
227
		$changedDoc = false;
228
229
		for ( $pos = 0; $pos < $nb; $pos++ ) {
230
			$el = $elements->item( $pos );
231
232
			if ( strlen( $el->nodeValue ) < $glossary->getMinTermLength() ) {
233
				continue;
234
			}
235
236
			$matches = array();
237
			preg_match_all(
238
				self::$regex,
239
				$el->nodeValue,
240
				$matches,
241
				PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
242
			);
243
244
			if ( count( $matches ) == 0 || count( $matches[ 0 ] ) == 0 ) {
245
				continue;
246
			}
247
248
			$lexemes = &$matches[ 0 ];
249
			$countLexemes = count( $lexemes );
250
			$parent = &$el->parentNode;
251
			$index = 0;
252
			$changedElem = false;
253
254
			while ( $index < $countLexemes ) {
255
				list( $skipped, $used, $definition ) =
256
					$glossary->findNextTerm( $lexemes, $index, $countLexemes );
257
258
				if ( $used > 0 ) { // found a term
259
					if ( $skipped > 0 ) { // skipped some text, insert it as is
260
						$parent->insertBefore(
261
							$doc->createTextNode(
262
								substr( $el->nodeValue,
263
									$currLexIndex = $lexemes[ $index ][ 1 ],
264
									$lexemes[ $index + $skipped ][ 1 ] - $currLexIndex )
265
							),
266
							$el
267
						);
268
					}
269
270
					$parent->insertBefore( $definition->getFullDefinition( $doc ), $el );
271
272
					$changedElem = true;
273
				} else { // did not find term, just use the rest of the text
274
					// If we found no term now and no term before, there was no
275
					// term in the whole element. Might as well not change the
276
					// element at all.
277
					// Only change element if found term before
278
					if ( $changedElem ) {
279
						$parent->insertBefore(
280
							$doc->createTextNode(
281
								substr( $el->nodeValue, $lexemes[ $index ][ 1 ] )
282
							),
283
							$el
284
						);
285
					} else {
286
						// In principle superfluous, the loop would run out
287
						// anyway. Might save a bit of time.
288
						break;
289
					}
290
				}
291
292
				$index += $used + $skipped;
293
			}
294
295
			if ( $changedElem ) {
296
				$parent->removeChild( $el );
297
				$changedDoc = true;
298
			}
299
		}
300
301
		if ( $changedDoc ) {
302
			$this->loadModules( $parser );
303
304
			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
305
			$text = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );
306
		}
307
308
		return true;
309
	}
310
311
	/**
312
	 * @param $parser
313
	 */
314
	protected function loadModules( &$parser ) {
315
		global $wgOut, $wgScriptPath;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
316
317
		$parserOutput = $parser->getOutput();
318
319
		// load scripts
320
		if ( defined( 'MW_SUPPORTS_RESOURCE_MODULES' ) ) {
321
			$parserOutput->addModules( 'ext.Lingo.Scripts' );
322
323
			if ( !$wgOut->isArticle() ) {
324
				$wgOut->addModules( 'ext.Lingo.Scripts' );
325
			}
326
		} else {
327
			global $wgStylePath;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
328
			$parserOutput->addHeadItem( "<script src='$wgStylePath/common/jquery.min.js'></script>\n", 'ext.Lingo.jq' );
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $wgStylePath instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
329
			$parserOutput->addHeadItem( "<script src='$wgScriptPath/extensions/Lingo/libs/Lingo.js'></script>\n", 'ext.Lingo.Scripts' );
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $wgScriptPath instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
330
331
			if ( !$wgOut->isArticle() ) {
332
				$wgOut->addHeadItem( 'ext.Lingo.jq', "<script src='$wgStylePath/common/jquery.min.js'></script>\n" );
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $wgStylePath instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
333
				$wgOut->addHeadItem( 'ext.Lingo.Scripts', "<script src='$wgScriptPath/extensions/Lingo/libs/Lingo.js'></script>\n" );
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $wgScriptPath instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
334
			}
335
		}
336
337
		// load styles
338
		if ( method_exists( $parserOutput, 'addModuleStyles' ) ) {
339
			$parserOutput->addModuleStyles( 'ext.Lingo.Styles' );
340
			if ( !$wgOut->isArticle() ) {
341
				$wgOut->addModuleStyles( 'ext.Lingo.Styles' );
342
			}
343
		} else {
344
			$parserOutput->addHeadItem( "<link rel='stylesheet' href='$wgScriptPath/extensions/Lingo/styles/Lingo.css' />\n", 'ext.Lingo.Styles' );
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $wgScriptPath instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
345
			if ( !$wgOut->isArticle() ) {
346
				$wgOut->addHeadItem( 'ext.Lingo.Styles', "<link rel='stylesheet' href='$wgScriptPath/extensions/Lingo/styles/Lingo.css' />\n" );
0 ignored issues
show
Coding Style Best Practice introduced by
As per coding-style, please use concatenation or sprintf for the variable $wgScriptPath instead of interpolation.

It is generally a best practice as it is often more readable to use concatenation instead of interpolation for variables inside strings.

// Instead of
$x = "foo $bar $baz";

// Better use either
$x = "foo " . $bar . " " . $baz;
$x = sprintf("foo %s %s", $bar, $baz);
Loading history...
347
			}
348
		}
349
	}
350
351
	/**
352
	 * Purges the lingo tree from the cache.
353
	 */
354
	public static function purgeCache() {
355
356
		global $wgexLingoCacheType;
0 ignored issues
show
Compatibility Best Practice introduced by
Use of global functionality is not recommended; it makes your code harder to test, and less reusable.

Instead of relying on global state, we recommend one of these alternatives:

1. Pass all data via parameters

function myFunction($a, $b) {
    // Do something
}

2. Create a class that maintains your state

class MyClass {
    private $a;
    private $b;

    public function __construct($a, $b) {
        $this->a = $a;
        $this->b = $b;
    }

    public function myFunction() {
        // Do something
    }
}
Loading history...
357
		$cache = ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
358
		$cache->delete( wfMemcKey( 'ext', 'lingo', 'lingotree' ) );
359
360
	}
361
}
362
363