1 | <?php |
||||||
2 | |||||||
3 | /** |
||||||
4 | * File holding the Lingo\LingoParser class. |
||||||
5 | * |
||||||
6 | * This file is part of the MediaWiki extension Lingo. |
||||||
7 | * |
||||||
8 | * @copyright 2011 - 2018, Stephan Gambke |
||||||
9 | * @license GPL-2.0-or-later |
||||||
10 | * |
||||||
11 | * The Lingo extension is free software: you can redistribute it and/or modify |
||||||
12 | * it under the terms of the GNU General Public License as published by the Free |
||||||
13 | * Software Foundation; either version 2 of the License, or (at your option) any |
||||||
14 | * later version. |
||||||
15 | * |
||||||
16 | * The Lingo extension is distributed in the hope that it will be useful, but |
||||||
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
||||||
18 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
||||||
19 | * details. |
||||||
20 | * |
||||||
21 | * You should have received a copy of the GNU General Public License along |
||||||
22 | * with this program. If not, see <http://www.gnu.org/licenses/>. |
||||||
23 | * |
||||||
24 | * @author Stephan Gambke |
||||||
25 | * |
||||||
26 | * @file |
||||||
27 | * @ingroup Lingo |
||||||
28 | */ |
||||||
29 | namespace Lingo; |
||||||
30 | |||||||
31 | use DOMDocument; |
||||||
32 | use DOMXPath; |
||||||
33 | use ObjectCache; |
||||||
0 ignored issues
–
show
|
|||||||
34 | use Parser; |
||||||
0 ignored issues
–
show
The type
Parser was not found. Maybe you did not declare it correctly or list all dependencies?
The issue could also be caused by a filter entry in the build configuration.
If the path has been excluded in your configuration, e.g. filter:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths
Loading history...
|
|||||||
35 | use StubObject; |
||||||
0 ignored issues
–
show
The type
StubObject was not found. Maybe you did not declare it correctly or list all dependencies?
The issue could also be caused by a filter entry in the build configuration.
If the path has been excluded in your configuration, e.g. filter:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths
Loading history...
|
|||||||
36 | use Title; |
||||||
0 ignored issues
–
show
The type
Title was not found. Maybe you did not declare it correctly or list all dependencies?
The issue could also be caused by a filter entry in the build configuration.
If the path has been excluded in your configuration, e.g. filter:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths
Loading history...
|
|||||||
37 | use Wikimedia\AtEase\AtEase; |
||||||
0 ignored issues
–
show
The type
Wikimedia\AtEase\AtEase was not found. Maybe you did not declare it correctly or list all dependencies?
The issue could also be caused by a filter entry in the build configuration.
If the path has been excluded in your configuration, e.g. filter:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths
Loading history...
|
|||||||
38 | |||||||
/**
 * This class parses the given text and enriches it with definitions for defined
 * terms.
 *
 * The glossary is obtained from a Backend (see setBackend()), converted into a
 * Tree and, if the backend allows it, stored in the object cache. The HTML of
 * a parser output is then scanned text node by text node and every glossary
 * term found is replaced by its formatted representation.
 *
 * Contains a static function to initiate the parsing.
 *
 * @ingroup Lingo
 */
class LingoParser {

	// Indexes into the word descriptors produced by preg_match_all() with
	// PREG_OFFSET_CAPTURE: [ 0 => matched string, 1 => byte offset ]
	const WORD_VALUE = 0;
	const WORD_OFFSET = 1;

	/**
	 * @var Tree|null the glossary tree; built or fetched from cache on first use
	 */
	private $mLingoTree = null;

	/**
	 * @var Backend|null
	 */
	private $mLingoBackend = null;

	/**
	 * @var LingoParser|null
	 */
	private static $parserSingleton = null;

	/**
	 * @var array|null Api params passed in from ApiMakeParserOptions Hook
	 */
	private $mApiParams = null;

	/**
	 * @var string|null The RegEx to split a chunk of text into words
	 */
	public $regex = null;

	/**
	 * Lingo\LingoParser constructor.
	 *
	 * Only assembles the word-splitting regular expression; the parameter is
	 * accepted for compatibility but not used by this class.
	 *
	 * @param MessageLog|null &$messages
	 */
	public function __construct( MessageLog &$messages = null ) {
		// The RegEx to split a chunk of text into words
		// Words are: placeholders for stripped items, sequences of letters and numbers,
		// single characters that are neither letter nor number
		$this->regex = '/' . preg_quote( Parser::MARKER_PREFIX, '/' ) . '.*?' . preg_quote( Parser::MARKER_SUFFIX, '/' ) . '|[\p{L}\p{N}]+|[^\p{L}\p{N}]/u';
	}

	/**
	 * Enriches the output of the given parser, provided shouldParse() allows it.
	 *
	 * @param Parser $mwParser
	 *
	 * @return bool always true
	 */
	public function parse( $mwParser ) {
		if ( $this->shouldParse( $mwParser ) ) {
			$this->realParse( $mwParser );
		}

		return true;
	}

	/**
	 * Returns the shared parser instance, creating it on first use.
	 *
	 * @return LingoParser
	 * @since 2.0.1
	 */
	public static function getInstance() {
		if ( !self::$parserSingleton ) {
			self::$parserSingleton = new LingoParser();
		}

		return self::$parserSingleton;
	}

	/**
	 * Builds the cache key under which the glossary tree is stored.
	 *
	 * The key contains the tree version and the backend class, so a change of
	 * either results in a different cache entry.
	 *
	 * @return string
	 * @throws \MWException if no backend was set
	 */
	private function getCacheKey() {
		// FIXME: If Lingo ever stores the glossary tree per user, then the cache key also needs to include the user id (see T163608)
		return ObjectCache::getLocalClusterInstance()->makeKey( 'ext', 'lingo', 'lingotree', Tree::TREE_VERSION, get_class( $this->getBackend() ) );
	}

	/**
	 * Returns the cache used for the glossary tree: the one configured in
	 * $wgexLingoCacheType or, if that is null, the main cache.
	 *
	 * @return mixed the cache object returned by wfGetCache() / wfGetMainCache()
	 */
	private function getCache() {
		global $wgexLingoCacheType;

		return ( $wgexLingoCacheType !== null ) ? wfGetCache( $wgexLingoCacheType ) : wfGetMainCache();
	}

	/**
	 * @return Backend the backend used by the parser
	 * @throws \MWException if no backend was set
	 */
	public function getBackend() {
		if ( $this->mLingoBackend === null ) {
			throw new \MWException( 'No Lingo backend available!' );
		}

		return $this->mLingoBackend;
	}

	/**
	 * Returns the list of terms in the glossary
	 *
	 * @return array an array mapping terms (keys) to descriptions (values)
	 */
	public function getLingoArray() {
		return $this->getLingoTree()->getTermList();
	}

	/**
	 * Returns the list of terms in the glossary as a Lingo\Tree
	 *
	 * @return Tree a Lingo\Tree mapping terms (keys) to descriptions (values)
	 * @throws \MWException if no backend was set
	 */
	public function getLingoTree() {
		// build glossary array only once per request
		if ( $this->mLingoTree ) {
			return $this->mLingoTree;
		}

		if ( !$this->getBackend()->useCache() ) {
			wfDebug( "Caching of lingo tree disabled.\n" );
			$this->mLingoTree = $this->buildLingo();

			return $this->mLingoTree;
		}

		// Try cache first
		$cache = $this->getCache();
		$cachekey = $this->getCacheKey();
		$cachedLingoTree = $cache->get( $cachekey );

		// cache hit?
		if ( $cachedLingoTree !== false && $cachedLingoTree !== null ) {
			wfDebug( "Cache hit: Got lingo tree from cache.\n" );
			$this->mLingoTree = $cachedLingoTree;
			wfDebug( "Re-cached lingo tree.\n" );
		} else {
			wfDebug( "Cache miss: Lingo tree not found in cache.\n" );
			$this->mLingoTree = $this->buildLingo();
			wfDebug( "Cached lingo tree.\n" );
		}

		// Keep for one month
		// Limiting the cache validity will allow to purge stale cache
		// entries inserted by older versions after one month.
		// The entry is written on a hit as well as on a miss.
		$cache->set( $cachekey, $this->mLingoTree, 60 * 60 * 24 * 30 );

		return $this->mLingoTree;
	}

	/**
	 * Builds a fresh glossary tree from all elements the backend provides.
	 *
	 * @return Tree
	 * @throws \MWException if no backend was set
	 */
	protected function &buildLingo() {
		$lingoTree = new Tree();

		// Go through getBackend() so that a missing backend raises the
		// documented exception instead of a call on null.
		$backend = $this->getBackend();

		// assemble the result tree; the backend signals the end with a falsy value
		while ( $elementData = $backend->next() ) {
			$lingoTree->addTerm( $elementData[ Element::ELEMENT_TERM ], $elementData );
		}

		return $lingoTree;
	}

	/**
	 * Loads the given HTML fragment into a DOM document.
	 *
	 * Warnings are suppressed while loading and restored afterwards, even if
	 * loading throws.
	 *
	 * @param string $text HTML fragment to be wrapped into a html/body skeleton
	 *
	 * @return DOMDocument
	 */
	private function loadHtmlDocument( $text ) {
		// TODO: Remove calls to \MediaWiki\suppressWarnings() and
		// \MediaWiki\restoreWarnings() for MW 1.34+.
		// \Wikimedia\AtEase\AtEase is available from MW 1.34.
		// Decide once, so that suppressing and restoring always use the same API.
		$useAtEase = method_exists( AtEase::class, 'suppressWarnings' );

		if ( $useAtEase ) {
			AtEase::suppressWarnings();
		} else {
			\MediaWiki\suppressWarnings();
		}

		try {
			$doc = new DOMDocument( '1.0', 'utf-8' );
			$doc->loadHTML( '<html><head><meta http-equiv="content-type" content="charset=utf-8"/></head><body>' . $text . '</body></html>' );
		} finally {
			if ( $useAtEase ) {
				AtEase::restoreWarnings();
			} else {
				\MediaWiki\restoreWarnings();
			}
		}

		return $doc;
	}

	/**
	 * Parses the given text and enriches applicable terms
	 *
	 * Every text node that is neither inside a link nor inside an element of
	 * class 'noglossary' is split into words. The text node is then replaced
	 * by a sequence of plain text nodes and formatted terms. If at least one
	 * term was found, the modified HTML followed by the parsed definitions is
	 * written back to the parser output.
	 *
	 * @param Parser &$parser
	 *
	 * @return bool always true
	 */
	protected function realParse( &$parser ) {
		// Parse text identical to options used in includes/api/ApiParse.php
		$params = $this->mApiParams;

		if ( $params === null ) {
			$text = $parser->getOutput()->getText();
		} else {
			$text = $parser->getOutput()->getText( [
				'allowTOC' => !$params['disabletoc'],
				'enableSectionEditLinks' => !$params['disableeditsection'],
				'wrapperDivClass' => $params['wrapoutputclass'],
				'deduplicateStyles' => !$params['disablestylededuplication'],
			] );
		}

		if ( $text === null || $text === '' ) {
			return true;
		}

		// Get tree of terms
		$glossary = $this->getLingoTree();

		if ( !$glossary ) {
			return true;
		}

		// Parse HTML from page
		$doc = $this->loadHtmlDocument( $text );

		// Find all text in HTML.
		// NOTE(review): @class='noglossary' only matches when the class
		// attribute is exactly 'noglossary'; elements carrying additional
		// classes are not excluded. Confirm whether that is intended.
		$xpath = new DOMXPath( $doc );
		$textElements = $xpath->query(
			"//*[not(ancestor-or-self::*[@class='noglossary'] or ancestor-or-self::a)][text()!=' ']/text()"
		);

		// Iterate all HTML text matches
		$numberOfTextElements = $textElements->length;

		$definitions = [];

		for ( $textElementIndex = 0; $textElementIndex < $numberOfTextElements; $textElementIndex++ ) {
			$textElement = $textElements->item( $textElementIndex );

			// Cheap pre-check: text shorter than the shortest term cannot contain a term.
			// NOTE(review): strlen() counts bytes; presumably getMinTermLength()
			// is comparable to that - confirm against Tree.
			if ( strlen( $textElement->nodeValue ) < $glossary->getMinTermLength() ) {
				continue;
			}

			$matches = [];
			preg_match_all(
				$this->regex,
				$textElement->nodeValue,
				$matches,
				PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER
			);

			if ( count( $matches ) === 0 || count( $matches[ 0 ] ) === 0 ) {
				continue;
			}

			$wordDescriptors = $matches[ 0 ]; // See __construct() for definition of "word"
			$numberOfWordDescriptors = count( $wordDescriptors );

			$parentNode = $textElement->parentNode;

			$wordDescriptorIndex = 0;
			$changedElem = false;

			while ( $wordDescriptorIndex < $numberOfWordDescriptors ) {

				/** @var \Lingo\Element $definition */
				list( $skippedWords, $usedWords, $definition ) =
					$glossary->findNextTerm( $wordDescriptors, $wordDescriptorIndex, $numberOfWordDescriptors );

				if ( $usedWords > 0 ) { // found a term

					if ( $skippedWords > 0 ) { // skipped some text, insert it as is

						// PREG_OFFSET_CAPTURE yields byte offsets, hence the
						// byte-based substr() is the matching function here.
						$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];
						$length = $wordDescriptors[ $wordDescriptorIndex + $skippedWords ][ self::WORD_OFFSET ] - $start;

						$parentNode->insertBefore(
							$doc->createTextNode(
								substr( $textElement->nodeValue, $start, $length )
							),
							$textElement
						);
					}

					$parentNode->insertBefore( $definition->getFormattedTerm( $doc ), $textElement );

					// keyed by id, so each definition is emitted only once per page
					$definitions[ $definition->getId() ] = $definition->getFormattedDefinitions();

					$changedElem = true;

				} else { // did not find any term, just use the rest of the text

					// If we found no term now and no term before, there was no
					// term in the whole element. Might as well not change the
					// element at all.

					// Only change element if found term before
					if ( $changedElem === true ) {

						$start = $wordDescriptors[ $wordDescriptorIndex ][ self::WORD_OFFSET ];

						$parentNode->insertBefore(
							$doc->createTextNode(
								substr( $textElement->nodeValue, $start )
							),
							$textElement
						);
					}

					// In principle superfluous, the loop would run out anyway. Might save a bit of time.
					break;
				}

				$wordDescriptorIndex += $usedWords + $skippedWords;
			}

			// The original text node has been re-created piece by piece in front
			// of itself, so it can go now.
			if ( $changedElem ) {
				$parentNode->removeChild( $textElement );
			}
		}

		if ( count( $definitions ) > 0 ) {

			$this->loadModules( $parser );

			// Strip everything up to <body> and from </body> on.
			// U - Ungreedy, D - dollar matches only end of string, s - dot matches newlines
			$text = preg_replace( '%(^.*<body>)|(</body>.*$)%UDs', '', $doc->saveHTML() );
			$text .= $parser->recursiveTagParseFully( implode( $definitions ) );

			$parser->getOutput()->setText( $text );
		}

		return true;
	}

	/**
	 * Registers the 'ext.Lingo' resource module with the parser output and,
	 * if the current output page is not an article, with $wgOut as well.
	 *
	 * @param Parser &$parser
	 */
	protected function loadModules( &$parser ) {
		global $wgOut;

		$parserOutput = $parser->getOutput();

		// load scripts
		$parserOutput->addModules( 'ext.Lingo' );

		if ( !$wgOut->isArticle() ) {
			$wgOut->addModules( 'ext.Lingo' );
		}
	}

	/**
	 * Purges the lingo tree from the cache.
	 *
	 * @deprecated 2.0.2 use purgeGlossaryFromCache() on the instance instead
	 */
	public static function purgeCache() {
		self::getInstance()->purgeGlossaryFromCache();
	}

	/**
	 * Purges the lingo tree from the cache.
	 *
	 * @since 2.0.2
	 * @throws \MWException if no backend was set
	 */
	public function purgeGlossaryFromCache() {
		$this->getCache()->delete( $this->getCacheKey() );
	}

	/**
	 * Sets the backend and registers this parser with it.
	 *
	 * @since 2.0.1
	 * @param Backend $backend
	 */
	public function setBackend( Backend $backend ) {
		$this->mLingoBackend = $backend;
		$backend->setLingoParser( $this );
	}

	/**
	 * Set parser options from API
	 *
	 * realParse() reads the keys 'disabletoc', 'disableeditsection',
	 * 'wrapoutputclass' and 'disablestylededuplication' from this array.
	 *
	 * @param array $params
	 */
	public function setApiParams( array $params ) {
		$this->mApiParams = $params;
	}

	/**
	 * Decides whether the output of the given parser should be enriched.
	 *
	 * Parsing is skipped if the argument is no Parser (or StubObject), if
	 * __NOGLOSSARY__ was used, if the parser has no Title, or if the title's
	 * namespace is explicitly set to false in $wgexLingoUseNamespaces.
	 *
	 * @param Parser &$parser
	 * @return bool
	 */
	protected function shouldParse( &$parser ) {
		global $wgexLingoUseNamespaces;

		if ( !( $parser instanceof Parser || $parser instanceof StubObject ) ) {
			return false;
		}

		if ( isset( $parser->mDoubleUnderscores[ 'noglossary' ] ) ) { // __NOGLOSSARY__ found in wikitext
			return false;
		}

		$title = $parser->getTitle();

		if ( !( $title instanceof Title ) ) {
			return false;
		}

		$namespace = $title->getNamespace();

		// Namespaces that are not listed are parsed; only an explicit false disables them.
		if ( isset( $wgexLingoUseNamespaces[ $namespace ] ) && $wgexLingoUseNamespaces[ $namespace ] === false ) {
			return false;
		}

		return true;
	}
}
439 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
, you can move it to the dependency path list as follows:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths