Completed
Push — master ( 161651...6c145b )
by mw
15s
created

InTextAnnotationParser::removeAnnotation()   C

Complexity

Conditions 11
Paths 1

Size

Total Lines 36
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 11

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
eloc 19
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 36
ccs 20
cts 20
cp 1
crap 11
rs 5.2653

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace SMW;
4
5
use Hooks;
6
use SMW\MediaWiki\MagicWordsFinder;
7
use SMW\MediaWiki\RedirectTargetFinder;
8
use SMWOutputs;
9
use Title;
10
11
/**
12
 * Class collects all functions for wiki text parsing / processing that are
13
 * relevant for SMW
14
 *
15
 * This class contains all functions necessary for parsing wiki text before
16
 * it is displayed or previewed while identifying SMW related annotations.
17
 *
18
 * @note Settings involve smwgNamespacesWithSemanticLinks, smwgLinksInValues,
19
 * smwgInlineErrors
20
 *
21
 * @license GNU GPL v2+
22
 * @since 1.9
23
 *
24
 * @author Markus Krötzsch
25
 * @author Denny Vrandecic
26
 * @author mwjames
27
 */
28
class InTextAnnotationParser {
29
30
	/**
31
	 * @var ParserData
32
	 */
33
	private $parserData;
34
35
	/**
36
	 * @var MagicWordsFinder
37
	 */
38
	private $magicWordsFinder;
39
40
	/**
41
	 * @var RedirectTargetFinder
42
	 */
43
	private $redirectTargetFinder;
44
45
	/**
46
	 * @var DataValueFactory
47
	 */
48
	private $dataValueFactory = null;
49
50
	/**
51
	 * @var ApplicationFactory
52
	 */
53
	private $applicationFactory = null;
54
55
	/**
56
	 * @var Settings
57
	 */
58
	protected $settings = null;
59
60
	/**
61
	 * @var boolean
62
	 */
63
	protected $isEnabledNamespace;
64
65
	/**
66
	 * Internal state for switching SMW link annotations off/on during parsing
67
	 * ([[SMW::on]] and [[SMW::off]])
68
	 * @var boolean
69
	 */
70
	protected $isAnnotation = true;
71
72
	/**
73
	 * @var boolean
74
	 */
75
	private $strictModeState = true;
76
77
	/**
78
	 * @since 1.9
79
	 *
80
	 * @param ParserData $parserData
81
	 * @param MagicWordsFinder $magicWordsFinder
82
	 * @param RedirectTargetFinder $redirectTargetFinder
83
	 */
84 217
	public function __construct( ParserData $parserData, MagicWordsFinder $magicWordsFinder, RedirectTargetFinder $redirectTargetFinder ) {
		// Collaborators handed in by the caller
		$this->redirectTargetFinder = $redirectTargetFinder;
		$this->magicWordsFinder = $magicWordsFinder;
		$this->parserData = $parserData;

		// Factories are not injected; they are fetched through their static
		// getInstance() accessors
		$this->dataValueFactory = DataValueFactory::getInstance();
		$this->applicationFactory = ApplicationFactory::getInstance();
	}
91
92
	/**
93
	 * Whether a strict interpretation (e.g. [[property::value:partOfTheValue::alsoPartOfTheValue]])
94
	 * or a more loose interpretation (e.g. [[property1::property2::value]]) for
95
	 * annotations is to be applied.
96
	 *
97
	 * @since 2.3
98
	 *
99
	 * @param boolean $strictModeState
100
	 */
101 195
	public function setStrictModeState( $strictModeState ) {
		// Normalise whatever was passed to a genuine boolean before storing it
		$isStrict = $strictModeState ? true : false;

		$this->strictModeState = $isStrict;
	}
104
105
	/**
106
	 * Parsing text before an article is displayed or previewed, strip out
107
	 * semantic properties and add them to the ParserOutput object
108
	 *
109
	 * @since 1.9
110
	 *
111
	 * @param string &$text
112
	 */
113 203
	public function parse( &$text ) {

		$title = $this->parserData->getTitle();
		$this->settings = $this->applicationFactory->getSettings();
		$start = microtime( true );

		// Identifies the current parser run (especially when called recursively)
		$this->parserData->getSubject()->setContextReference( 'intp:' . uniqid() );

		$this->doStripMagicWordsFromText( $text );

		$this->setSemanticEnabledNamespaceState( $title );
		$this->addRedirectTargetAnnotation( $text );

		$linksInValues = $this->settings->get( 'smwgLinksInValues' );

		// With links in values the complex pattern already separates value and
		// caption so process() is called directly, otherwise preprocess() has
		// to do the split first.
		//
		// The callback is passed as array( $this, ... ) because the string
		// forms 'self::process' / 'self::preprocess' are deprecated callables
		// as of PHP 8.2.
		$processedText = preg_replace_callback(
			$this->getRegexpPattern( $linksInValues ),
			array( $this, $linksInValues ? 'process' : 'preprocess' ),
			$text
		);

		// preg_replace_callback() returns null when PCRE fails (e.g. the
		// backtrack limit is hit on a long text); in that case keep the text
		// untouched instead of replacing the by-reference $text with null
		if ( $processedText !== null ) {
			$text = $processedText;
		}

		if ( $this->isEnabledNamespace ) {
			$this->parserData->getOutput()->addModules( $this->getModules() );

			// recordOption() is not available on every ParserOutput version
			if ( method_exists( $this->parserData->getOutput(), 'recordOption' ) ) {
				$this->parserData->getOutput()->recordOption( 'userlang' );
			}
		}

		$this->parserData->pushSemanticDataToParserOutput();

		// Report the time spent in this method (seconds, 3 decimals)
		$this->parserData->addLimitReport(
			'intext-parsertime',
			number_format( ( microtime( true ) - $start ), 3 )
		);

		SMWOutputs::commitToParserOutput( $this->parserData->getOutput() );
	}
152
153
	/**
154
	 * @since 2.4
155
	 *
156
	 * @param string $text
157
	 *
158
	 * @return string
159
	 */
160 27
	public static function decodeSquareBracket( $text ) {
		// Percent-encoded square brackets and their literal counterparts,
		// position by position
		$encoded = array( '%5B', '%5D' );
		$literal = array( '[', ']' );

		return str_replace( $encoded, $literal, $text );
	}
163
164
	/**
165
	 * @since 2.4
166
	 *
167
	 * @param string $text
168
	 *
169
	 * @return string
170
	 */
171 7
	public static function obscureAnnotation( $text ) {
		$pattern = self::getRegexpPattern( false );
		$decodedText = self::decodeSquareBracket( $text );

		// Every "[" of a matched annotation is swapped for its HTML entity,
		// text outside of annotations is left alone
		$maskOpeningBrackets = function( array $matches ) {
			return str_replace( '[', '&#x005B;', $matches[0] );
		};

		return preg_replace_callback( $pattern, $maskOpeningBrackets, $decodedText );
	}
180
181
	/**
182
	 * @since 2.4
183
	 *
184
	 * @param string $text
185
	 *
186
	 * @return string
187
	 */
188 27
	public static function removeAnnotation( $text ) {
		// Encoded brackets (%5B, %5D) are decoded first so that such
		// annotations are matched as well; the per-match work is done by
		// replaceAnnotationWithDisplayText()
		return preg_replace_callback(
			self::getRegexpPattern( false ),
			array( __CLASS__, 'replaceAnnotationWithDisplayText' ),
			self::decodeSquareBracket( $text )
		);
	}

	/**
	 * Callback of removeAnnotation(), returns the text that remains of a single
	 * matched annotation: the caption if one is given, otherwise the value.
	 *
	 * @param array $matches expects (linktext, properties, value|caption)
	 *
	 * @return string
	 */
	private static function replaceAnnotationWithDisplayText( array $matches ) {

		// #1453 the on/off switches leave no text behind
		if ( $matches[0] === '[[SMW::off]]' || $matches[0] === '[[SMW::on]]' ) {
			return '';
		}

		// Without both capture groups there is neither a value nor a caption
		if ( !isset( $matches[1], $matches[2] ) ) {
			return '';
		}

		$property = $matches[1];
		$value = $matches[2];

		// Strict mode matching: a colon in the property part means the greedy
		// pattern captured too much, so join both parts again and split on the
		// first :: only
		if ( strpos( $property, ':' ) !== false ) {
			list( $property, $value ) = explode( '::', $property . '::' . $value, 2 );
		}

		// #1747 something like [[Foo|Bar::Foobar]] is not an annotation and is
		// returned unchanged
		if ( strpos( $property, '|' ) !== false ) {
			return $matches[0];
		}

		// value|caption, only the first caption segment is used
		$parts = explode( '|', $value );

		return array_key_exists( 1, $parts ) ? $parts[1] : $parts[0];
	}
224
225
	/**
226
	 * @since 2.1
227
	 *
228
	 * @param Title|null $redirectTarget
229
	 */
230 183
	public function setRedirectTarget( Title $redirectTarget = null ) {
		// Forwarded unchanged (null included) to the injected finder
		$finder = $this->redirectTargetFinder;
		$finder->setRedirectTarget( $redirectTarget );
	}
233
234 203
	protected function addRedirectTargetAnnotation( $text ) {

		// Nothing is annotated for a namespace that is not enabled
		if ( !$this->isEnabledNamespace ) {
			return;
		}

		$this->redirectTargetFinder->findRedirectTargetFromText( $text );

		$annotatorFactory = $this->applicationFactory->newPropertyAnnotatorFactory();

		// The annotator receives the finder itself together with the semantic
		// data container of the current parser data
		$annotatorFactory->newRedirectPropertyAnnotator(
			$this->parserData->getSemanticData(),
			$this->redirectTargetFinder
		)->addAnnotation();
	}
248
249
	/**
250
	 * Returns required resource modules
251
	 *
252
	 * @since 1.9
253
	 *
254
	 * @return array
255
	 */
256 199
	protected function getModules() {
		// Module names handed to ParserOutput::addModules() by parse()
		$modules = array();
		$modules[] = 'ext.smw.style';
		$modules[] = 'ext.smw.tooltips';

		return $modules;
	}
262
263
	/**
264
	 * $smwgLinksInValues (default = false) determines which regexp pattern
265
	 * is returned, either a more complex (lib PCRE may cause segfaults if text
266
	 * is long) or a simpler (no segfaults found for those, but no links
267
	 * in values) pattern.
268
	 *
269
	 * If enabled (SMW accepts inputs like [[property::Some [[link]] in value]]),
270
	 * this may lead to PHP crashes (!) when very long texts are
271
	 * used as values. This is due to limitations in the library PCRE that
272
	 * PHP uses for pattern matching.
273
	 *
274
	 * @since 1.9
275
	 *
276
	 * @param boolean $linksInValues
277
	 *
278
	 * @return string
279
	 */
280 212
	protected static function getRegexpPattern( $linksInValues ) {

		// Simple pattern: the second group takes everything up to the closing
		// brackets. NOTE: despite the inline remark in the pattern a "|" is
		// matched by that group as well; preprocess() and removeAnnotation()
		// split value and caption on it afterwards.
		if ( !$linksInValues ) {
			return '/\[\[             # Beginning of the link
				(?:([^:][^]]*):[=:])+ # Property name (or a list of those)
				([^\[\]]*)            # content: anything but [, |, ]
				\]\]                  # End of link
				/xu';
		}

		// Complex pattern: allows [[link]] and [external link] inside the value
		// and captures the display text as a third group
		return '/\[\[             # Beginning of the link
				(?:([^:][^]]*):[=:])+ # Property name (or a list of those)
				(                     # After that:
				  (?:[^|\[\]]         #   either normal text (without |, [ or ])
				  |\[\[[^]]*\]\]      #   or a [[link]]
				  |\[[^]]*\]          #   or an [external link]
				)*)                   # all this zero or more times
				(?:\|([^]]*))?        # Display text (like "text" in [[link|text]]), optional
				\]\]                  # End of link
				/xu';
	}
300
301
	/**
302
	 * A method that precedes the process() callback, it takes care of separating
303
	 * value and caption (instead of leaving this to a more complex regexp).
304
	 *
305
	 * @since 1.9
306
	 *
307
	 * @param array $semanticLink expects (linktext, properties, value|caption)
308
	 *
309
	 * @return string
310
	 */
311 174
	protected function preprocess( array $semanticLink ) {

		// No value group at all: hand over an empty value
		if ( !array_key_exists( 2, $semanticLink ) ) {
			return $this->process( array( $semanticLink[0], $semanticLink[1], '' ) );
		}

		// #1747 avoid a mismatch on an annotation like [[Foo|Bar::Foobar]]
		// where the left part of :: is split and would contain "Foo|Bar"
		// hence this type is categorized as no value annotation
		if ( strpos( $semanticLink[1], '|' ) !== false ) {
			return $semanticLink[0];
		}

		// value|caption, anything after a second pipe is not used
		$segments = explode( '|', $semanticLink[2] );

		$arguments = array( $semanticLink[0], $semanticLink[1], $segments[0] );

		// The caption is only added as fourth element when there is one
		if ( array_key_exists( 1, $segments ) ) {
			$arguments[] = $segments[1];
		}

		return $this->process( $arguments );
	}
340
341
	/**
342
	 * This callback function strips out the semantic attributes from a wiki
343
	 * link.
344
	 *
345
	 * @since 1.9
346
	 *
347
	 * @param array $semanticLink expects (linktext, properties, value|caption)
348
	 *
349
	 * @return string
350
	 */
351 175
	protected function process( array $semanticLink ) {

		$property = '';
		$value = '';

		if ( array_key_exists( 1, $semanticLink ) ) {

			// #1252 Strict mode being disabled for support of multi property
			// assignments (e.g. [[property1::property2::value]])

			// #1066 Strict mode is to check for colon(s) produced by something
			// like [[Foo::Bar::Foobar]], [[Foo:::0049 30 12345678]]
			// In case a colon appears (in what is expected to be a string without a colon)
			// then concatenate the string again and split for the first :: occurrence
			// only
			$requiresResplit = $this->strictModeState &&
				strpos( $semanticLink[1], ':' ) !== false &&
				isset( $semanticLink[2] );

			if ( $requiresResplit ) {
				$pieces = explode( '::', $semanticLink[1] . '::' . $semanticLink[2], 2 );
				$semanticLink[1] = $pieces[0];
				$semanticLink[2] = $pieces[1];
			}

			$property = $semanticLink[1];
		}

		if ( array_key_exists( 2, $semanticLink ) ) {
			$value = $semanticLink[2];
		}

		// silently ignore empty values
		if ( $value === '' ) {
			return '';
		}

		// [[SMW::on]] / [[SMW::off]] only toggle the annotation state and
		// leave no text behind
		if ( $property == 'SMW' ) {
			switch ( $value ) {
				case 'on':
					$this->isAnnotation = true;
					break;
				case 'off':
					$this->isAnnotation = false;
					break;
			}

			return '';
		}

		$valueCaption = array_key_exists( 3, $semanticLink ) ? $semanticLink[3] : false;

		// Extract annotations and create tooltip.
		// A property list such as "Foo::Bar" is split on :: (or :=)
		return $this->addPropertyValue(
			preg_split( '/:[=:]/u', $property ),
			$value,
			$valueCaption
		);
	}
403
404
	/**
405
	 * Adds property values to the ParserOutput instance
406
	 *
407
	 * @since 1.9
408
	 *
409
	 * @param array $properties
410
	 *
411
	 * @return string
412
	 */
413 173
	protected function addPropertyValue( array $properties, $value, $valueCaption ) {
		// $value is the annotation value and $valueCaption its caption (false
		// when none was given), both as passed on by process(). The returned
		// string is the wikitext that replaces the annotation.

		$subject = $this->parserData->getSubject();

		// Stays null when $properties is empty, see the guard below
		$dataValue = null;

		// Neither flag is changed inside this method, so evaluate them once
		$isAnnotationActive = $this->isEnabledNamespace && $this->isAnnotation;

		// Add properties to the semantic container
		foreach ( $properties as $property ) {
			$dataValue = $this->dataValueFactory->newDataValueByText(
				$property,
				$value,
				$valueCaption,
				$subject
			);

			if ( $isAnnotationActive && $this->parserData->canModifySemanticData() ) {
				$this->parserData->addDataValue( $dataValue );
			}
		}

		// No data value was created (empty property list), there is nothing
		// to render, fall back to the caption or the raw value instead of
		// calling a method on an undefined variable
		if ( $dataValue === null ) {
			return $valueCaption !== false ? $valueCaption : $value;
		}

		// Return the text representation (of the data value created last)
		$result = $dataValue->getShortWikitext( true );

		// If necessary add an error text
		if ( $this->settings->get( 'smwgInlineErrors' ) && $isAnnotationActive && !$dataValue->isValid() ) {
			// Encode `:` to avoid a comment block and instead of the nowiki tag
			// use &#58; as placeholder
			$result = str_replace( ':', '&#58;', $result ) . $dataValue->getErrorText();
		}

		return $result;
	}
448
449 203
	protected function doStripMagicWordsFromText( &$text ) {

		$finder = $this->magicWordsFinder;
		$finder->setOutput( $this->parserData->getOutput() );

		// Default magic words; hook handlers receive the list by reference
		// and may alter it
		$candidates = array( 'SMW_NOFACTBOX', 'SMW_SHOWFACTBOX' );

		Hooks::run( 'SMW::Parser::BeforeMagicWordsFinder', array( &$candidates ) );

		// One finder result per magic word, in list order
		// NOTE(review): $text is presumably modified by the finder (hence the
		// by-reference parameter) - confirm against MagicWordsFinder
		$found = array();

		foreach ( $candidates as $candidate ) {
			$found[] = $finder->findMagicWordInText( $candidate, $text );
		}

		$finder->pushMagicWordsToParserOutput( $found );

		return $found;
	}
470
471 203
	private function setSemanticEnabledNamespaceState( Title $title ) {
		// Remember for the rest of the parser run whether the namespace of
		// the title is enabled for semantic annotations
		$namespaceExaminer = $this->applicationFactory->getNamespaceExaminer();
		$namespace = $title->getNamespace();

		$this->isEnabledNamespace = $namespaceExaminer->isSemanticEnabled( $namespace );
	}
474
475
}
476