Complex classes like InTextAnnotationParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use InTextAnnotationParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
29 | class InTextAnnotationParser { |
||
30 | |||
31 | /** |
||
32 | * Internal state for switching SMW link annotations off/on during parsing |
||
33 | * ([[SMW::on]] and [[SMW::off]]) |
||
34 | */ |
||
35 | const OFF = '[[SMW::off]]'; |
||
36 | const ON = '[[SMW::on]]'; |
||
37 | |||
38 | /** |
||
39 | * @var ParserData |
||
40 | */ |
||
41 | private $parserData; |
||
42 | |||
43 | /** |
||
44 | * @var MagicWordsFinder |
||
45 | */ |
||
46 | private $magicWordsFinder; |
||
47 | |||
48 | /** |
||
49 | * @var RedirectTargetFinder |
||
50 | */ |
||
51 | private $redirectTargetFinder; |
||
52 | |||
53 | /** |
||
54 | * @var DataValueFactory |
||
55 | */ |
||
56 | private $dataValueFactory = null; |
||
57 | |||
58 | /** |
||
59 | * @var ApplicationFactory |
||
60 | */ |
||
61 | private $applicationFactory = null; |
||
62 | |||
63 | /** |
||
64 | * @var Settings |
||
65 | */ |
||
66 | protected $settings = null; |
||
67 | |||
68 | /** |
||
69 | * @var boolean |
||
70 | */ |
||
71 | protected $isEnabledNamespace; |
||
72 | |||
73 | /** |
||
74 | * Internal state for switching SMW link annotations off/on during parsing |
||
75 | * ([[SMW::on]] and [[SMW::off]]) |
||
76 | * @var boolean |
||
77 | */ |
||
78 | protected $isAnnotation = true; |
||
79 | |||
80 | /** |
||
81 | * @var boolean |
||
82 | */ |
||
83 | private $isStrictMode = true; |
||
84 | |||
85 | /** |
||
86 | * @var boolean|integer |
||
87 | */ |
||
88 | private $enabledLinksInValues = false; |
||
89 | |||
/**
 * Wires up the parser with its collaborators.
 *
 * Besides the three injected services, the DataValueFactory and the
 * ApplicationFactory are obtained through their getInstance() accessors.
 *
 * @since 1.9
 *
 * @param ParserData $parserData
 * @param MagicWordsFinder $magicWordsFinder
 * @param RedirectTargetFinder $redirectTargetFinder
 */
public function __construct( ParserData $parserData, MagicWordsFinder $magicWordsFinder, RedirectTargetFinder $redirectTargetFinder ) {
	// Shared factories (not injected)
	$this->dataValueFactory = DataValueFactory::getInstance();
	$this->applicationFactory = ApplicationFactory::getInstance();

	// Injected services
	$this->redirectTargetFinder = $redirectTargetFinder;
	$this->magicWordsFinder = $magicWordsFinder;
	$this->parserData = $parserData;
}
||
104 | |||
/**
 * Selects how annotations containing several `::` are interpreted:
 * strict (e.g [[property::value:partOfTheValue::alsoPartOfTheValue]], only
 * the first `::` separates property and value) or loose
 * (e.g. [[property1::property2::value]], multiple properties share a value).
 *
 * Despite the name this is a setter; the argument is coerced to a boolean.
 *
 * @since 2.3
 *
 * @param boolean $isStrictMode
 */
public function isStrictMode( $isStrictMode ) {
	$this->isStrictMode = $isStrictMode ? true : false;
}
||
117 | |||
/**
 * Sets the links-in-values mode.
 *
 * The value is stored as given (no cast). parse() evaluates it with a
 * bitwise AND against SMW_LINV_OBFU and SMW_LINV_PCRE, so an integer
 * bitmask of those flags (or false to disable) is expected.
 *
 * @since 2.5
 *
 * @param boolean|integer $enabledLinksInValues
 */
public function enabledLinksInValues( $enabledLinksInValues ) {
	$this->enabledLinksInValues = $enabledLinksInValues;
}
||
126 | |||
/**
 * Parsing text before an article is displayed or previewed, strip out
 * semantic properties and add them to the ParserOutput object.
 *
 * Steps, in order: tag the subject with a per-run context reference, strip
 * magic words, determine whether the title's namespace is enabled, add the
 * redirect annotation, optionally obfuscate nested links, run the
 * annotation regexp over the text, undo any remaining obfuscation, register
 * resource modules (enabled namespaces only), push the collected semantic
 * data to the ParserOutput and record the elapsed parser time.
 *
 * @since 1.9
 *
 * @param string &$text wikitext, modified in place
 */
public function parse( &$text ) {

	$title = $this->parserData->getTitle();
	$this->settings = $this->applicationFactory->getSettings();
	$start = microtime( true );

	// Identifies the current parser run (especially when called recursively)
	$this->parserData->getSubject()->setContextReference( 'intp:' . uniqid() );

	$this->doStripMagicWordsFromText( $text );

	$this->isEnabledNamespace = $this->isSemanticEnabledForNamespace( $title );

	$this->addRedirectTargetAnnotationFromText(
		$text
	);

	// Obscure [/] to find a set of [[ :: ... ]] while those in-between are left for
	// decoding for a later processing so that the regex can split the text
	// appropriately
	if ( ( $this->enabledLinksInValues & SMW_LINV_OBFU ) != 0 ) {
		$text = Obfuscator::obfuscateLinks( $text, $this );
	}

	$linksInValuesPcre = ( $this->enabledLinksInValues & SMW_LINV_PCRE ) != 0;

	// process()/preprocess() are instance methods, so the callback has to be
	// bound to $this. The former 'self::process' string form is a partially
	// supported callable that is deprecated as of PHP 8.2.
	$processedText = preg_replace_callback(
		$this->getRegexpPattern( $linksInValuesPcre ),
		array( $this, $linksInValuesPcre ? 'process' : 'preprocess' ),
		$text
	);

	// preg_replace_callback() returns null when PCRE fails (e.g. backtrack
	// limit on very long values); keep the text as it was rather than
	// replacing the whole page content with null.
	if ( $processedText !== null ) {
		$text = $processedText;
	}

	// Ensure remaining encoded entities are decoded again
	$text = Obfuscator::removeLinkObfuscation( $text );

	if ( $this->isEnabledNamespace ) {
		$this->parserData->getOutput()->addModules( $this->getModules() );

		// recordOption() is not available on every ParserOutput version
		if ( method_exists( $this->parserData->getOutput(), 'recordOption' ) ) {
			$this->parserData->getOutput()->recordOption( 'userlang' );
		}
	}

	$this->parserData->pushSemanticDataToParserOutput();

	$this->parserData->addLimitReport(
		'intext-parsertime',
		number_format( ( microtime( true ) - $start ), 3 )
	);

	SMWOutputs::commitToParserOutput( $this->parserData->getOutput() );
}
||
187 | |||
/**
 * Static convenience wrapper that forwards to
 * Obfuscator::decodeSquareBracket() and returns its result unchanged.
 *
 * @since 2.4
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeSquareBracket( $text ) {
	$decodedText = Obfuscator::decodeSquareBracket( $text );

	return $decodedText;
}
||
198 | |||
199 | /** |
||
200 | * @since 2.4 |
||
201 | 216 | * |
|
202 | 216 | * @param string $text |
|
203 | 216 | * |
|
204 | * @return string |
||
205 | 237 | */ |
|
206 | public static function obfuscateAnnotation( $text ) { |
||
209 | |||
/**
 * Static convenience wrapper that forwards to
 * Obfuscator::removeAnnotation() and returns its result unchanged.
 *
 * @since 2.4
 *
 * @param string $text
 *
 * @return string
 */
public static function removeAnnotation( $text ) {
	$strippedText = Obfuscator::removeAnnotation( $text );

	return $strippedText;
}
|
220 | |||
/**
 * Hands an explicit redirect target (or null) to the RedirectTargetFinder.
 *
 * @since 2.1
 *
 * @param Title|null $redirectTarget
 */
public function setRedirectTarget( Title $redirectTarget = null ) {
	$finder = $this->redirectTargetFinder;
	$finder->setRedirectTarget( $redirectTarget );
}
||
229 | |||
/**
 * Lets the RedirectTargetFinder inspect the text and adds the resulting
 * redirect annotation to the semantic data of the current ParserData.
 *
 * Does nothing when the current namespace is not enabled for semantic
 * annotations.
 *
 * @param string $text
 */
protected function addRedirectTargetAnnotationFromText( $text ) {

	if ( $this->isEnabledNamespace ) {
		$this->redirectTargetFinder->findRedirectTargetFromText( $text );

		$factory = $this->applicationFactory->singleton( 'PropertyAnnotatorFactory' );

		// The redirect annotator decorates a null annotator that carries the
		// semantic data container
		$factory->newRedirectPropertyAnnotator(
			$factory->newNullPropertyAnnotator( $this->parserData->getSemanticData() ),
			$this->redirectTargetFinder
		)->addAnnotation();
	}
}
||
251 | |||
/**
 * Lists the resource modules that parse() registers on the ParserOutput.
 *
 * @since 1.9
 *
 * @return array
 */
protected function getModules() {
	$modules = array();

	$modules[] = 'ext.smw.style';
	$modules[] = 'ext.smw.tooltips';

	return $modules;
}
||
265 | |||
/**
 * Returns the regular expression used to locate [[property::value]]
 * annotations.
 *
 * Two variants exist. The simple one (default) rejects any [ or ] inside the
 * value and therefore cannot handle links in values. The complex one
 * accepts inputs like [[property::Some [[link]] in value]] and captures the
 * display text separately, but may lead to PHP crashes (!) when very long
 * texts are used as values, due to limitations in the PCRE library that
 * PHP uses for pattern matching.
 *
 * Both patterns use the x (extended) and u (UTF-8) modifiers.
 *
 * @since 1.9
 *
 * @param boolean $linksInValues true selects the complex pattern
 *
 * @return string
 */
public static function getRegexpPattern( $linksInValues ) {

	if ( !$linksInValues ) {
		return '/\[\[ # Beginning of the link
			(?:([^:][^]]*):[=:])+ # Property name (or a list of those)
			([^\[\]]*) # content: anything but [, |, ]
			\]\] # End of link
			/xu';
	}

	return '/\[\[ # Beginning of the link
		(?:([^:][^]]*):[=:])+ # Property name (or a list of those)
		( # After that:
		(?:[^|\[\]] # either normal text (without |, [ or ])
		|\[\[[^]]*\]\] # or a [[link]]
		|\[[^]]*\] # or an [external link]
		)*) # all this zero or more times
		(?:\|([^]]*))? # Display text (like "text" in [[link|text]]), optional
		\]\] # End of link
		/xu';
}
|
303 | |||
/**
 * Callback for the simple annotation pattern. It splits the matched
 * "value|caption" part at the pipe itself (instead of leaving this to a
 * more complex regexp) and then delegates to process().
 *
 * @since 1.9
 *
 * @param array $semanticLink expects (linktext, properties, value|caption)
 *
 * @return string
 */
public function preprocess( array $semanticLink ) {

	// Without a value part there is nothing to split
	if ( !array_key_exists( 2, $semanticLink ) ) {
		return $this->process( array( $semanticLink[0], $semanticLink[1], '' ) );
	}

	// #1747 avoid a mismatch on an annotation like [[Foo|Bar::Foobar]]
	// where the left part of :: is split and would contain "Foo|Bar"
	// hence this type is categorized as no value annotation
	if ( strpos( $semanticLink[1], '|' ) !== false ) {
		return $semanticLink[0];
	}

	$segments = explode( '|', $semanticLink[2] );

	// First segment is the value, the second (if any) the caption; further
	// segments are not used
	$arguments = array( $semanticLink[0], $semanticLink[1], $segments[0] );

	if ( isset( $segments[1] ) ) {
		$arguments[] = $segments[1];
	}

	return $this->process( $arguments );
}
||
343 | |||
/**
 * Callback that turns one matched annotation into its replacement text.
 *
 * Empty values yield an empty string. The pseudo property "SMW" with the
 * value "on"/"off" only toggles the internal annotation state and also
 * yields an empty string. Everything else is handed to addPropertyValue().
 *
 * @since 1.9
 *
 * @param array $semanticLink expects (linktext, properties, value, caption)
 *
 * @return string
 */
protected function process( array $semanticLink ) {

	$hasProperty = array_key_exists( 1, $semanticLink );

	// #1252 Strict mode being disabled for support of multi property
	// assignments (e.g. [[property1::property2::value]])
	//
	// #1066 Strict mode is to check for colon(s) produced by something
	// like [[Foo::Bar::Foobar]], [[Foo:::0049 30 12345678]]
	// In case a colon appears (in what is expected to be a string without a colon)
	// then concatenate the string again and split for the first :: occurrence
	// only
	if ( $hasProperty && $this->isStrictMode && isset( $semanticLink[2] ) && strpos( $semanticLink[1], ':' ) !== false ) {
		list( $semanticLink[1], $semanticLink[2] ) = explode( '::', $semanticLink[1] . '::' . $semanticLink[2], 2 );
	}

	$property = $hasProperty ? $semanticLink[1] : '';
	$value = array_key_exists( 2, $semanticLink ) ? $semanticLink[2] : '';

	$value = Obfuscator::removeLinkObfuscation( $value );

	// silently ignore empty values
	if ( $value === '' ) {
		return '';
	}

	// [[SMW::on]] / [[SMW::off]] switch the annotation state and leave no
	// trace in the text; any other SMW value is dropped as well
	if ( $property == 'SMW' ) {
		if ( $value == 'on' ) {
			$this->isAnnotation = true;
		} elseif ( $value == 'off' ) {
			$this->isAnnotation = false;
		}

		return '';
	}

	$valueCaption = array_key_exists( 3, $semanticLink ) ? $semanticLink[3] : false;

	// Extract annotations and create tooltip.
	$properties = preg_split( '/:[=:]/u', $property );

	return $this->addPropertyValue( $properties, $value, $valueCaption );
}
||
408 | |||
409 | 205 | /** |
|
410 | 205 | * Adds property values to the ParserOutput instance |
|
411 | 205 | * |
|
412 | 205 | * @since 1.9 |
|
413 | * |
||
414 | * @param array $properties |
||
415 | * |
||
416 | * @return string |
||
417 | 205 | */ |
|
418 | protected function addPropertyValue( array $properties, $value, $valueCaption ) { |
||
457 | 206 | ||
458 | protected function doStripMagicWordsFromText( &$text ) { |
||
459 | |||
460 | 206 | $words = array(); |
|
461 | 205 | ||
462 | $this->magicWordsFinder->setOutput( $this->parserData->getOutput() ); |
||
463 | |||
464 | 2 | $magicWords = array( |
|
465 | 'SMW_NOFACTBOX', |
||
466 | 2 | 'SMW_SHOWFACTBOX' |
|
467 | ); |
||
468 | |||
469 | Hooks::run( 'SMW::Parser::BeforeMagicWordsFinder', array( &$magicWords ) ); |
||
479 | 2 | ||
480 | private function isSemanticEnabledForNamespace( Title $title ) { |
||
483 | |||
484 | private function getPropertyLink( $subject, $properties, $value, $valueCaption ) { |
||
510 | |||
511 | } |
||
512 |
Sometimes obsolete code just ends up commented out instead of being removed. In this case, it is better to remove the code once you have checked that you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.