Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 15 | class Parser |
||
| 16 | { |
||
| 17 | /**#@+ |
||
| 18 | * Boolean rules bitfield |
||
| 19 | */ |
||
| 20 | const RULE_AUTO_CLOSE = 1 << 0; |
||
| 21 | const RULE_AUTO_REOPEN = 1 << 1; |
||
| 22 | const RULE_BREAK_PARAGRAPH = 1 << 2; |
||
| 23 | const RULE_CREATE_PARAGRAPHS = 1 << 3; |
||
| 24 | const RULE_DISABLE_AUTO_BR = 1 << 4; |
||
| 25 | const RULE_ENABLE_AUTO_BR = 1 << 5; |
||
| 26 | const RULE_IGNORE_TAGS = 1 << 6; |
||
| 27 | const RULE_IGNORE_TEXT = 1 << 7; |
||
| 28 | const RULE_IGNORE_WHITESPACE = 1 << 8; |
||
| 29 | const RULE_IS_TRANSPARENT = 1 << 9; |
||
| 30 | const RULE_PREVENT_BR = 1 << 10; |
||
| 31 | const RULE_SUSPEND_AUTO_BR = 1 << 11; |
||
| 32 | const RULE_TRIM_FIRST_LINE = 1 << 12; |
||
| 33 | /**#@-*/ |
||
| 34 | |||
| 35 | /** |
||
| 36 | * Bitwise disjunction of rules related to automatic line breaks |
||
| 37 | */ |
||
| 38 | const RULES_AUTO_LINEBREAKS = self::RULE_DISABLE_AUTO_BR | self::RULE_ENABLE_AUTO_BR | self::RULE_SUSPEND_AUTO_BR; |
||
| 39 | |||
| 40 | /** |
||
| 41 | * Bitwise disjunction of rules that are inherited by subcontexts |
||
| 42 | */ |
||
| 43 | const RULES_INHERITANCE = self::RULE_ENABLE_AUTO_BR; |
||
| 44 | |||
| 45 | /** |
||
| 46 | * All the characters that are considered whitespace |
||
| 47 | */ |
||
| 48 | const WHITESPACE = " \n\t"; |
||
| 49 | |||
| 50 | /** |
||
| 51 | * @var array Number of open tags for each tag name |
||
| 52 | */ |
||
| 53 | protected $cntOpen; |
||
| 54 | |||
| 55 | /** |
||
| 56 | * @var array Number of times each tag has been used |
||
| 57 | */ |
||
| 58 | protected $cntTotal; |
||
| 59 | |||
| 60 | /** |
||
| 61 | * @var array Current context |
||
| 62 | */ |
||
| 63 | protected $context; |
||
| 64 | |||
| 65 | /** |
||
| 66 | * @var Tag[] Every tag created by this parser, used for garbage collection |
||
| 67 | */ |
||
| 68 | protected $createdTags; |
||
| 69 | |||
| 70 | /** |
||
| 71 | * @var integer How hard the parser has worked on fixing bad markup so far |
||
| 72 | */ |
||
| 73 | protected $currentFixingCost; |
||
| 74 | |||
| 75 | /** |
||
| 76 | * @var Tag Current tag being processed |
||
| 77 | */ |
||
| 78 | protected $currentTag; |
||
| 79 | |||
| 80 | /** |
||
| 81 | * @var bool Whether the output contains "rich" tags, IOW any tag that is not <p> or <br/> |
||
| 82 | */ |
||
| 83 | protected $isRich; |
||
| 84 | |||
| 85 | /** |
||
| 86 | * @var Logger This parser's logger |
||
| 87 | */ |
||
| 88 | protected $logger; |
||
| 89 | |||
| 90 | /** |
||
| 91 | * @var integer How hard the parser should work on fixing bad markup |
||
| 92 | */ |
||
| 93 | public $maxFixingCost = 1000; |
||
| 94 | |||
| 95 | /** |
||
| 96 | * @var array Associative array of namespace prefixes in use in document (prefixes used as key) |
||
| 97 | */ |
||
| 98 | protected $namespaces; |
||
| 99 | |||
| 100 | /** |
||
| 101 | * @var array Stack of open tags (instances of Tag) |
||
| 102 | */ |
||
| 103 | protected $openTags; |
||
| 104 | |||
| 105 | /** |
||
| 106 | * @var string This parser's output |
||
| 107 | */ |
||
| 108 | protected $output; |
||
| 109 | |||
| 110 | /** |
||
| 111 | * @var integer Position of the cursor in the original text |
||
| 112 | */ |
||
| 113 | protected $pos; |
||
| 114 | |||
| 115 | /** |
||
| 116 | * @var array Array of callbacks, using plugin names as keys |
||
| 117 | */ |
||
| 118 | protected $pluginParsers = []; |
||
| 119 | |||
| 120 | /** |
||
| 121 | * @var array Associative array of [pluginName => pluginConfig] |
||
| 122 | */ |
||
| 123 | protected $pluginsConfig; |
||
| 124 | |||
| 125 | /** |
||
| 126 | * @var array Variables registered for use in filters |
||
| 127 | */ |
||
| 128 | public $registeredVars = []; |
||
| 129 | |||
| 130 | /** |
||
| 131 | * @var array Root context, used at the root of the document |
||
| 132 | */ |
||
| 133 | protected $rootContext; |
||
| 134 | |||
| 135 | /** |
||
| 136 | * @var array Tags' config |
||
| 137 | */ |
||
| 138 | protected $tagsConfig; |
||
| 139 | |||
| 140 | /** |
||
| 141 | * @var array Tag storage |
||
| 142 | */ |
||
| 143 | protected $tagStack; |
||
| 144 | |||
| 145 | /** |
||
| 146 | * @var bool Whether the tags in the stack are sorted |
||
| 147 | */ |
||
| 148 | protected $tagStackIsSorted; |
||
| 149 | |||
| 150 | /** |
||
| 151 | * @var string Text being parsed |
||
| 152 | */ |
||
| 153 | protected $text; |
||
| 154 | |||
| 155 | /** |
||
| 156 | * @var integer Length of the text being parsed |
||
| 157 | */ |
||
| 158 | protected $textLen; |
||
| 159 | |||
| 160 | /** |
||
| 161 | * @var integer Counter incremented every time the parser is reset. Used as a canary to detect |
||
| 162 | * whether the parser was reset during execution |
||
| 163 | */ |
||
| 164 | protected $uid = 0; |
||
| 165 | |||
| 166 | /** |
||
| 167 | * @var integer Position before which we output text verbatim, without paragraphs or linebreaks |
||
| 168 | */ |
||
| 169 | protected $wsPos; |
||
| 170 | |||
/**
 * Constructor
 *
 * Stores the "plugins", "registeredVars", "rootContext" and "tags" entries of the given
 * config, then performs the same initialization that runs after unserialization.
 *
 * @param array $config Parser configuration, read at the keys "plugins", "registeredVars",
 *                      "rootContext" and "tags"
 */
public function __construct(array $config)
{
    // Persistent configuration (the same four properties listed by __sleep())
    $this->pluginsConfig  = $config['plugins'];
    $this->registeredVars = $config['registeredVars'];
    $this->rootContext    = $config['rootContext'];
    $this->tagsConfig     = $config['tags'];

    // Share the post-unserialization setup, which creates the logger
    $this->__wakeup();
}
| 183 | |||
/**
 * Serializer
 *
 * Lists the properties that need to persist through serialization.
 *
 * NOTE: using __sleep() is preferable to implementing Serializable because it leaves the
 *       choice of the serializer to the user (e.g. igbinary)
 *
 * @return string[] Names of the properties to serialize
 */
public function __sleep()
{
    $persistentProperties = ['pluginsConfig', 'registeredVars', 'rootContext', 'tagsConfig'];

    return $persistentProperties;
}
| 198 | |||
/**
 * Unserializer
 *
 * Gives this parser a fresh Logger instance. Also invoked by the constructor.
 *
 * @return void
 */
public function __wakeup()
{
    $this->logger = new Logger();
}
| 208 | |||
/**
 * Remove old references to tags
 *
 * Calls gc() on every tag recorded in $this->createdTags, then empties that list.
 *
 * @return void
 */
protected function gc()
{
    foreach ($this->createdTags as $createdTag)
    {
        $createdTag->gc();
    }

    $this->createdTags = [];
}
| 222 | |||
/**
 * Reset the parser for a new parsing
 *
 * Normalizes the text, clears the logger, reinitializes every per-parsing property and
 * the current context, then increments the uid.
 *
 * @param  string $text Text to be parsed
 * @return void
 */
protected function reset($text)
{
    // Turn CR and CRLF into LF, then drop the control characters that aren't allowed in XML
    $normalizedText = preg_replace('/\\r\\n?/', "\n", $text);
    $normalizedText = preg_replace('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]+/S', '', $normalizedText);

    // Start with an empty log
    $this->logger->clear();

    // Counters and bookkeeping
    $this->cntOpen           = [];
    $this->cntTotal          = [];
    $this->createdTags       = [];
    $this->currentFixingCost = 0;
    $this->currentTag        = null;
    $this->isRich            = false;
    $this->namespaces        = [];
    $this->openTags          = [];

    // Output and cursor
    $this->output = '';
    $this->pos    = 0;

    // Tag stack
    $this->tagStack         = [];
    $this->tagStackIsSorted = false;

    // Text being parsed (normalized), its length in bytes and the verbatim boundary
    $this->text    = $normalizedText;
    $this->textLen = strlen($normalizedText);
    $this->wsPos   = 0;

    // The current context starts as a copy of the root context, outside of any paragraph
    $this->context                = $this->rootContext;
    $this->context['inParagraph'] = false;

    // Bump the UID
    $this->uid++;
}
| 262 | |||
/**
 * Set a tag's option
 *
 * The tag's config is copied, removed from $this->tagsConfig and stored again after the
 * option is set. This ensures the stored config is a value and not a reference, to
 * prevent potential side-effects. References contained *inside* the tag's config are left
 * untouched. Nothing happens if the tag has no config.
 *
 * @param  string $tagName     Tag's name
 * @param  string $optionName  Option's name
 * @param  mixed  $optionValue Option's value
 * @return void
 */
protected function setTagOption($tagName, $optionName, $optionValue)
{
    if (!isset($this->tagsConfig[$tagName]))
    {
        return;
    }

    // Work on a copy, and drop the original entry to destroy the reference
    $configCopy = $this->tagsConfig[$tagName];
    unset($this->tagsConfig[$tagName]);

    // Apply the new value and put the config back
    $configCopy[$optionName]    = $optionValue;
    $this->tagsConfig[$tagName] = $configCopy;
}
| 287 | |||
| 288 | //========================================================================== |
||
| 289 | // Public API |
||
| 290 | //========================================================================== |
||
| 291 | |||
/**
 * Disable a tag
 *
 * Sets the tag's "isDisabled" option to true. Has no effect if the tag has no config.
 *
 * @param  string $tagName Name of the tag
 * @return void
 */
public function disableTag($tagName)
{
    $optionName = 'isDisabled';

    $this->setTagOption($tagName, $optionName, true);
}
| 302 | |||
/**
 * Enable a tag
 *
 * Removes the tag's "isDisabled" option. Has no effect if the tag has no config.
 *
 * As in setTagOption(), the tag's config is copied and replaced rather than modified in
 * place. If the stored config is a reference, modifying it in place would also change
 * every other variable bound to the same reference.
 *
 * @param  string $tagName Name of the tag
 * @return void
 */
public function enableTag($tagName)
{
    if (!isset($this->tagsConfig[$tagName]))
    {
        return;
    }

    // Copy the tag's config and remove it. That will destroy the reference
    $tagConfig = $this->tagsConfig[$tagName];
    unset($this->tagsConfig[$tagName]);

    // Remove the option from the copy and replace the tag's config
    unset($tagConfig['isDisabled']);
    $this->tagsConfig[$tagName] = $tagConfig;
}
| 316 | |||
/**
 * Get this parser's Logger instance
 *
 * @return Logger The instance stored in $this->logger
 */
public function getLogger()
{
    $logger = $this->logger;

    return $logger;
}
| 326 | |||
/**
 * Return the last text parsed
 *
 * The text returned is the normalized text stored by reset(). It may differ slightly from
 * the original text: EOLs are normalized to LF and other control codes are stripped. This
 * method is meant to support the processing of log entries, which contain offsets based
 * on the normalized text
 *
 * @see Parser::reset()
 *
 * @return string
 */
public function getText()
{
    $normalizedText = $this->text;

    return $normalizedText;
}
| 343 | |||
/**
 * Parse a text
 *
 * Resets the parser, runs the plugin parsers, processes the tags, releases the created
 * tags and finalizes the output, in that order.
 *
 * @param  string $text Text to parse
 * @return string       XML representation
 *
 * @throws RuntimeException if the parser's uid changed while parsing, which means that
 *                          the parser was reset mid-execution
 */
public function parse($text)
{
    // Reset the parser and remember which uid identifies this run
    $this->reset($text);
    $expectedUid = $this->uid;

    // Do the heavy lifting
    $this->executePluginParsers();
    $this->processTags();

    // Remove old references
    $this->gc();

    // Finalize the document
    $this->finalizeOutput();

    // A different uid means a plugin or a filter reset the parser mid-execution
    if ($expectedUid !== $this->uid)
    {
        throw new RuntimeException('The parser has been reset during execution');
    }

    // Log a warning if the fixing cost limit was exceeded
    if ($this->maxFixingCost < $this->currentFixingCost)
    {
        $this->logger->warn('Fixing cost limit exceeded');
    }

    return $this->output;
}
| 380 | |||
/**
 * Change a tag's tagLimit
 *
 * NOTE: the default tagLimit should generally be set during configuration instead
 *
 * @param  string  $tagName  The tag's name, in UPPERCASE
 * @param  integer $tagLimit New value for the tag's "tagLimit" option
 * @return void
 */
public function setTagLimit($tagName, $tagLimit)
{
    $optionName = 'tagLimit';

    $this->setTagOption($tagName, $optionName, $tagLimit);
}
| 394 | |||
/**
 * Change a tag's nestingLimit
 *
 * NOTE: the default nestingLimit should generally be set during configuration instead
 *
 * @param  string  $tagName      The tag's name, in UPPERCASE
 * @param  integer $nestingLimit New value for the tag's "nestingLimit" option
 * @return void
 */
public function setNestingLimit($tagName, $nestingLimit)
{
    $optionName = 'nestingLimit';

    $this->setTagOption($tagName, $optionName, $nestingLimit);
}
| 408 | |||
| 409 | //========================================================================== |
||
| 410 | // Filter processing |
||
| 411 | //========================================================================== |
||
| 412 | |||
/**
 * Execute all the attribute preprocessors of given tag
 *
 * Each entry of $tagConfig['attributePreprocessors'] is read as a list of
 * [attribute name, regexp, map]. Entries whose attribute is not set on the tag are skipped.
 *
 * @private
 *
 * @param  Tag   $tag       Source tag
 * @param  array $tagConfig Tag's config
 * @return bool             Unconditionally TRUE
 */
public static function executeAttributePreprocessors(Tag $tag, array $tagConfig)
{
    if (empty($tagConfig['attributePreprocessors']))
    {
        return true;
    }

    foreach ($tagConfig['attributePreprocessors'] as $preprocessor)
    {
        list($attrName, $regexp, $map) = $preprocessor;

        if ($tag->hasAttribute($attrName))
        {
            self::executeAttributePreprocessor($tag, $attrName, $regexp, $map);
        }
    }

    return true;
}
| 439 | |||
/**
 * Execute an attribute preprocessor
 *
 * Matches the value of the source attribute against the regexp and copies the mapped
 * captures to the tag's attributes.
 *
 * @param  Tag      $tag      Tag whose attribute is preprocessed
 * @param  string   $attrName Name of the source attribute
 * @param  string   $regexp   Regexp applied to the attribute's value
 * @param  string[] $map      Names of the target attributes, keyed by capture index
 * @return void
 */
protected static function executeAttributePreprocessor(Tag $tag, $attrName, $regexp, $map)
{
    $sourceValue = $tag->getAttribute($attrName);

    foreach (self::getNamedCaptures($sourceValue, $regexp, $map) as $targetName => $capturedValue)
    {
        // Attribute preprocessors cannot overwrite other attributes but they can
        // overwrite themselves
        if ($targetName !== $attrName && $tag->hasAttribute($targetName))
        {
            continue;
        }

        $tag->setAttribute($targetName, $capturedValue);
    }
}
| 463 | |||
/**
 * Execute a regexp and return the values of the mapped captures
 *
 * Captures that are not set or that are empty strings are left out of the result.
 *
 * @param  string   $attrValue Value matched against the regexp
 * @param  string   $regexp    Regexp to execute
 * @param  string[] $map       Names under which captures are returned, keyed by capture index
 * @return array               Captured values, keyed by mapped name. Empty if the regexp
 *                             does not match
 */
protected static function getNamedCaptures($attrValue, $regexp, $map)
{
    $captures = [];

    if (preg_match($regexp, $attrValue, $matches))
    {
        foreach ($map as $captureIdx => $targetName)
        {
            if (!isset($matches[$captureIdx]) || $matches[$captureIdx] === '')
            {
                continue;
            }

            $captures[$targetName] = $matches[$captureIdx];
        }
    }

    return $captures;
}
| 490 | |||
/**
 * Execute a filter
 *
 * Builds the callback's argument list from the filter's "params", in order:
 *  - a param with a numeric key is passed by value,
 *  - a param whose name is set in $vars receives that var,
 *  - a param whose name is set in $vars['registeredVars'] receives that registered var,
 *  - any other param receives NULL.
 *
 * @see s9e\TextFormatter\Configurator\Items\ProgrammableCallback
 *
 * @param  array $filter Programmed callback
 * @param  array $vars   Variables to be used when executing the callback
 * @return mixed         Whatever the callback returns
 */
protected static function executeFilter(array $filter, array $vars)
{
    $callback = $filter['callback'];
    $params   = (isset($filter['params'])) ? $filter['params'] : [];

    $arguments = [];
    foreach ($params as $paramName => $paramValue)
    {
        if (is_numeric($paramName))
        {
            // By-value param
            $arguments[] = $paramValue;
            continue;
        }

        if (isset($vars[$paramName]))
        {
            // By-name param using a supplied var
            $arguments[] = $vars[$paramName];
            continue;
        }

        if (isset($vars['registeredVars'][$paramName]))
        {
            // By-name param using a registered var
            $arguments[] = $vars['registeredVars'][$paramName];
            continue;
        }

        // Unknown param
        $arguments[] = null;
    }

    return call_user_func_array($callback, $arguments);
}
| 532 | |||
/**
 * Filter the attributes of given tag
 *
 * Works in three passes over the tag's attributes:
 *  1. attributes with a "generator" in their config get their value from that generator,
 *  2. attributes with no definition are removed, and the others go through their
 *     "filterChain" if they have one; an attribute is removed as soon as a filter returns
 *     FALSE,
 *  3. missing attributes receive their "defaultValue" if one is set.
 *
 * If the tag's config has no attributes, all of the tag's attributes are removed and the
 * set is considered valid.
 *
 * @private
 *
 * @param  Tag    $tag            Tag being checked
 * @param  array  $tagConfig      Tag's config
 * @param  array  $registeredVars Array of registered vars for use in attribute filters
 * @param  Logger $logger         This parser's Logger instance
 * @return bool                   Whether the whole attribute set is valid. FALSE if a
 *                                required attribute is missing and has no default value
 */
public static function filterAttributes(Tag $tag, array $tagConfig, array $registeredVars, Logger $logger)
{
    if (empty($tagConfig['attributes']))
    {
        $tag->setAttributes([]);

        return true;
    }

    $definitions = $tagConfig['attributes'];

    // Generate values for attributes with a generator set
    foreach ($definitions as $attrName => $attrConfig)
    {
        if (!isset($attrConfig['generator']))
        {
            continue;
        }

        $generatorVars = [
            'attrName'       => $attrName,
            'logger'         => $logger,
            'registeredVars' => $registeredVars
        ];
        $tag->setAttribute($attrName, self::executeFilter($attrConfig['generator'], $generatorVars));
    }

    // Filter and remove invalid attributes
    foreach ($tag->getAttributes() as $attrName => $attrValue)
    {
        // Attributes that have no definition are removed
        if (!isset($definitions[$attrName]))
        {
            $tag->removeAttribute($attrName);
            continue;
        }

        // Attributes with no filterChain are kept as-is
        if (!isset($definitions[$attrName]['filterChain']))
        {
            continue;
        }

        // Record the name of the attribute being filtered into the logger
        $logger->setAttribute($attrName);

        foreach ($definitions[$attrName]['filterChain'] as $filter)
        {
            $filterVars = [
                'attrName'       => $attrName,
                'attrValue'      => $attrValue,
                'logger'         => $logger,
                'registeredVars' => $registeredVars
            ];
            $attrValue = self::executeFilter($filter, $filterVars);

            // A filter that returns FALSE invalidates the attribute and ends the chain
            if ($attrValue === false)
            {
                $tag->removeAttribute($attrName);
                break;
            }
        }

        // Update the attribute value if it's valid
        if ($attrValue !== false)
        {
            $tag->setAttribute($attrName, $attrValue);
        }

        // Remove the attribute's name from the logger
        $logger->unsetAttribute();
    }

    // Iterate over the attribute definitions to handle missing attributes
    foreach ($definitions as $attrName => $attrConfig)
    {
        if ($tag->hasAttribute($attrName))
        {
            continue;
        }

        if (isset($attrConfig['defaultValue']))
        {
            // Use the attribute's default value
            $tag->setAttribute($attrName, $attrConfig['defaultValue']);
            continue;
        }

        if (!empty($attrConfig['required']))
        {
            // This attribute is missing, has no default value and is required, which means
            // the attribute set is invalid
            return false;
        }
    }

    return true;
}
| 644 | |||
| 645 | /** |
||
| 646 | * Execute given tag's filterChain |
||
| 647 | * |
||
| 648 | * @param Tag $tag Tag to filter |
||
| 649 | * @return bool Whether the tag is valid |
||
| 650 | */ |
||
| 651 | 129 | protected function filterTag(Tag $tag) |
|
| 689 | |||
| 690 | //========================================================================== |
||
| 691 | // Output handling |
||
| 692 | //========================================================================== |
||
| 693 | |||
| 694 | /** |
||
| 695 | * Finalize the output by appending the rest of the unprocessed text and create the root node |
||
| 696 | * |
||
| 697 | * @return void |
||
| 698 | */ |
||
| 699 | 163 | protected function finalizeOutput() |
|
| 700 | { |
||
| 701 | // Output the rest of the text and close the last paragraph |
||
| 702 | 163 | $this->outputText($this->textLen, 0, true); |
|
| 703 | |||
| 704 | // Remove empty tag pairs, e.g. <I><U></U></I> as well as empty paragraphs |
||
| 705 | do |
||
| 706 | { |
||
| 707 | 163 | $this->output = preg_replace('(<([^ />]+)></\\1>)', '', $this->output, -1, $cnt); |
|
| 732 | |||
| 733 | /** |
||
| 734 | * Append a tag to the output |
||
| 735 | * |
||
| 736 | * @param Tag $tag Tag to append |
||
| 737 | * @return void |
||
| 738 | */ |
||
| 739 | 121 | protected function outputTag(Tag $tag) |
|
| 855 | |||
| 856 | /** |
||
| 857 | * Output the text between the cursor's position (included) and given position (not included) |
||
| 858 | * |
||
| 859 | * @param integer $catchupPos Position we're catching up to |
||
| 860 | * @param integer $maxLines Maximum number of lines to ignore at the end of the text |
||
| 861 | * @param bool $closeParagraph Whether to close the paragraph at the end, if applicable |
||
| 862 | * @return void |
||
| 863 | */ |
||
| 864 | 163 | protected function outputText($catchupPos, $maxLines, $closeParagraph) |
|
| 1015 | |||
| 1016 | /** |
||
| 1017 | * Output a linebreak tag |
||
| 1018 | * |
||
| 1019 | * @param Tag $tag |
||
| 1020 | * @return void |
||
| 1021 | */ |
||
| 1022 | 5 | protected function outputBrTag(Tag $tag) |
|
| 1027 | |||
| 1028 | /** |
||
| 1029 | * Output an ignore tag |
||
| 1030 | * |
||
| 1031 | * @param Tag $tag |
||
| 1032 | * @return void |
||
| 1033 | */ |
||
| 1034 | 16 | protected function outputIgnoreTag(Tag $tag) |
|
| 1050 | |||
| 1051 | /** |
||
| 1052 | * Start a paragraph between current position and given position, if applicable |
||
| 1053 | * |
||
| 1054 | * @param integer $maxPos Rightmost position at which the paragraph can be opened |
||
| 1055 | * @return void |
||
| 1056 | */ |
||
| 1057 | 127 | protected function outputParagraphStart($maxPos) |
|
| 1076 | |||
| 1077 | /** |
||
| 1078 | * Close current paragraph at current position if applicable |
||
| 1079 | * |
||
| 1080 | * @return void |
||
| 1081 | */ |
||
| 1082 | 18 | protected function outputParagraphEnd() |
|
| 1093 | |||
| 1094 | /** |
||
| 1095 | * Output the content of a verbatim tag |
||
| 1096 | * |
||
| 1097 | * @param Tag $tag |
||
| 1098 | * @return void |
||
| 1099 | */ |
||
| 1100 | 4 | protected function outputVerbatim(Tag $tag) |
|
| 1107 | |||
| 1108 | /** |
||
| 1109 | * Skip as much whitespace after current position as possible |
||
| 1110 | * |
||
| 1111 | * @param integer $maxPos Rightmost character to be skipped |
||
| 1112 | * @return void |
||
| 1113 | */ |
||
| 1114 | 18 | protected function outputWhitespace($maxPos) |
|
| 1127 | |||
| 1128 | //========================================================================== |
||
| 1129 | // Plugins handling |
||
| 1130 | //========================================================================== |
||
| 1131 | |||
| 1132 | /** |
||
| 1133 | * Disable a plugin |
||
| 1134 | * |
||
| 1135 | * @param string $pluginName Name of the plugin |
||
| 1136 | * @return void |
||
| 1137 | */ |
||
| 1138 | 5 | public function disablePlugin($pluginName) |
|
| 1151 | |||
| 1152 | /** |
||
| 1153 | * Enable a plugin |
||
| 1154 | * |
||
| 1155 | * @param string $pluginName Name of the plugin |
||
| 1156 | * @return void |
||
| 1157 | */ |
||
| 1158 | 2 | public function enablePlugin($pluginName) |
|
| 1165 | |||
| 1166 | /** |
||
| 1167 | * Execute given plugin |
||
| 1168 | * |
||
| 1169 | * @param string $pluginName Plugin's name |
||
| 1170 | * @return void |
||
| 1171 | */ |
||
| 1172 | 164 | protected function executePluginParser($pluginName) |
|
| 1193 | |||
| 1194 | /** |
||
| 1195 | * Execute all the plugins |
||
| 1196 | * |
||
| 1197 | * @return void |
||
| 1198 | */ |
||
| 1199 | 174 | protected function executePluginParsers() |
|
| 1209 | |||
| 1210 | /** |
||
| 1211 | * Execute given regexp and returns as many matches as given limit |
||
| 1212 | * |
||
| 1213 | * @param string $regexp |
||
| 1214 | * @param integer $limit |
||
| 1215 | * @return array |
||
| 1216 | */ |
||
| 1217 | 6 | protected function getMatches($regexp, $limit) |
|
| 1227 | |||
| 1228 | /** |
||
| 1229 | * Get the cached callback for given plugin's parser |
||
| 1230 | * |
||
| 1231 | * @param string $pluginName Plugin's name |
||
| 1232 | * @return callable |
||
| 1233 | */ |
||
| 1234 | 162 | protected function getPluginParser($pluginName) |
|
| 1250 | |||
| 1251 | /** |
||
| 1252 | * Register a parser |
||
| 1253 | * |
||
| 1254 | * Can be used to add a new parser with no plugin config, or pre-generate a parser for an |
||
| 1255 | * existing plugin |
||
| 1256 | * |
||
| 1257 | * @param string $pluginName |
||
| 1258 | * @param callback $parser |
||
| 1259 | * @return void |
||
| 1260 | */ |
||
| 1261 | 157 | public function registerParser($pluginName, $parser, $regexp = null, $limit = PHP_INT_MAX) |
|
| 1279 | |||
| 1280 | //========================================================================== |
||
| 1281 | // Rules handling |
||
| 1282 | //========================================================================== |
||
| 1283 | |||
| 1284 | /** |
||
| 1285 | * Apply closeAncestor rules associated with given tag |
||
| 1286 | * |
||
| 1287 | * @param Tag $tag Tag |
||
| 1288 | * @return bool Whether a new tag has been added |
||
| 1289 | */ |
||
| 1290 | 123 | protected function closeAncestor(Tag $tag) |
|
| 1322 | |||
| 1323 | /** |
||
| 1324 | * Apply closeParent rules associated with given tag |
||
| 1325 | * |
||
| 1326 | * @param Tag $tag Tag |
||
| 1327 | * @return bool Whether a new tag has been added |
||
| 1328 | */ |
||
| 1329 | 123 | protected function closeParent(Tag $tag) |
|
| 1356 | |||
| 1357 | /** |
||
| 1358 | * Apply the createChild rules associated with given tag |
||
| 1359 | * |
||
| 1360 | * @param Tag $tag Tag |
||
| 1361 | * @return void |
||
| 1362 | */ |
||
| 1363 | 121 | protected function createChild(Tag $tag) |
|
| 1376 | |||
| 1377 | /** |
||
| 1378 | * Apply fosterParent rules associated with given tag |
||
| 1379 | * |
||
| 1380 | * NOTE: this rule has the potential for creating an unbounded loop, either if a tag tries to |
||
| 1381 | * foster itself or two or more tags try to foster each other in a loop. We mitigate the |
||
| 1382 | * risk by preventing a tag from creating a child of itself (the parent still gets closed) |
||
| 1383 | * and by checking and increasing the currentFixingCost so that a loop of multiple tags |
||
| 1384 | * do not run indefinitely. The default tagLimit and nestingLimit also serve to prevent the |
||
| 1385 | * loop from running indefinitely |
||
| 1386 | * |
||
| 1387 | * @param Tag $tag Tag |
||
| 1388 | * @return bool Whether a new tag has been added |
||
| 1389 | */ |
||
| 1390 | 123 | protected function fosterParent(Tag $tag) |
|
| 1429 | |||
| 1430 | /** |
||
| 1431 | * Apply requireAncestor rules associated with given tag |
||
| 1432 | * |
||
| 1433 | * @param Tag $tag Tag |
||
| 1434 | * @return bool Whether this tag has an unfulfilled requireAncestor requirement |
||
| 1435 | */ |
||
| 1436 | 123 | protected function requireAncestor(Tag $tag) |
|
| 1461 | |||
| 1462 | //========================================================================== |
||
| 1463 | // Tag processing |
||
| 1464 | //========================================================================== |
||
| 1465 | |||
| 1466 | /** |
||
| 1467 | * Create and add an end tag for given start tag at given position |
||
| 1468 | * |
||
| 1469 | * @param Tag $startTag Start tag |
||
| 1470 | * @param integer $tagPos End tag's position (will be adjusted for whitespace if applicable) |
||
| 1471 | * @param integer $prio End tag's priority |
||
| 1472 | * @return Tag |
||
| 1473 | */ |
||
| 1474 | 28 | protected function addMagicEndTag(Tag $startTag, $tagPos, $prio = 0) |
|
| 1490 | |||
| 1491 | /** |
||
| 1492 | * Compute the position of a magic end tag, adjusted for whitespace |
||
| 1493 | * |
||
| 1494 | * @param integer $tagPos Rightmost possible position for the tag |
||
| 1495 | * @return integer |
||
| 1496 | */ |
||
| 1497 | 2 | protected function getMagicPos($tagPos) |
|
| 1508 | |||
| 1509 | /** |
||
| 1510 | * Test whether given start tag is immediately followed by a closing tag |
||
| 1511 | * |
||
| 1512 | * @param Tag $tag Start tag |
||
| 1513 | * @return bool |
||
| 1514 | */ |
||
| 1515 | 3 | protected function isFollowedByClosingTag(Tag $tag) |
|
| 1519 | |||
| 1520 | /** |
||
| 1521 | * Process all tags in the stack |
||
| 1522 | * |
||
| 1523 | * @return void |
||
| 1524 | */ |
||
| 1525 | 163 | protected function processTags() |
|
| 1576 | |||
| 1577 | /** |
||
| 1578 | * Process current tag |
||
| 1579 | * |
||
| 1580 | * @return void |
||
| 1581 | */ |
||
| 1582 | 141 | protected function processCurrentTag() |
|
| 1661 | |||
| 1662 | /** |
||
| 1663 | * Process given start tag (including self-closing tags) at current position |
||
| 1664 | * |
||
| 1665 | * @param Tag $tag Start tag (including self-closing) |
||
| 1666 | * @return void |
||
| 1667 | */ |
||
| 1668 | 124 | protected function processStartTag(Tag $tag) |
|
| 1778 | |||
| 1779 | /** |
||
| 1780 | * Process given end tag at current position |
||
| 1781 | * |
||
| 1782 | * @param Tag $tag End tag |
||
| 1783 | * @return void |
||
| 1784 | */ |
||
| 1785 | 90 | protected function processEndTag(Tag $tag) |
|
| 1935 | |||
| 1936 | /** |
||
| 1937 | * Update counters and replace current context with its parent context |
||
| 1938 | * |
||
| 1939 | * @return void |
||
| 1940 | */ |
||
| 1941 | 89 | protected function popContext() |
|
| 1947 | |||
| 1948 | /** |
||
| 1949 | * Update counters and replace current context with a new context based on given tag |
||
| 1950 | * |
||
| 1951 | * If given tag is a self-closing tag, the context won't change |
||
| 1952 | * |
||
| 1953 | * @param Tag $tag Start tag (including self-closing) |
||
| 1954 | * @return void |
||
| 1955 | */ |
||
| 1956 | 121 | protected function pushContext(Tag $tag) |
|
| 2005 | |||
| 2006 | /** |
||
| 2007 | * Return whether given tag is allowed in current context |
||
| 2008 | * |
||
| 2009 | * @param string $tagName |
||
| 2010 | * @return bool |
||
| 2011 | */ |
||
| 2012 | 123 | protected function tagIsAllowed($tagName) |
|
| 2018 | |||
| 2019 | //========================================================================== |
||
| 2020 | // Tag stack |
||
| 2021 | //========================================================================== |
||
| 2022 | |||
| 2023 | /** |
||
| 2024 | * Add a start tag |
||
| 2025 | * |
||
| 2026 | * @param string $name Name of the tag |
||
| 2027 | * @param integer $pos Position of the tag in the text |
||
| 2028 | * @param integer $len Length of text consumed by the tag |
||
| 2029 | * @param integer $prio Tag's priority |
||
| 2030 | * @return Tag |
||
| 2031 | */ |
||
| 2032 | 107 | public function addStartTag($name, $pos, $len, $prio = 0) |
|
| 2036 | |||
| 2037 | /** |
||
| 2038 | * Add an end tag |
||
| 2039 | * |
||
| 2040 | * @param string $name Name of the tag |
||
| 2041 | * @param integer $pos Position of the tag in the text |
||
| 2042 | * @param integer $len Length of text consumed by the tag |
||
| 2043 | * @param integer $prio Tag's priority |
||
| 2044 | * @return Tag |
||
| 2045 | */ |
||
| 2046 | 95 | public function addEndTag($name, $pos, $len, $prio = 0) |
|
| 2050 | |||
| 2051 | /** |
||
| 2052 | * Add a self-closing tag |
||
| 2053 | * |
||
| 2054 | * @param string $name Name of the tag |
||
| 2055 | * @param integer $pos Position of the tag in the text |
||
| 2056 | * @param integer $len Length of text consumed by the tag |
||
| 2057 | * @param integer $prio Tag's priority |
||
| 2058 | * @return Tag |
||
| 2059 | */ |
||
| 2060 | 64 | public function addSelfClosingTag($name, $pos, $len, $prio = 0) |
|
| 2064 | |||
| 2065 | /** |
||
| 2066 | * Add a 0-width "br" tag to force a line break at given position |
||
| 2067 | * |
||
| 2068 | * @param integer $pos Position of the tag in the text |
||
| 2069 | * @param integer $prio Tag's priority |
||
| 2070 | * @return Tag |
||
| 2071 | */ |
||
| 2072 | 7 | public function addBrTag($pos, $prio = 0) |
|
| 2076 | |||
| 2077 | /** |
||
| 2078 | * Add an "ignore" tag |
||
| 2079 | * |
||
| 2080 | * @param integer $pos Position of the tag in the text |
||
| 2081 | * @param integer $len Length of text consumed by the tag |
||
| 2082 | * @param integer $prio Tag's priority |
||
| 2083 | * @return Tag |
||
| 2084 | */ |
||
| 2085 | 11 | public function addIgnoreTag($pos, $len, $prio = 0) |
|
| 2089 | |||
| 2090 | /** |
||
| 2091 | * Add a paragraph break at given position |
||
| 2092 | * |
||
| 2093 | * Uses a zero-width tag that is actually never output in the result |
||
| 2094 | * |
||
| 2095 | * @param integer $pos Position of the tag in the text |
||
| 2096 | * @param integer $prio Tag's priority |
||
| 2097 | * @return Tag |
||
| 2098 | */ |
||
| 2099 | 5 | public function addParagraphBreak($pos, $prio = 0) |
|
| 2103 | |||
| 2104 | /** |
||
| 2105 | * Add a copy of given tag at given position and length |
||
| 2106 | * |
||
| 2107 | * @param Tag $tag Original tag |
||
| 2108 | * @param integer $pos Copy's position |
||
| 2109 | * @param integer $len Copy's length |
||
| 2110 | * @param integer $prio Copy's priority (same as original by default) |
||
| 2111 | * @return Tag Copy tag |
||
| 2112 | */ |
||
| 2113 | 18 | public function addCopyTag(Tag $tag, $pos, $len, $prio = null) |
|
| 2124 | |||
| 2125 | /** |
||
| 2126 | * Add a tag |
||
| 2127 | * |
||
| 2128 | * @param integer $type Tag's type |
||
| 2129 | * @param string $name Name of the tag |
||
| 2130 | * @param integer $pos Position of the tag in the text |
||
| 2131 | * @param integer $len Length of text consumed by the tag |
||
| 2132 | * @param integer $prio Tag's priority |
||
| 2133 | * @return Tag |
||
| 2134 | */ |
||
| 2135 | 167 | protected function addTag($type, $name, $pos, $len, $prio) |
|
| 2177 | |||
| 2178 | /** |
||
| 2179 | * Insert given tag in the tag stack |
||
| 2180 | * |
||
| 2181 | * @param Tag $tag |
||
| 2182 | * @return void |
||
| 2183 | */ |
||
protected function insertTag(Tag $tag)
{
    // Start from the slot just above the current top of the stack
    $insertAt = count($this->tagStack);

    // An insertion point is only searched for while the stack is flagged as sorted. Walk down
    // from the top, skipping every tag that compareTags() orders after the new tag
    if ($this->tagStackIsSorted)
    {
        while ($insertAt > 0 && self::compareTags($this->tagStack[$insertAt - 1], $tag) > 0)
        {
            --$insertAt;
        }
    }

    if ($insertAt === count($this->tagStack))
    {
        // Nothing had to be skipped (or the stack is empty/unsorted): push the tag on top
        $this->tagStack[] = $tag;
    }
    else
    {
        // Slot the tag in right below the tags that were skipped. An offset of 0 means no
        // tag compared lower or equal, so the new tag ends up at the bottom of the stack
        array_splice($this->tagStack, $insertAt, 0, [$tag]);
    }
}
|
| 2209 | |||
| 2210 | /** |
||
| 2211 | * Add a pair of tags |
||
| 2212 | * |
||
| 2213 | * @param string $name Name of the tags |
||
| 2214 | * @param integer $startPos Position of the start tag |
||
| 2215 | * @param integer $startLen Length of the start tag |
||
| 2216 | * @param integer $endPos Position of the end tag |
||
| 2217 | * @param integer $endLen Length of the end tag |
||
| 2218 | * @param integer $prio Start tag's priority |
||
| 2219 | * @return Tag Start tag |
||
| 2220 | */ |
||
| 2221 | 16 | public function addTagPair($name, $startPos, $startLen, $endPos, $endLen, $prio = 0) |
|
| 2230 | |||
| 2231 | /** |
||
| 2232 | * Add a tag that represents a verbatim copy of the original text |
||
| 2233 | * |
||
| 2234 | * @param integer $pos Position of the tag in the text |
||
| 2235 | * @param integer $len Length of text consumed by the tag |
||
| 2236 | * @param integer $prio Tag's priority |
||
| 2237 | * @return Tag |
||
| 2238 | */ |
||
| 2239 | 4 | public function addVerbatim($pos, $len, $prio = 0) |
|
| 2243 | |||
| 2244 | /** |
||
| 2245 | * Sort tags by position and precedence |
||
| 2246 | * |
||
| 2247 | * @return void |
||
| 2248 | */ |
||
| 2249 | 147 | protected function sortTags() |
|
| 2254 | |||
| 2255 | /** |
||
| 2256 | * sortTags() callback |
||
| 2257 | * |
||
| 2258 | * Tags are stored as a stack, in LIFO order. We sort tags by position _descending_ so that they |
||
| 2259 | * are processed in the order they appear in the text. |
||
| 2260 | * |
||
| 2261 | * @param Tag $a First tag to compare |
||
| 2262 | * @param Tag $b Second tag to compare |
||
| 2263 | * @return integer |
||
| 2264 | */ |
||
| 2265 | 110 | protected static function compareTags(Tag $a, Tag $b) |
|
| 2318 | } |