Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like PreparseCode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PreparseCode and, based on these observations, to apply Extract Interface, too.
| 1 | <?php |
||
| 25 | class PreparseCode |
||
| 26 | { |
||
| 27 | /** The regular expression non breaking space */ |
||
| 28 | const NBS = '\x{A0}'; |
||
| 29 | /** @var string the message to preparse */ |
||
| 30 | public $message = ''; |
||
| 31 | /** @var bool if this is just a preview */ |
||
| 32 | protected $previewing = false; |
||
| 33 | /** @var array the code blocks that we want to protect */ |
||
| 34 | public $code_blocks = array(); |
||
| 35 | /** @var PreparseCode */ |
||
| 36 | public static $instance; |
||
| 37 | |||
| 38 | /** |
||
| 39 | * PreparseCode constructor. |
||
| 40 | */ |
||
| 41 | 2 | public function __construct() |
|
| 44 | |||
| 45 | /** |
||
| 46 | * Takes a message and parses it, returning the prepared message as a reference |
||
| 47 | * for use by parse_bbc. |
||
| 48 | * |
||
| 49 | * What it does: |
||
| 50 | * - Cleans up links (javascript, etc.) |
||
| 51 | * - Fixes improperly constructed lists [lists] |
||
| 52 | * - Repairs improperly constructed tables, row, headers, etc |
||
| 53 | * - Protects code sections |
||
| 54 | * - Checks for proper quote open / closing |
||
| 55 | * - Processes /me tag |
||
| 56 | * - Converts color tags to ones parse_bbc will understand |
||
| 57 | * - Removes empty tags outside of code blocks |
||
| 58 | * - Won't convert \n's and a few other things if previewing is true. |
||
| 59 | * |
||
| 60 | * @param string $message |
||
| 61 | * @param boolean $previewing |
||
| 62 | */ |
||
| 63 | 3 | public function preparsecode(&$message, $previewing = false) |
|
| 137 | |||
| 138 | /** |
||
| 139 | * Trim dangling quotes |
||
| 140 | */ |
||
| 141 | 3 | private function _trimTrailingQuotes() |
|
| 155 | |||
/**
 * Makes sure every code block in the message is properly delimited.
 *
 * What it does:
 * - Walks all [code], [code=...] and [/code] tags in the order they appear
 * - Appends a [/code] when the last opening tag was never closed
 * - Prepends a [code] when a closing tag showed up while no block was open,
 *   but only if the message never contained an opening tag at all
 */
private function _validateCodeBlocks()
{
    $inside_block = false;
    $opener_seen = false;
    $stray_closer = false;

    $tag_count = preg_match_all('~(\[(/)*code(?:=[^\]]+)?\])~is', $this->message, $found);
    if ($tag_count)
    {
        // The second group holds the slash of a closing tag and is empty for an opening one
        foreach ($found[2] as $slash)
        {
            // Opening tag...
            if (empty($slash))
            {
                // An opener inside an already open block changes nothing
                $opener_seen = true;
                $inside_block = true;

                continue;
            }

            // Closing with nothing open means an opening tag is missing
            if (!$inside_block)
            {
                $stray_closer = true;
            }

            // Either way the block is over now.
            $inside_block = false;
        }
    }

    // Still inside a block at the end of the message, so close it.
    if ($inside_block)
    {
        $this->message .= '[/code]';
    }

    // Supply the missing opener, only if the message never had one of its own.
    if ($stray_closer && !$opener_seen)
    {
        $this->message = '[code]' . $this->message;
    }
}
|
| 208 | |||
| 209 | /** |
||
| 210 | * Protects code blocks from preparse by replacing them with %%token%% values |
||
| 211 | */ |
||
| 212 | 3 | private function _tokenizeCodeBlocks() |
|
| 242 | |||
| 243 | /** |
||
| 244 | * Fix any URLs posted - ie. remove 'javascript:'. |
||
| 245 | * |
||
| 246 | * - Fix the img and url tags... |
||
| 247 | * - Fixes links in message and returns nothing. |
||
| 248 | */ |
||
| 249 | 3 | private function _fixTags() |
|
| 313 | |||
/**
 * Fix a specific class of tag - ie. url with =.
 *
 * What it does:
 * - Used by fixTags, fixes a specific tag's links.
 * - Finds every occurrence of the tag in $this->message and trims its link
 * - A link that does not start with one of the protocols is completed. When the first
 *   protocol is http: "/path" is prefixed with the board's domain, "?query" with
 *   $scripturl, "//host" with "http:", "#anchor" (embedded urls only) is cleaned and
 *   the tag becomes iurl, anything else is prefixed with "http://".
 * - When the first protocol is ftp, any other (non ftp/ftps) scheme is replaced by "ftp://"
 * - Occurrences that end up different are replaced in $this->message in one pass
 *
 * @param string $myTag - the tag, it is inserted in to the search expression as is
 * @param string[] $protocols - http, https or ftp, the first one is used to complete links
 * @param bool $embeddedUrl = false - whether it *can* be set to something
 * @param bool $hasEqualSign = false, whether it *is* set to something
 * @param bool $hasExtra = false - whether it can have extra cruft after the begin tag.
 */
private function _fixTag($myTag, $protocols, $embeddedUrl = false, $hasEqualSign = false, $hasExtra = false)
{
    global $boardurl, $scripturl;

    $replaces = array();

    // Ensure it has a domain name, use the site name if needed
    if (preg_match('~^([^:]+://[^/]+)~', $boardurl, $match) === 1)
    {
        $domain_url = $match[1];
    }
    else
    {
        // This is only ever placed in front of a link that starts with a slash,
        // so it must not end with one or the result would contain //
        $domain_url = rtrim((string) $boardurl, '/');
    }

    // Groups: 1 = opening tag, 2 = link, then either 3 = content and 4 = closing tag
    // (equal sign form, both optional) or 3 = closing tag (plain form, 2 is the content)
    if ($hasEqualSign)
    {
        preg_match_all('~\[(' . $myTag . ')=([^\]]*?)\](?:(.+?)\[/(' . $myTag . ')\])?~is', $this->message, $matches);
    }
    else
    {
        preg_match_all('~\[(' . $myTag . ($hasExtra ? '(?:[^\]]*?)' : '') . ')\](.+?)\[/(' . $myTag . ')\]~is', $this->message, $matches);
    }

    foreach ($matches[0] as $k => $full_match)
    {
        // Remove all leading and trailing whitespace.
        $replace = trim($matches[2][$k]);
        $this_tag = $matches[1][$k];
        $this_close = $hasEqualSign ? (empty($matches[4][$k]) ? '' : $matches[4][$k]) : $matches[3][$k];

        // Does it already start with one of the accepted protocols?
        $found = false;
        foreach ($protocols as $protocol)
        {
            $found = strncasecmp($replace, $protocol . '://', strlen($protocol) + 3) === 0;
            if ($found)
            {
                break;
            }
        }

        // Http url checking?
        if (!$found && $protocols[0] === 'http')
        {
            // Site relative link
            if (substr($replace, 0, 1) === '/' && substr($replace, 0, 2) !== '//')
            {
                $replace = $domain_url . $replace;
            }
            // Just a query string, so it is meant for the forum script
            elseif (substr($replace, 0, 1) === '?')
            {
                $replace = $scripturl . $replace;
            }
            // An in page anchor, keep only safe characters and make it an internal url
            elseif (substr($replace, 0, 1) === '#' && $embeddedUrl)
            {
                $replace = '#' . preg_replace('~[^A-Za-z0-9_\-#]~', '', substr($replace, 1));
                $this_tag = 'iurl';
                $this_close = 'iurl';
            }
            // Protocol relative link
            elseif (substr($replace, 0, 2) === '//')
            {
                $replace = $protocols[0] . ':' . $replace;
            }
            else
            {
                $replace = $protocols[0] . '://' . $replace;
            }
        }
        // FTP URL Checking
        elseif (!$found && $protocols[0] === 'ftp')
        {
            $replace = $protocols[0] . '://' . preg_replace('~^(?!ftps?)[^:]+://~', '', $replace);
        }
        elseif (!$found)
        {
            $replace = $protocols[0] . '://' . $replace;
        }

        // Build a replacement array that is considered safe and proper
        if ($hasEqualSign && $embeddedUrl)
        {
            $replaces[$full_match] = '[' . $this_tag . '=' . $replace . ']' . (empty($matches[4][$k]) ? '' : $matches[3][$k] . '[/' . $this_close . ']');
        }
        elseif ($hasEqualSign)
        {
            $replaces['[' . $matches[1][$k] . '=' . $matches[2][$k] . ']'] = '[' . $this_tag . '=' . $replace . ']';
        }
        elseif ($embeddedUrl)
        {
            $replaces['[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']'] = '[' . $this_tag . '=' . $replace . ']' . $matches[2][$k] . '[/' . $this_close . ']';
        }
        else
        {
            $replaces['[' . $matches[1][$k] . ']' . $matches[2][$k] . '[/' . $matches[3][$k] . ']'] = '[' . $this_tag . ']' . $replace . '[/' . $this_close . ']';
        }
    }

    // Nothing to do for the ones that came out unchanged
    foreach ($replaces as $search => $replacement)
    {
        if ((string) $search === $replacement)
        {
            unset($replaces[$search]);
        }
    }

    // Update as needed
    if (!empty($replaces))
    {
        $this->message = strtr($this->message, $replaces);
    }
}
|
| 436 | |||
| 437 | /** |
||
| 438 | * Updates BBC img tags in a message so that the width / height respect the forum settings. |
||
| 439 | * |
||
| 440 | * - Will add the width/height attrib if needed, or update existing ones if they break the rules |
||
| 441 | */ |
||
| 442 | public function resizeBBCImages() |
||
| 514 | |||
| 515 | /** |
||
| 516 | * Replace /me with the users name, including inside footnotes |
||
| 517 | */ |
||
| 518 | 3 | private function _itsAllAbout() |
|
| 536 | |||
| 537 | /** |
||
| 538 | * Make sure lists have open and close tags |
||
| 539 | */ |
||
| 540 | 3 | private function _validateLists() |
|
| 555 | |||
| 556 | /** |
||
| 557 | * Repair a few *cough* common mistakes from user input and from wizzy cut/paste |
||
| 558 | */ |
||
| 559 | 3 | private function _fixMistakes() |
|
| 613 | |||
| 614 | /** |
||
| 615 | * Replace our token-ized message with the saved code blocks |
||
| 616 | */ |
||
| 617 | 3 | private function _restoreCodeBlocks() |
|
| 624 | |||
/**
 * Validates and corrects table structure
 *
 * What it does
 * - Checks tables for correct tag order / nesting
 * - Adds in missing closing tags, removes excess closing tags
 * - Although it prevents markup error, it can mess-up the intended (albeit wrong) layout
 * driving the post author in to a furious rage
 */
private function _preparseTable()
{
    // Define the allowable tags after a given tag
    $allowed_inside = array(
        'table' => array('tr'),
        'tr' => array('td', 'th'),
        'td' => array('table'),
        'th' => array(''),
    );

    // The tags currently open, innermost one last
    $open_tags = array();

    // array(offset, length) of every tag that has to go, in message order
    $rejected = array();

    // Find all the table tags (table /table tr /tr etc) along with where they are
    preg_match_all('~\[(/)*(table|tr|td|th)\]~', $this->message, $tags, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

    foreach ($tags as $tag)
    {
        list ($tag_text, $tag_offset) = $tag[0];
        $tag_name = $tag[2][0];
        $innermost = empty($open_tags) ? null : end($open_tags);

        // Is it opening?
        if ($tag[1][0] !== '/')
        {
            // Outside of any table only [table] is fine, otherwise it depends on what is open.
            if ($innermost === null)
            {
                $acceptable = $tag_name === 'table';
            }
            else
            {
                $acceptable = in_array($tag_name, $allowed_inside[$innermost], true);
            }

            // Record this was the last tag.
            if ($acceptable)
            {
                $open_tags[] = $tag_name;
            }
        }
        // Otherwise is closed!
        else
        {
            // Only keep the tag if it's closing the right thing.
            $acceptable = $innermost === $tag_name;
            if ($acceptable)
            {
                array_pop($open_tags);
            }
        }

        // Removing?
        if (!$acceptable)
        {
            $rejected[] = array($tag_offset, strlen($tag_text));
        }
    }

    // Cut them out back to front, that way the earlier offsets remain valid.
    foreach (array_reverse($rejected) as $span)
    {
        $this->message = substr_replace($this->message, '', $span[0], $span[1]);
    }

    // Close any remaining table tags, innermost first.
    foreach (array_reverse($open_tags) as $tag_name)
    {
        $this->message .= '[/' . $tag_name . ']';
    }
}
|
| 704 | |||
| 705 | /** |
||
| 706 | * This is very simple, and just removes things done by preparsecode. |
||
| 707 | * |
||
| 708 | * @param string $message |
||
| 709 | */ |
||
| 710 | public function un_preparsecode($message) |
||
| 725 | |||
| 726 | /** |
||
| 727 | * Ensure tags inside of nobbc do not get parsed by converting the markers to html entities |
||
| 728 | * |
||
| 729 | * @param string[] $matches |
||
| 730 | */ |
||
| 731 | private function _preparsecode_nobbc_callback($matches) |
||
| 735 | |||
| 736 | /** |
||
| 737 | * Use only the primary (first) font face when multiple are supplied |
||
| 738 | * |
||
| 739 | * @param string[] $matches |
||
| 740 | */ |
||
| 741 | 2 | private function _preparsecode_font_callback($matches) |
|
| 748 | |||
| 749 | /** |
||
| 750 | * Takes a tag and changes it to lowercase |
||
| 751 | * |
||
| 752 | * @param string[] $matches |
||
| 753 | */ |
||
| 754 | 2 | private function _preparsecode_lowertags_callback($matches) |
|
| 758 | |||
| 759 | /** |
||
| 760 | * Ensure image tags do not load anything by themselves (security) |
||
| 761 | * |
||
| 762 | * @param string[] $matches |
||
| 763 | */ |
||
| 764 | private function _fixTags_img_callback($matches) |
||
| 768 | |||
| 769 | /** |
||
| 770 | * Find and return PreparseCode instance if it exists, |
||
| 771 | * or create a new instance |
||
| 772 | * |
||
| 773 | * @return PreparseCode |
||
| 774 | */ |
||
| 775 | 2 | public static function instance() |
|
| 784 | } |
||
| 785 |