Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like PreparseCode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use PreparseCode, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
25 | class PreparseCode |
||
26 | { |
||
27 | /** The regular expression non breaking space */ |
||
28 | const NBS = '\x{A0}'; |
||
29 | /** @var string the message to preparse */ |
||
30 | public $message = ''; |
||
31 | /** @var bool if this is just a preview */ |
||
32 | protected $previewing = false; |
||
33 | /** @var array the code blocks that we want to protect */ |
||
34 | public $code_blocks = array(); |
||
35 | /** @var PreparseCode */ |
||
36 | public static $instance; |
||
37 | |||
38 | /** |
||
39 | * PreparseCode constructor. |
||
40 | */ |
||
41 | 2 | public function __construct() |
|
44 | |||
45 | /** |
||
46 | * Takes a message and parses it, returning the prepared message as a reference |
||
47 | * for use by parse_bbc. |
||
48 | * |
||
49 | * What it does: |
||
50 | * - Cleans up links (javascript, etc.) |
||
51 | * - Fixes improperly constructed lists [lists] |
||
52 | * - Repairs improperly constructed tables, row, headers, etc |
||
53 | * - Protects code sections |
||
54 | * - Checks for proper quote open / closing |
||
55 | * - Processes /me tag |
||
56 | * - Converts color tags to ones parse_bbc will understand |
||
57 | * - Removes empty tags outside of code blocks |
||
58 | * - Won't convert \n's and a few other things if previewing is true. |
||
59 | * |
||
60 | * @param string $message |
||
61 | * @param boolean $previewing |
||
62 | */ |
||
63 | 3 | public function preparsecode(&$message, $previewing = false) |
|
137 | |||
138 | /** |
||
139 | * Trim dangling quotes |
||
140 | */ |
||
141 | 3 | private function _trimTrailingQuotes() |
|
155 | |||
/**
 * Balances the [code] tags of the message so code blocks are properly delimited.
 *
 * What it does:
 * - Walks every [code], [code=...] and [/code] tag found in $this->message, in order
 * - Appends a [/code] when the last code block is still open at the end of the message
 * - Prepends a [code] when a closing tag was found with nothing open and the
 *   message does not contain a single opening code tag
 */
private function _validateCodeBlocks()
{
    // No code tags at all (or the expression failed), so there is nothing to balance
    if (!preg_match_all('~(\[(/)*code(?:=[^\]]+)?\])~is', $this->message, $tags))
    {
        return;
    }

    $inside_block = false;
    $seen_opener = false;
    $stray_closer = false;

    // The second group captures the slash, so it is only filled for closing tags
    foreach ($tags[2] as $slash)
    {
        if (empty($slash))
        {
            // An opening tag, a second opener while already inside changes nothing
            $seen_opener = true;
            $inside_block = true;

            continue;
        }

        // Closing something that was never opened, remember that an opener is missing
        if (!$inside_block)
        {
            $stray_closer = true;
        }

        // Whatever the case, this tag ends the block
        $inside_block = false;
    }

    // The last block was left open, close it for them
    if ($inside_block)
    {
        $this->message .= '[/code]';
    }

    // Only add the missing opener when the message never had one of its own
    if ($stray_closer && !$seen_opener)
    {
        $this->message = '[code]' . $this->message;
    }
}
208 | |||
209 | /** |
||
210 | * Protects code blocks from preparse by replacing them with %%token%% values |
||
211 | */ |
||
212 | 3 | private function _tokenizeCodeBlocks() |
|
242 | |||
243 | /** |
||
244 | * Fix any URLs posted - ie. remove 'javascript:'. |
||
245 | * |
||
246 | * - Fix the img and url tags... |
||
247 | * - Fixes links in message and returns nothing. |
||
248 | */ |
||
249 | 3 | private function _fixTags() |
|
313 | |||
/**
 * Normalises the address used by every occurrence of one tag type in the message.
 *
 * What it does:
 * - Finds each [tag]...[/tag], or [tag=...] when $hasEqualSign is set, in $this->message
 * - Leaves an address alone when it already starts with one of $protocols followed by ://
 * - Otherwise completes the address, the first entry of $protocols is the default scheme
 *   - default http: /path gets the scheme and host of $boardurl in front, ?query gets
 *     $scripturl in front, //host gets the scheme and a colon in front, #anchor (only
 *     for embedded urls) is stripped of unexpected characters and the tag becomes iurl,
 *     anything else gets http:// in front
 *   - default ftp: a leading scheme that does not start with ftp is dropped and
 *     ftp:// is put in front
 *   - any other default: the scheme and :// are put in front
 * - Used by fixTags, writes the corrected tags back to $this->message and returns nothing
 *
 * @param string $myTag - the tag, it is inserted as is in to the search expression
 * @param string[] $protocols - http, https or ftp, the first entry is the default scheme
 * @param bool $embeddedUrl = false - whether it *can* be set to something
 * @param bool $hasEqualSign = false, whether it *is* set to something
 * @param bool $hasExtra = false - whether it can have extra cruft after the begin tag.
 */
private function _fixTag($myTag, $protocols, $embeddedUrl = false, $hasEqualSign = false, $hasExtra = false)
{
    global $boardurl, $scripturl;

    // Scheme and host of the site, needed to complete root relative links
    $domain_url = (preg_match('~^([^:]+://[^/]+)~', $boardurl, $site) != 0) ? $site[1] : $boardurl . '/';

    // [tag=address] with an optional body, or [tag]address[/tag]
    $pattern = $hasEqualSign
        ? '~\[(' . $myTag . ')=([^\]]*?)\](?:(.+?)\[/(' . $myTag . ')\])?~is'
        : '~\[(' . $myTag . ($hasExtra ? '(?:[^\]]*?)' : '') . ')\](.+?)\[/(' . $myTag . ')\]~is';

    preg_match_all($pattern, $this->message, $matches);

    $swaps = array();
    foreach ($matches[0] as $i => $whole)
    {
        $opener = $matches[1][$i];
        $raw = $matches[2][$i];

        // Work on the address without its surrounding whitespace
        $address = trim($raw);
        $this_tag = $opener;

        if ($hasEqualSign)
        {
            // The closing tag is optional in this form
            $this_close = empty($matches[4][$i]) ? '' : $matches[4][$i];
        }
        else
        {
            $this_close = $matches[3][$i];
        }

        // Does it already start with a scheme we accept?
        $known = false;
        foreach ($protocols as $protocol)
        {
            if (strncasecmp($address, $protocol . '://', strlen($protocol) + 3) === 0)
            {
                $known = true;
                break;
            }
        }

        if (!$known)
        {
            $default = $protocols[0];

            // Http url checking?
            if ($default === 'http')
            {
                $first = substr($address, 0, 1);
                $first_two = substr($address, 0, 2);

                if ($first === '/' && $first_two !== '//')
                {
                    // Relative to the root of the site
                    $address = $domain_url . $address;
                }
                elseif ($first === '?')
                {
                    // Just a query string, it belongs to the script
                    $address = $scripturl . $address;
                }
                elseif ($first === '#' && $embeddedUrl)
                {
                    // An in page anchor, clean it and make it an iurl
                    $address = '#' . preg_replace('~[^A-Za-z0-9_\-#]~', '', substr($address, 1));
                    $this_tag = 'iurl';
                    $this_close = 'iurl';
                }
                elseif ($first_two === '//')
                {
                    // Scheme relative, only the scheme is missing
                    $address = $default . ':' . $address;
                }
                else
                {
                    $address = $default . '://' . $address;
                }
            }
            // FTP URL Checking
            elseif ($default === 'ftp')
            {
                $address = $default . '://' . preg_replace('~^(?!ftps?)[^:]+://~', '', $address);
            }
            else
            {
                $address = $default . '://' . $address;
            }
        }

        // Pair what was found with its safe and proper replacement
        if ($hasEqualSign)
        {
            if ($embeddedUrl)
            {
                $tail = empty($matches[4][$i]) ? '' : $matches[3][$i] . '[/' . $this_close . ']';
                $swaps[$whole] = '[' . $this_tag . '=' . $address . ']' . $tail;
            }
            else
            {
                // Only the opening tag is replaced in this form
                $swaps['[' . $opener . '=' . $raw . ']'] = '[' . $this_tag . '=' . $address . ']';
            }
        }
        else
        {
            $needle = '[' . $opener . ']' . $raw . '[/' . $matches[3][$i] . ']';

            if ($embeddedUrl)
            {
                // Move the address in to the tag, the original text stays as the body
                $swaps[$needle] = '[' . $this_tag . '=' . $address . ']' . $raw . '[/' . $this_close . ']';
            }
            else
            {
                $swaps[$needle] = '[' . $this_tag . ']' . $address . '[/' . $this_close . ']';
            }
        }
    }

    // Only keep the pairs that actually change something
    $changes = array();
    foreach ($swaps as $from => $to)
    {
        if ($from != $to)
        {
            $changes[$from] = $to;
        }
    }

    // Update as needed
    if (!empty($changes))
    {
        $this->message = strtr($this->message, $changes);
    }
}
436 | |||
437 | /** |
||
438 | * Updates BBC img tags in a message so that the width / height respect the forum settings. |
||
439 | * |
||
440 | * - Will add the width/height attrib if needed, or update existing ones if they break the rules |
||
441 | */ |
||
442 | public function resizeBBCImages() |
||
514 | |||
515 | /** |
||
516 | * Replace /me with the users name, including inside footnotes |
||
517 | */ |
||
518 | 3 | private function _itsAllAbout() |
|
536 | |||
537 | /** |
||
538 | * Make sure lists have open and close tags |
||
539 | */ |
||
540 | 3 | private function _validateLists() |
|
555 | |||
556 | /** |
||
557 | * Repair a few *cough* common mistakes from user input and from wizzy cut/paste |
||
558 | */ |
||
559 | 3 | private function _fixMistakes() |
|
613 | |||
614 | /** |
||
615 | * Replace our token-ized message with the saved code blocks |
||
616 | */ |
||
617 | 3 | private function _restoreCodeBlocks() |
|
624 | |||
/**
 * Validates and corrects the table structure of the message
 *
 * What it does
 * - Walks the [table], [tr], [td] and [th] tags, opening and closing, in order
 * - Drops an opening tag that is not allowed inside the currently open tag, a table
 *   tag is the only one allowed when nothing is open
 * - Drops a closing tag that does not close the currently open tag
 * - Appends closing tags for everything still open at the end, innermost first
 * - Although it prevents markup errors, it can mess up the intended (albeit wrong)
 *   layout, driving the post author in to a furious rage
 */
private function _preparseTable()
{
    // The tags that may be opened directly inside a given tag
    $table_order = array(
        'table' => array('tr'),
        'tr' => array('td', 'th'),
        'td' => array('table'),
        'th' => array(''),
    );

    // Currently open tags, the innermost one is always at index 0
    $open_tags = array();

    // What is left to scan, and where that remainder starts in the (changing) message
    $remaining = $this->message;
    $position = 0;

    // Step through every table related tag, opening or closing
    while (preg_match('~\[(/)*(table|tr|td|th)\]~', $remaining, $matches) === 1)
    {
        $tag = $matches[2];
        $length = strlen($matches[0]);
        $offset = strpos($remaining, $matches[0]);
        $parent = empty($open_tags) ? null : $open_tags[0];

        if ($matches[1] == '/')
        {
            // A closing tag is only good when it closes the innermost open tag
            $keep = $parent !== null && $parent == $tag;
            if ($keep)
            {
                array_shift($open_tags);
            }
        }
        else
        {
            // An opening tag has to be allowed by its parent, or start a new table
            $keep = $parent === null ? $tag === 'table' : in_array($tag, $table_order[$parent]);
            if ($keep)
            {
                array_unshift($open_tags, $tag);
            }
        }

        if ($keep)
        {
            // Step over the tag
            $position += $offset + $length;
        }
        else
        {
            // Cut the tag out, the message shrinks so we only advance to where it was
            $this->message = substr_replace($this->message, '', $position + $offset, $length);
            $position += $offset;
        }

        // Continue with whatever follows this tag
        $remaining = substr($remaining, $offset + $length);
    }

    // Close any remaining table tags.
    foreach ($open_tags as $tag)
    {
        $this->message .= '[/' . $tag . ']';
    }
}
704 | |||
705 | /** |
||
706 | * This is very simple, and just removes things done by preparsecode. |
||
707 | * |
||
708 | * @param string $message |
||
709 | */ |
||
710 | public function un_preparsecode($message) |
||
725 | |||
726 | /** |
||
727 | * Ensure tags inside of nobbc do not get parsed by converting the markers to html entities |
||
728 | * |
||
729 | * @param string[] $matches |
||
730 | */ |
||
731 | private function _preparsecode_nobbc_callback($matches) |
||
735 | |||
736 | /** |
||
737 | * Use only the primary (first) font face when multiple are supplied |
||
738 | * |
||
739 | * @param string[] $matches |
||
740 | */ |
||
741 | 2 | private function _preparsecode_font_callback($matches) |
|
748 | |||
749 | /** |
||
750 | * Takes a tag and changes it to lowercase |
||
751 | * |
||
752 | * @param string[] $matches |
||
753 | */ |
||
754 | 2 | private function _preparsecode_lowertags_callback($matches) |
|
758 | |||
759 | /** |
||
760 | * Ensure image tags do not load anything by themselves (security) |
||
761 | * |
||
762 | * @param string[] $matches |
||
763 | */ |
||
764 | private function _fixTags_img_callback($matches) |
||
768 | |||
769 | /** |
||
770 | * Find and return PreparseCode instance if it exists, |
||
771 | * or create a new instance |
||
772 | * |
||
773 | * @return PreparseCode |
||
774 | */ |
||
775 | 2 | public static function instance() |
|
784 | } |
||
785 |