Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HtmlMin often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlMin, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 17 | class HtmlMin |
||
| 18 | { |
||
| 19 | /** |
||
| 20 | * // https://mathiasbynens.be/demo/javascript-mime-type |
||
| 21 | * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type |
||
| 22 | * |
||
| 23 | * @var array |
||
| 24 | */ |
||
| 25 | private static $executableScriptsMimeTypes = array( |
||
| 26 | 'text/javascript' => '', |
||
| 27 | 'text/ecmascript' => '', |
||
| 28 | 'text/jscript' => '', |
||
| 29 | 'application/javascript' => '', |
||
| 30 | 'application/x-javascript' => '', |
||
| 31 | 'application/ecmascript' => '', |
||
| 32 | ); |
||
| 33 | |||
| 34 | private static $selfClosingTags = array( |
||
| 35 | 'area', |
||
| 36 | 'base', |
||
| 37 | 'basefont', |
||
| 38 | 'br', |
||
| 39 | 'col', |
||
| 40 | 'command', |
||
| 41 | 'embed', |
||
| 42 | 'frame', |
||
| 43 | 'hr', |
||
| 44 | 'img', |
||
| 45 | 'input', |
||
| 46 | 'isindex', |
||
| 47 | 'keygen', |
||
| 48 | 'link', |
||
| 49 | 'meta', |
||
| 50 | 'param', |
||
| 51 | 'source', |
||
| 52 | 'track', |
||
| 53 | 'wbr', |
||
| 54 | ); |
||
| 55 | |||
| 56 | private static $trimWhitespaceFromTags = array( |
||
| 57 | 'article' => '', |
||
| 58 | 'br' => '', |
||
| 59 | 'div' => '', |
||
| 60 | 'footer' => '', |
||
| 61 | 'hr' => '', |
||
| 62 | 'nav' => '', |
||
| 63 | 'p' => '', |
||
| 64 | 'script' => '', |
||
| 65 | ); |
||
| 66 | |||
| 67 | /** |
||
| 68 | * @var array |
||
| 69 | */ |
||
| 70 | private static $booleanAttributes = array( |
||
| 71 | 'allowfullscreen' => '', |
||
| 72 | 'async' => '', |
||
| 73 | 'autofocus' => '', |
||
| 74 | 'autoplay' => '', |
||
| 75 | 'checked' => '', |
||
| 76 | 'compact' => '', |
||
| 77 | 'controls' => '', |
||
| 78 | 'declare' => '', |
||
| 79 | 'default' => '', |
||
| 80 | 'defaultchecked' => '', |
||
| 81 | 'defaultmuted' => '', |
||
| 82 | 'defaultselected' => '', |
||
| 83 | 'defer' => '', |
||
| 84 | 'disabled' => '', |
||
| 85 | 'enabled' => '', |
||
| 86 | 'formnovalidate' => '', |
||
| 87 | 'hidden' => '', |
||
| 88 | 'indeterminate' => '', |
||
| 89 | 'inert' => '', |
||
| 90 | 'ismap' => '', |
||
| 91 | 'itemscope' => '', |
||
| 92 | 'loop' => '', |
||
| 93 | 'multiple' => '', |
||
| 94 | 'muted' => '', |
||
| 95 | 'nohref' => '', |
||
| 96 | 'noresize' => '', |
||
| 97 | 'noshade' => '', |
||
| 98 | 'novalidate' => '', |
||
| 99 | 'nowrap' => '', |
||
| 100 | 'open' => '', |
||
| 101 | 'pauseonexit' => '', |
||
| 102 | 'readonly' => '', |
||
| 103 | 'required' => '', |
||
| 104 | 'reversed' => '', |
||
| 105 | 'scoped' => '', |
||
| 106 | 'seamless' => '', |
||
| 107 | 'selected' => '', |
||
| 108 | 'sortable' => '', |
||
| 109 | 'truespeed' => '', |
||
| 110 | 'typemustmatch' => '', |
||
| 111 | 'visible' => '', |
||
| 112 | ); |
||
| 113 | /** |
||
| 114 | * @var array |
||
| 115 | */ |
||
| 116 | private static $skipTagsForRemoveWhitespace = array( |
||
| 117 | 'code', |
||
| 118 | 'pre', |
||
| 119 | 'script', |
||
| 120 | 'style', |
||
| 121 | 'textarea', |
||
| 122 | ); |
||
| 123 | |||
| 124 | /** |
||
| 125 | * @var array |
||
| 126 | */ |
||
| 127 | private $protectedChildNodes = array(); |
||
| 128 | |||
| 129 | /** |
||
| 130 | * @var string |
||
| 131 | */ |
||
| 132 | private $protectedChildNodesHelper = 'html-min--voku--saved-content'; |
||
| 133 | |||
| 134 | /** |
||
| 135 | * @var string |
||
| 136 | 23 | */ |
|
| 137 | private $booleanAttributesHelper = 'html-min--voku--delete-this'; |
||
| 138 | 23 | ||
| 139 | 23 | /** |
|
| 140 | * @var bool |
||
| 141 | 23 | */ |
|
| 142 | 23 | private $doOptimizeAttributes = true; |
|
| 143 | 23 | ||
| 144 | /** |
||
| 145 | * @var bool |
||
| 146 | */ |
||
| 147 | private $doRemoveComments = true; |
||
| 148 | |||
| 149 | /** |
||
| 150 | 23 | * @var bool |
|
| 151 | */ |
||
| 152 | 23 | private $doRemoveWhitespaceAroundTags = true; |
|
| 153 | 23 | ||
| 154 | 1 | /** |
|
| 155 | * @var bool |
||
| 156 | */ |
||
| 157 | 23 | private $doRemoveHttpPrefixFromAttributes = true; |
|
| 158 | 23 | ||
| 159 | 3 | /** |
|
| 160 | * @var bool |
||
| 161 | */ |
||
| 162 | private $doSortCssClassNames = true; |
||
| 163 | 20 | ||
| 164 | 20 | /** |
|
| 165 | 20 | * @var bool |
|
| 166 | */ |
||
| 167 | 20 | private $doSortHtmlAttributes = true; |
|
| 168 | 20 | ||
| 169 | 20 | /** |
|
| 170 | * @var bool |
||
| 171 | 20 | */ |
|
| 172 | private $doRemoveDeprecatedScriptCharsetAttribute = true; |
||
| 173 | 20 | ||
| 174 | 20 | /** |
|
| 175 | 11 | * @var bool |
|
| 176 | 11 | */ |
|
| 177 | 11 | private $doRemoveDefaultAttributes = true; |
|
| 178 | |||
| 179 | 11 | /** |
|
| 180 | * @var bool |
||
| 181 | */ |
||
| 182 | private $doRemoveDeprecatedAnchorName = true; |
||
| 183 | |||
| 184 | /** |
||
| 185 | * @var bool |
||
| 186 | 11 | */ |
|
| 187 | private $doRemoveDeprecatedTypeFromStylesheetLink = true; |
||
| 188 | 11 | ||
| 189 | /** |
||
| 190 | 11 | * @var bool |
|
| 191 | */ |
||
| 192 | private $doRemoveDeprecatedTypeFromScriptTag = true; |
||
| 193 | |||
| 194 | /** |
||
| 195 | * @var bool |
||
| 196 | 11 | */ |
|
| 197 | 11 | private $doRemoveValueFromEmptyInput = true; |
|
| 198 | 11 | ||
| 199 | /** |
||
| 200 | 11 | * @var bool |
|
| 201 | 11 | */ |
|
| 202 | private $doRemoveEmptyAttributes = true; |
||
| 203 | |||
| 204 | /** |
||
| 205 | * @var bool |
||
| 206 | */ |
||
| 207 | 11 | private $doSumUpWhitespace = true; |
|
| 208 | |||
| 209 | 11 | /** |
|
| 210 | * HtmlMin constructor. |
||
| 211 | 11 | */ |
|
| 212 | 11 | public function __construct() |
|
| 215 | 11 | ||
| 216 | 11 | /** |
|
| 217 | * Check if the current string is a conditional comment. |
||
| 218 | 11 | * |
|
| 219 | 11 | * INFO: since IE >= 10, conditional comments are no longer supported |
|
| 220 | 11 | * |
|
| 221 | 11 | * <!--[if expression]> HTML <![endif]--> |
|
| 222 | 11 | * <![if expression]> HTML <![endif]> |
|
| 223 | 11 | * |
|
| 224 | * @param string $comment |
||
| 225 | 11 | * |
|
| 226 | * @return bool |
||
| 227 | 11 | */ |
|
| 228 | private function isConditionalComment($comment) |
||
| 240 | 1 | ||
| 241 | 11 | /** |
|
| 242 | * @param string $html |
||
| 243 | * |
||
| 244 | * @return string |
||
| 245 | */ |
||
| 246 | public function minify($html) |
||
| 388 | |||
| 389 | /** |
||
| 390 | * Sort HTML-Attributes, so that gzip can do better work |
||
| 391 | * and remove some default attributes. |
||
| 392 | * |
||
| 393 | * @param SimpleHtmlDom $element |
||
| 394 | * |
||
| 395 | * @return bool |
||
| 396 | 20 | */ |
|
| 397 | private function optimizeAttributes(SimpleHtmlDom $element) |
||
| 454 | |||
| 455 | /** |
||
| 456 | * Prevent changes of inline "styles" and "scripts". |
||
| 457 | 12 | * |
|
| 458 | * @param HtmlDomParser $dom |
||
| 459 | 11 | * |
|
| 460 | 11 | * @return HtmlDomParser |
|
| 461 | 11 | */ |
|
| 462 | 11 | private function protectTags(HtmlDomParser $dom) |
|
| 507 | |||
| 508 | /** |
||
| 509 | * Check if the attribute can be removed. |
||
| 510 | * |
||
| 511 | 9 | * @param string $tag |
|
| 512 | * @param string $attrName |
||
| 513 | 9 | * @param string $attrValue |
|
| 514 | 8 | * @param string $allAttr |
|
| 515 | * |
||
| 516 | * @return bool |
||
| 517 | 3 | */ |
|
| 518 | 3 | private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr) |
|
| 584 | |||
| 585 | /** |
||
| 586 | * Remove comments in the dom. |
||
| 587 | * |
||
| 588 | * @param HtmlDomParser $dom |
||
| 589 | * |
||
| 590 | * @return HtmlDomParser |
||
| 591 | */ |
||
| 592 | private function removeComments(HtmlDomParser $dom) |
||
| 606 | |||
| 607 | /** |
||
| 608 | * Trim tags in the dom. |
||
| 609 | * |
||
| 610 | * @param SimpleHtmlDom $element |
||
| 611 | * |
||
| 612 | * @return void |
||
| 613 | */ |
||
| 614 | private function removeWhitespaceAroundTags(SimpleHtmlDom $element) |
||
| 639 | |||
| 640 | /** |
||
| 641 | * Callback function for preg_replace_callback use. |
||
| 642 | * |
||
| 643 | * @param array $matches PREG matches |
||
| 644 | * |
||
| 645 | * @return string |
||
| 646 | */ |
||
| 647 | private function restoreProtectedHtml($matches) |
||
| 658 | |||
| 659 | /** |
||
| 660 | * @param boolean $doOptimizeAttributes |
||
| 661 | */ |
||
| 662 | public function setDoOptimizeAttributes($doOptimizeAttributes) |
||
| 666 | |||
| 667 | /** |
||
| 668 | * @param boolean $doRemoveComments |
||
| 669 | */ |
||
| 670 | public function setDoRemoveComments($doRemoveComments) |
||
| 674 | |||
| 675 | /** |
||
| 676 | * @param boolean $doRemoveDefaultAttributes |
||
| 677 | */ |
||
| 678 | public function setDoRemoveDefaultAttributes($doRemoveDefaultAttributes) |
||
| 682 | |||
| 683 | /** |
||
| 684 | * @param boolean $doRemoveDeprecatedAnchorName |
||
| 685 | */ |
||
| 686 | public function setDoRemoveDeprecatedAnchorName($doRemoveDeprecatedAnchorName) |
||
| 690 | |||
| 691 | /** |
||
| 692 | * @param boolean $doRemoveDeprecatedScriptCharsetAttribute |
||
| 693 | */ |
||
| 694 | public function setDoRemoveDeprecatedScriptCharsetAttribute($doRemoveDeprecatedScriptCharsetAttribute) |
||
| 698 | |||
| 699 | /** |
||
| 700 | * @param boolean $doRemoveDeprecatedTypeFromScriptTag |
||
| 701 | */ |
||
| 702 | public function setDoRemoveDeprecatedTypeFromScriptTag($doRemoveDeprecatedTypeFromScriptTag) |
||
| 706 | |||
| 707 | /** |
||
| 708 | * @param boolean $doRemoveDeprecatedTypeFromStylesheetLink |
||
| 709 | */ |
||
| 710 | public function setDoRemoveDeprecatedTypeFromStylesheetLink($doRemoveDeprecatedTypeFromStylesheetLink) |
||
| 714 | |||
| 715 | /** |
||
| 716 | * @param boolean $doRemoveEmptyAttributes |
||
| 717 | */ |
||
| 718 | public function setDoRemoveEmptyAttributes($doRemoveEmptyAttributes) |
||
| 722 | |||
| 723 | /** |
||
| 724 | * @param boolean $doRemoveHttpPrefixFromAttributes |
||
| 725 | */ |
||
| 726 | public function setDoRemoveHttpPrefixFromAttributes($doRemoveHttpPrefixFromAttributes) |
||
| 730 | |||
| 731 | /** |
||
| 732 | * @param boolean $doRemoveValueFromEmptyInput |
||
| 733 | */ |
||
| 734 | public function setDoRemoveValueFromEmptyInput($doRemoveValueFromEmptyInput) |
||
| 738 | |||
| 739 | /** |
||
| 740 | * @param boolean $doRemoveWhitespaceAroundTags |
||
| 741 | */ |
||
| 742 | public function setDoRemoveWhitespaceAroundTags($doRemoveWhitespaceAroundTags) |
||
| 746 | |||
| 747 | /** |
||
| 748 | * @param boolean $doSortCssClassNames |
||
| 749 | */ |
||
| 750 | public function setDoSortCssClassNames($doSortCssClassNames) |
||
| 754 | |||
| 755 | /** |
||
| 756 | * @param boolean $doSortHtmlAttributes |
||
| 757 | */ |
||
| 758 | public function setDoSortHtmlAttributes($doSortHtmlAttributes) |
||
| 762 | |||
| 763 | /** |
||
| 764 | * @param boolean $doSumUpWhitespace |
||
| 765 | */ |
||
| 766 | public function setDoSumUpWhitespace($doSumUpWhitespace) |
||
| 770 | |||
| 771 | /** |
||
| 772 | * @param $attrName |
||
| 773 | * @param $attrValue |
||
| 774 | * |
||
| 775 | * @return string |
||
| 776 | */ |
||
| 777 | private function sortCssClassNames($attrName, $attrValue) |
||
| 800 | |||
| 801 | /** |
||
| 802 | * Sum-up extra whitespace from dom-nodes. |
||
| 803 | * |
||
| 804 | * @param HtmlDomParser $dom |
||
| 805 | * |
||
| 806 | * @return HtmlDomParser |
||
| 807 | */ |
||
| 808 | private function sumUpWhitespace(HtmlDomParser $dom) |
||
| 833 | } |
||
| 834 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.