Complex classes like Element often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Element, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 7 | class Element extends AbstractToken |
||
| 8 | { |
||
| 9 | /** @var Token[] */ |
||
| 10 | private $attributes; |
||
| 11 | |||
| 12 | /** @var Token[] */ |
||
| 13 | private $children; |
||
| 14 | |||
| 15 | /** @var string */ |
||
| 16 | private $name; |
||
| 17 | |||
| 18 | 44 | public function __construct(Token $parent = null, $throwOnError = false) |
|
| 26 | |||
/**
 * Decides whether the parent element should be treated as closed, based on
 * the parent's name and the element name parsed from the start of $html.
 *
 * Returns false when this token's parent is not an Element. Otherwise the
 * following parent/next-element combinations imply a closing tag:
 *  - parent "head" followed by "body";
 *  - parent is one of base, link, meta, hr, br (regardless of what follows);
 *  - parent "p" followed by one of the elements listed in the method body;
 *  - parent "li" followed by "li";
 *  - parent "dt"/"dd" followed by "dt"/"dd";
 *  - parent "rp"/"rt" followed by "rp"/"rt".
 *
 * All name comparisons are strict, so a non-string name never matches.
 *
 * @param string $html Markup from which the next element name is parsed.
 *
 * @return bool True if the parent's closing tag is implied.
 */
public function isClosingElementImplied($html)
{
    $parent = $this->getParent();

    // instanceof is already false for null, so no separate null check is needed.
    if (!($parent instanceof self)) {
        return false;
    }

    $name = $this->parseElementName($html);
    $parentName = $parent->getName();

    // HEAD: no closing tag.
    if ($name === 'body' && $parentName === 'head') {
        return true;
    }

    // Closed-only elements.
    // Closing tags not required. We will close them now.
    $closedOnlyParents = array('base', 'link', 'meta', 'hr', 'br');
    if (in_array($parentName, $closedOnlyParents, true)) {
        return true;
    }

    // P
    $elementsClosingParagraph = array(
        'address', 'article', 'aside', 'blockquote', 'details', 'div',
        'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
        'main', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul',
    );
    if ($parentName === 'p' && in_array($name, $elementsClosingParagraph, true)) {
        return true;
    }

    // LI
    if ($parentName === 'li' && $name === 'li') {
        return true;
    }

    // DT and DD
    $definitionItems = array('dt', 'dd');
    if (in_array($parentName, $definitionItems, true) && in_array($name, $definitionItems, true)) {
        return true;
    }

    // RP and RT
    $rubyItems = array('rp', 'rt');
    if (in_array($parentName, $rubyItems, true) && in_array($name, $rubyItems, true)) {
        return true;
    }

    return false;
}
||
| 107 | |||
/**
 * Tells whether $html starts with "<" immediately followed by an ASCII letter.
 *
 * @param string $html Markup to test.
 *
 * @return bool True when the pattern matches exactly once at the start of the string.
 */
public static function isMatch($html)
{
    $matchCount = preg_match("/^<[a-zA-Z]/", $html);

    return 1 === $matchCount;
}
||
| 112 | |||
/**
 * Parses this element from $html and returns the markup left to process.
 *
 * Steps, in order:
 *  1. Store the element name obtained from parseElementName().
 *  2. Repeatedly hand the text after the name to parseAttribute() until the
 *     text starts with (optional whitespace and) ">" or "/>", or contains
 *     no ">" at all.
 *  3. If no ">" is left, either throw (when getThrowOnError() is truthy)
 *     or return an empty string.
 *  4. For a self-closing tag ("/>"), return the trimmed text after the tag;
 *     otherwise pass that text to parseContents() and return its result.
 *
 * @param string $html Markup starting at this element's opening tag.
 *
 * @return string The remaining markup (empty string on a tolerated error).
 *
 * @throws ParseException If the closing bracket is missing and errors are enabled.
 */
public function parse($html)
{
    $this->name = $this->parseElementName($html);

    // Skip the name plus one extra character (presumably the leading "<").
    $rest = substr($html, 1 + strlen($this->name));

    // Parse attributes.
    while (true) {
        if (strpos($rest, '>') === false) {
            break;
        }
        if (preg_match("/^\s*[\/]?>/", $rest) !== 0) {
            break;
        }
        $rest = $this->parseAttribute($rest);
    }

    // Find position of end of tag.
    $closingPos = strpos($rest, '>');
    if ($closingPos === false) {
        if (!$this->getThrowOnError()) {
            return '';
        }

        throw new ParseException('Invalid element: missing closing bracket.');
    }

    // Self-closing when the first "/>" ends exactly at the closing bracket.
    // This is decided on the untrimmed text, before the tag is cut off.
    $selfClosingPos = strpos($rest, '/>');
    $isSelfClosing = $selfClosingPos !== false && $selfClosingPos === $closingPos - 1;

    $afterTag = trim(substr($rest, $closingPos + 1));

    // Open elements still have contents to parse.
    return $isSelfClosing ? $afterTag : $this->parseContents($afterTag);
}
||
| 144 | |||
| 145 | 11 | private function parseAttribute($html) |
|
| 192 | |||
| 193 | 10 | private function parseContents($html) |
|
| 194 | { |
||
| 195 | 10 | $remainingHtml = trim($html); |
|
| 221 | |||
| 222 | /** |
||
| 223 | * Will get the element name from the html string. |
||
| 224 | * |
||
| 225 | * @param $html string |
||
| 226 | * |
||
| 227 | * @return string The element name. |
||
| 228 | */ |
||
| 229 | 42 | private function parseElementName($html) |
|
| 246 | |||
| 247 | public function getAttributes() |
||
| 251 | |||
| 252 | public function hasAttributes() |
||
| 256 | |||
| 257 | public function getChildren() |
||
| 261 | |||
| 262 | public function hasChildren() |
||
| 266 | |||
| 267 | /** |
||
| 268 | * Getter for 'name'. |
||
| 269 | * |
||
| 270 | * @return string |
||
| 271 | */ |
||
| 272 | 30 | public function getName() |
|
| 276 | |||
| 277 | 13 | public function toArray() |
|
| 300 | } |
||
| 301 |