Complex classes like Element often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Element, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 7 | class Element extends AbstractToken |
||
| 8 | { |
||
| 9 | /** @var array[Token] */ |
||
| 10 | private $attributes; |
||
| 11 | |||
| 12 | /** @var array[Token] */ |
||
| 13 | private $children; |
||
| 14 | |||
| 15 | /** @var string */ |
||
| 16 | private $name; |
||
| 17 | |||
| 18 | 23 | public function __construct(Token $parent = null, $throwOnError = false) |
|
| 26 | |||
/**
 * Decides whether the tag at the start of $html implies that the parent
 * element's closing tag was omitted.
 *
 * The tag name is read from $html via parseElementName(); the parent name
 * is taken from getParent() when the parent is itself an Element, and is
 * null otherwise. All name checks use strict comparison, so a null parent
 * name never matches any rule.
 *
 * @param string $html The remaining HTML, starting at the tag being tested.
 *
 * @return bool True when the parent element should be treated as closed.
 */
public function isClosingElementImplied($html)
{
    $name = $this->parseElementName($html);
    $parent = $this->getParent();
    $parentName = $parent instanceof self ? $parent->getName() : null;

    // HEAD: a BODY tag inside HEAD means HEAD's closing tag was left out.
    if ($name === 'body' && $parentName === 'head') {
        return true;
    }

    // Closed-only elements.
    // Closing tags not required, so anything following them closes them now.
    $closedOnlyElements = ['base', 'link', 'meta', 'hr', 'br'];
    if (in_array($parentName, $closedOnlyElements, true)) {
        return true;
    }

    // P: these tags end an open paragraph.
    $paragraphClosers = [
        'address', 'article', 'aside', 'blockquote', 'details', 'div',
        'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
        'main', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul',
    ];
    if ($parentName === 'p' && in_array($name, $paragraphClosers, true)) {
        return true;
    }

    // LI, DT/DD and RP/RT: a tag from the same group as the open parent
    // closes that parent.
    $siblingGroups = [
        ['li'],
        ['dt', 'dd'],
        ['rp', 'rt'],
    ];
    foreach ($siblingGroups as $group) {
        if (in_array($parentName, $group, true) && in_array($name, $group, true)) {
            return true;
        }
    }

    return false;
}
||
| 106 | |||
| 107 | 13 | public static function isMatch($html) |
|
| 111 | |||
| 112 | 22 | public function parse($html) |
|
| 144 | |||
| 145 | 11 | private function parseAttribute($html) |
|
| 192 | |||
/**
 * Parses the contents of this element into child tokens, one token at a
 * time, until this element's own closing tag is found.
 *
 * Parsing stops early (returning the unparsed remainder) when no token can
 * be built or when the next token implies that this element's closing tag
 * was omitted.
 *
 * @param string $html The HTML following this element's opening tag.
 *
 * @return string The HTML remaining after this element's contents and
 *                closing tag; an empty string when nothing is left or when
 *                the contents are malformed and errors are not thrown.
 *
 * @throws ParseException When no '>' remains in the contents and
 *                        throw-on-error is enabled.
 */
private function parseContents($html)
{
    $remainingHtml = trim($html);
    if ($remainingHtml === '') {
        return '';
    }

    // Escape the element name before embedding it in the pattern so that any
    // regex metacharacter in the name is matched literally instead of
    // changing (or breaking) the pattern. The pattern does not depend on the
    // loop state, so it is built once.
    $closingTagPattern = '/^<\/\s*' . preg_quote((string) $this->name, '/') . '\s*>/is';

    // Parse contents one token at a time.
    while (preg_match($closingTagPattern, $remainingHtml) === 0) {
        // Validate closing bracket actually exists.
        if (strpos($remainingHtml, '>') === false) {
            if ($this->getThrowOnError()) {
                // NOTE(review): the message mentions an attribute although
                // the failure is a missing '>' in the contents; kept as-is
                // because callers may depend on the text. Confirm.
                throw new ParseException('Invalid attribute.');
            }

            return '';
        }

        $token = TokenFactory::buildFromHtml(
            $remainingHtml,
            $this,
            $this->getThrowOnError()
        );

        // Hand the remainder back to the caller when no token could be built
        // or when the next tag implicitly closes this element.
        if ($token === false || $token->isClosingElementImplied($remainingHtml)) {
            return $remainingHtml;
        }

        $remainingHtml = trim($token->parse($remainingHtml));
        $this->children[] = $token;
    }

    // Remove remaining closing tag.
    $posOfClosingBracket = strpos($remainingHtml, '>');

    return substr($remainingHtml, $posOfClosingBracket + 1);
}
||
| 231 | |||
| 232 | /** |
||
| 233 | * Will get the element name from the html string. |
||
| 234 | * |
||
| 235 | * @param $html string |
||
| 236 | * |
||
| 237 | * @return string The element name. |
||
| 238 | */ |
||
| 239 | 22 | private function parseElementName($html) |
|
| 256 | |||
| 257 | public function getAttributes() |
||
| 261 | |||
| 262 | public function hasAttributes() |
||
| 266 | |||
| 267 | public function getChildren() |
||
| 271 | |||
| 272 | public function hasChildren() |
||
| 276 | |||
| 277 | /** |
||
| 278 | * Getter for 'name'. |
||
| 279 | * |
||
| 280 | * @return string |
||
| 281 | */ |
||
| 282 | 10 | public function getName() |
|
| 286 | |||
| 287 | 12 | public function toArray() |
|
| 310 | } |
||
| 311 |