Complex classes like Element often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Element, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
7 | class Element extends AbstractToken |
||
8 | { |
||
9 | /** @var array[Token] */ |
||
10 | private $attributes; |
||
11 | |||
12 | /** @var array[Token] */ |
||
13 | private $children; |
||
14 | |||
15 | /** @var string */ |
||
16 | private $name; |
||
17 | |||
18 | 44 | public function __construct(Token $parent = null, $throwOnError = false) |
|
26 | |||
/**
 * Reports whether the element whose opening tag starts $html implies that
 * the parent element's closing tag was omitted.
 *
 * The decision is based only on two names: the name parsed from $html and
 * the name of this token's parent. All name comparisons are strict, so a
 * non-string name can never match by type juggling.
 *
 * @param string $html Markup beginning at the opening tag of the new element.
 *
 * @return bool True when the parent should be treated as closed.
 */
public function isClosingElementImplied($html)
{
    $parent = $this->getParent();
    if (!($parent instanceof self)) {
        // Covers both "no parent" (null) and "parent is not an element".
        return false;
    }

    $name = $this->parseElementName($html);
    $parentName = $parent->getName();

    // HEAD: no closing tag.
    if ($parentName === 'head' && $name === 'body') {
        return true;
    }

    // Closed-only elements.
    // Closing tags not required. We will close them now.
    $closedOnlyParents = array('base', 'link', 'meta', 'hr', 'br');
    if (in_array($parentName, $closedOnlyParents, true)) {
        return true;
    }

    // P
    if ($parentName === 'p') {
        $paragraphClosers = array(
            'address', 'article', 'aside', 'blockquote', 'details', 'div',
            'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
            'main', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul',
        );

        // No later rule applies to a 'p' parent, so this is the final answer.
        return in_array($name, $paragraphClosers, true);
    }

    // LI
    if ($parentName === 'li') {
        return $name === 'li';
    }

    // DT and DD
    if ($parentName === 'dt' || $parentName === 'dd') {
        return $name === 'dt' || $name === 'dd';
    }

    // RP and RT
    if ($parentName === 'rp' || $parentName === 'rt') {
        return $name === 'rp' || $name === 'rt';
    }

    return false;
}
||
107 | |||
/**
 * Tells whether $html begins with "<" immediately followed by an ASCII letter.
 *
 * @param string $html Markup to inspect.
 *
 * @return bool True only when the pattern matches at the very start.
 */
public static function isMatch($html)
{
    $matchCount = preg_match("/^<[a-zA-Z]/", $html);

    // preg_match() yields 1 on a match, 0 on no match and false on error;
    // only an actual match counts.
    return $matchCount === 1;
}
||
112 | |||
/**
 * Parses one element from the start of $html: records its name, consumes
 * its attributes, then either stops (self-closing tag) or hands the rest
 * of the markup to parseContents().
 *
 * @param string $html Markup beginning at the element's opening tag.
 *
 * @return string For a self-closing tag, the trimmed markup after the tag;
 *                for an open tag, whatever parseContents() returns; an
 *                empty string when no '>' is found and throw-on-error is off.
 *
 * @throws ParseException When no '>' is found and throw-on-error is on.
 */
public function parse($html)
{
    $this->name = $this->parseElementName($html);

    // Drop the first strlen(name) + 1 bytes (presumably the '<' plus the
    // name), then consume attributes until the tag end is next or no '>'
    // is left at all.
    $rest = substr($html, strlen($this->name) + 1);
    while (true) {
        if (strpos($rest, '>') === false) {
            break;
        }

        if (preg_match("/^\s*[\/]?>/", $rest) !== 0) {
            break;
        }

        $rest = $this->parseAttribute($rest);
    }

    // Locate the end of the opening tag.
    $tagEnd = strpos($rest, '>');
    if ($tagEnd === false) {
        if (!$this->getThrowOnError()) {
            return '';
        }

        throw new ParseException('Invalid element: missing closing bracket.');
    }

    // The tag is self-closing when the first "/>" ends exactly at the first '>'.
    $selfCloseAt = strpos($rest, '/>');
    $isSelfClosing = $selfCloseAt !== false && $selfCloseAt + 1 === $tagEnd;

    $afterTag = trim(substr($rest, $tagEnd + 1));
    if ($isSelfClosing) {
        return $afterTag;
    }

    // Open element: the remaining markup holds its contents.
    return $this->parseContents($afterTag);
}
||
144 | |||
145 | 11 | private function parseAttribute($html) |
|
192 | |||
193 | 10 | private function parseContents($html) |
|
194 | { |
||
195 | 10 | $remainingHtml = trim($html); |
|
221 | |||
222 | /** |
||
223 | * Will get the element name from the html string. |
||
224 | * |
||
225 | * @param $html string |
||
226 | * |
||
227 | * @return string The element name. |
||
228 | */ |
||
229 | 42 | private function parseElementName($html) |
|
246 | |||
247 | public function getAttributes() |
||
251 | |||
252 | public function hasAttributes() |
||
256 | |||
257 | public function getChildren() |
||
261 | |||
262 | public function hasChildren() |
||
266 | |||
267 | /** |
||
268 | * Getter for 'name'. |
||
269 | * |
||
270 | * @return string |
||
271 | */ |
||
272 | 30 | public function getName() |
|
276 | |||
277 | 13 | public function toArray() |
|
300 | } |
||
301 |