Complex classes like DOMTreeBuilder often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOMTreeBuilder, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
26 | class DOMTreeBuilder implements EventHandler |
||
27 | { |
||
28 | /** |
||
29 | * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0. |
||
30 | */ |
||
31 | const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; |
||
32 | |||
33 | const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; |
||
34 | |||
35 | const NAMESPACE_SVG = 'http://www.w3.org/2000/svg'; |
||
36 | |||
37 | const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink'; |
||
38 | |||
39 | const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace'; |
||
40 | |||
41 | const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/'; |
||
42 | |||
43 | const OPT_DISABLE_HTML_NS = 'disable_html_ns'; |
||
44 | |||
45 | const OPT_TARGET_DOC = 'target_document'; |
||
46 | |||
47 | const OPT_IMPLICIT_NS = 'implicit_namespaces'; |
||
48 | |||
49 | /** |
||
50 | * Holds the HTML5 element names that cause a namespace switch. |
||
51 | * |
||
52 | * @var array |
||
53 | */ |
||
54 | protected $nsRoots = array( |
||
55 | 'html' => self::NAMESPACE_HTML, |
||
56 | 'svg' => self::NAMESPACE_SVG, |
||
57 | 'math' => self::NAMESPACE_MATHML, |
||
58 | ); |
||
59 | |||
60 | /** |
||
61 | * Holds the always available namespaces (which do not require the XMLNS declaration). |
||
62 | * |
||
63 | * @var array |
||
64 | */ |
||
65 | protected $implicitNamespaces = array( |
||
66 | 'xml' => self::NAMESPACE_XML, |
||
67 | 'xmlns' => self::NAMESPACE_XMLNS, |
||
68 | 'xlink' => self::NAMESPACE_XLINK, |
||
69 | ); |
||
70 | |||
71 | /** |
||
72 | * Holds a stack of currently active namespaces. |
||
73 | * |
||
74 | * @var array |
||
75 | */ |
||
76 | protected $nsStack = array(); |
||
77 | |||
78 | /** |
||
79 | * Holds the number of namespaces declared by a node. |
||
80 | * |
||
81 | * @var array |
||
82 | */ |
||
83 | protected $pushes = array(); |
||
84 | |||
85 | /** |
||
86 | * Defined in 8.2.5. |
||
87 | */ |
||
88 | const IM_INITIAL = 0; |
||
89 | |||
90 | const IM_BEFORE_HTML = 1; |
||
91 | |||
92 | const IM_BEFORE_HEAD = 2; |
||
93 | |||
94 | const IM_IN_HEAD = 3; |
||
95 | |||
96 | const IM_IN_HEAD_NOSCRIPT = 4; |
||
97 | |||
98 | const IM_AFTER_HEAD = 5; |
||
99 | |||
100 | const IM_IN_BODY = 6; |
||
101 | |||
102 | const IM_TEXT = 7; |
||
103 | |||
104 | const IM_IN_TABLE = 8; |
||
105 | |||
106 | const IM_IN_TABLE_TEXT = 9; |
||
107 | |||
108 | const IM_IN_CAPTION = 10; |
||
109 | |||
110 | const IM_IN_COLUMN_GROUP = 11; |
||
111 | |||
112 | const IM_IN_TABLE_BODY = 12; |
||
113 | |||
114 | const IM_IN_ROW = 13; |
||
115 | |||
116 | const IM_IN_CELL = 14; |
||
117 | |||
118 | const IM_IN_SELECT = 15; |
||
119 | |||
120 | const IM_IN_SELECT_IN_TABLE = 16; |
||
121 | |||
122 | const IM_AFTER_BODY = 17; |
||
123 | |||
124 | const IM_IN_FRAMESET = 18; |
||
125 | |||
126 | const IM_AFTER_FRAMESET = 19; |
||
127 | |||
128 | const IM_AFTER_AFTER_BODY = 20; |
||
129 | |||
130 | const IM_AFTER_AFTER_FRAMESET = 21; |
||
131 | |||
132 | const IM_IN_SVG = 22; |
||
133 | |||
134 | const IM_IN_MATHML = 23; |
||
135 | |||
136 | protected $options = array(); |
||
137 | |||
138 | protected $stack = array(); |
||
139 | |||
140 | protected $current; // Pointer in the tag hierarchy. |
||
141 | protected $rules; |
||
142 | protected $doc; |
||
143 | |||
144 | protected $frag; |
||
145 | |||
146 | protected $processor; |
||
147 | |||
148 | protected $insertMode = 0; |
||
149 | |||
150 | /** |
||
151 | * Track if we are in an element that allows only inline child nodes. |
||
152 | * |
||
153 | * @var string|null |
||
154 | */ |
||
155 | protected $onlyInline; |
||
156 | |||
157 | /** |
||
158 | * Quirks mode is enabled by default. |
||
159 | * Any document that is missing the DOCTYPE will be considered to be in quirks mode. |
||
160 | */ |
||
161 | protected $quirks = true; |
||
162 | |||
163 | protected $errors = array(); |
||
164 | |||
165 | 114 | public function __construct($isFragment = false, array $options = array()) |
|
205 | |||
206 | /** |
||
207 | * Get the document. |
||
208 | */ |
||
209 | 103 | public function document() |
|
213 | |||
214 | /** |
||
215 | * Get the DOM fragment for the body. |
||
216 | * |
||
217 | * This returns a DOMDocumentFragment because a fragment may have zero or more |
||
218 | * DOMNodes at its root. |
||
219 | * |
||
220 | * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context |
||
221 | * |
||
222 | * @return \DOMDocumentFragment |
||
223 | */ |
||
224 | 19 | public function fragment() |
|
228 | |||
229 | /** |
||
230 | * Provide an instruction processor. |
||
231 | * |
||
232 | * This is used for handling Processing Instructions as they are |
||
233 | * inserted. If omitted, PIs are inserted directly into the DOM tree. |
||
234 | * |
||
235 | * @param InstructionProcessor $proc |
||
236 | */ |
||
237 | 1 | public function setInstructionProcessor(InstructionProcessor $proc) |
|
241 | |||
242 | 97 | public function doctype($name, $idType = 0, $id = null, $quirks = false) |
|
256 | |||
257 | /** |
||
258 | * Process the start tag. |
||
259 | * |
||
260 | * @todo - XMLNS namespace handling (we need to parse, even if it's not valid) |
||
261 | * - XLink, MathML and SVG namespace handling |
||
262 | * - Omission rules: 8.1.2.4 Optional tags |
||
263 | * |
||
264 | * @param string $name |
||
265 | * @param array $attributes |
||
266 | * @param bool $selfClosing |
||
267 | * |
||
268 | * @return int |
||
269 | */ |
||
270 | 109 | public function startTag($name, $attributes = array(), $selfClosing = false) |
|
472 | |||
/**
 * Process an end tag.
 *
 * An end tag named "br" is reported as a parse error and re-emitted as a
 * start tag. End tags for any other void element are ignored. While the
 * insertion mode is IM_INITIAL or IM_BEFORE_HTML, only "html", "br", "head"
 * and "title" are accepted (an implicit "html" start tag is emitted first);
 * any other end tag is reported as a parse error and dropped.
 *
 * Otherwise the namespaces pushed by the current node are removed from the
 * namespace stack, the closest matching open element is closed via
 * autoclose(), and the insertion mode is updated for "head", "body" and the
 * foreign-content roots "svg" and "math".
 *
 * @param string $name The tag name of the end tag.
 */
public function endTag($name)
{
    $lname = $this->normalizeTagName($name);

    // Special case within 12.2.6.4.7: An end tag whose tag name is "br" should be treated as an opening tag
    if ($name === 'br') {
        $this->parseError('Closing tag encountered for void element br.');

        // NOTE(review): after emitting the start tag, processing continues
        // with the regular end-tag handling below. Confirm whether an early
        // return is intended here.
        $this->startTag('br');
    }
    // Ignore closing tags for other unary elements.
    elseif (Elements::isA($name, Elements::VOID_TAG)) {
        return;
    }

    if ($this->insertMode <= static::IM_BEFORE_HTML) {
        // 8.2.5.4.2
        // Strict comparison: tag names are strings, so no type juggling is wanted.
        if (in_array($name, array(
            'html',
            'br',
            'head',
            'title',
        ), true)) {
            $this->startTag('html');
            $this->endTag($name);
            $this->insertMode = static::IM_BEFORE_HEAD;

            return;
        }

        // Ignore the tag.
        $this->parseError('Illegal closing tag at global scope.');

        return;
    }

    // Special case handling for SVG.
    if ($this->insertMode === static::IM_IN_SVG) {
        $lname = Elements::normalizeSvgElement($lname);
    }

    // Key under which the current node's namespace pushes are tracked.
    $cid = spl_object_hash($this->current);

    // XXX: HTML has no parent. What do we do, though,
    // if this element appears in the wrong place?
    if ('html' === $lname) {
        return;
    }

    // Remove the namespaces defined by the current node.
    if (isset($this->pushes[$cid])) {
        for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
            array_shift($this->nsStack);
        }
        unset($this->pushes[$cid]);
    }

    if (!$this->autoclose($lname)) {
        $this->parseError('Could not find closing tag for ' . $lname);
    }

    switch ($lname) {
        case 'head':
            $this->insertMode = static::IM_AFTER_HEAD;
            break;
        case 'body':
            $this->insertMode = static::IM_AFTER_BODY;
            break;
        case 'svg':
        // The MathML root element is named "math" (see $nsRoots); "mathml"
        // is kept only for backward compatibility with the previous check.
        case 'math':
        case 'mathml':
            // Leaving foreign content: resume normal body processing.
            $this->insertMode = static::IM_IN_BODY;
            break;
    }
}
|
547 | |||
548 | 5 | public function comment($cdata) |
|
554 | |||
555 | 91 | public function text($data) |
|
575 | |||
576 | 114 | public function eof() |
|
580 | |||
581 | 13 | public function parseError($msg, $line = 0, $col = 0) |
|
585 | |||
586 | 108 | public function getErrors() |
|
590 | |||
591 | 3 | public function cdata($data) |
|
596 | |||
597 | 5 | public function processingInstruction($name, $data = null) |
|
619 | |||
620 | // ========================================================================== |
||
621 | // UTILITIES |
||
622 | // ========================================================================== |
||
623 | |||
624 | /** |
||
625 | * Apply normalization rules to a tag name. |
||
626 | * See sections 2.9 and 8.1.2. |
||
627 | * |
||
628 | * @param string $tagName |
||
629 | * |
||
630 | * @return string The normalized tag name. |
||
631 | */ |
||
632 | 109 | protected function normalizeTagName($tagName) |
|
639 | |||
640 | protected function quirksTreeResolver($name) |
||
644 | |||
645 | /** |
||
646 | * Automatically climb the tree and close the closest node with the matching $tagName. |
||
647 | * |
||
648 | * @param string $tagName |
||
649 | * |
||
650 | * @return bool |
||
651 | */ |
||
652 | 101 | protected function autoclose($tagName) |
|
668 | |||
669 | /** |
||
670 | * Checks if the given tagname is an ancestor of the present candidate. |
||
671 | * |
||
672 | * If $this->current or anything above $this->current matches the given tag |
||
673 | * name, this returns true. |
||
674 | * |
||
675 | * @param string $tagName |
||
676 | * |
||
677 | * @return bool |
||
678 | */ |
||
679 | protected function isAncestor($tagName) |
||
691 | |||
692 | /** |
||
693 | * Returns true if the immediate parent element is of the given tagname. |
||
694 | * |
||
695 | * @param string $tagName |
||
696 | * |
||
697 | * @return bool |
||
698 | */ |
||
699 | protected function isParent($tagName) |
||
703 | } |
||
704 |
If a method or function can return several different types of value, and you are not certain that only one of them can occur in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.