Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like DOMDoc often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOMDoc, and based on these observations, apply Extract Interface, too.
1 | <?php namespace BetterDOMDocument; |
||
11 | class DOMDoc extends \DOMDocument { |
||
12 | |||
13 | private $auto_ns = FALSE; |
||
14 | public $ns = array(); |
||
15 | public $default_ns = FALSE; |
||
16 | public $error_checking = 'strict'; // Can be 'strict', 'warning', 'none' / FALSE |
||
17 | |||
/**
 * Build a DOMDoc, optionally seeding it with existing XML.
 *
 * @param object|string|false|null $xml
 *   The initial content. Accepted forms:
 *   - a DOMElement: it is imported (deep) and becomes the root element;
 *   - a DOMDocument: its root element, if any, is imported (deep);
 *   - another DOMDoc: as for DOMDocument, and its registered namespaces
 *     ($ns) are copied over as well;
 *   - any other object exposing __toString(): the string form is loaded;
 *   - a non-empty string: it is loaded as markup.
 *   Pass FALSE or NULL (or omit it) to start empty and load content later
 *   using loadXML or loadHTML.
 * @param bool|string $auto_register_namespaces
 *   Auto-register namespaces. All namespaces in the root element will be
 *   registered for use in xpath queries. Namespaces that are not declared in
 *   the root element will not be auto-registered.
 *   Defaults to TRUE (meaning it will auto-register all auxiliary namespaces
 *   but not the default namespace).
 *   Pass a prefix string to automatically register the default namespace
 *   under that prefix.
 *   Pass FALSE to disable auto-namespace registration.
 * @param false|string $error_checking
 *   Can be 'strict', 'warning', or 'none'. Defaults to 'strict'.
 *   'none' suppresses all errors.
 *   'warning' is the default behavior in DOMDocument.
 *   'strict' corresponds to DOMDocument strictErrorChecking TRUE.
 */
public function __construct($xml = FALSE, $auto_register_namespaces = TRUE, $error_checking = 'strict') {
  parent::__construct();

  // Error handling must be configured before any markup is parsed below.
  $this->setErrorChecking($error_checking);

  if ($xml instanceof \DOMElement) {
    $this->appendChild($this->importNode($xml, true));
  }
  elseif ($xml instanceof \DOMDocument) {
    // Handles plain DOMDocument and DOMDoc alike; an empty source document
    // has no root element to import.
    if ($xml->documentElement) {
      $this->appendChild($this->importNode($xml->documentElement, true));
    }
    // Only a DOMDoc carries a registered-namespace map worth inheriting.
    if ($xml instanceof self) {
      $this->ns = $xml->ns;
    }
  }
  elseif (is_object($xml)) {
    // Any other object is only usable if it can render itself as a string.
    if (method_exists($xml, '__toString')) {
      $this->loadFromString($xml->__toString());
    }
  }
  elseif (is_string($xml) && !empty($xml)) {
    $this->loadFromString($xml);
  }

  if ($auto_register_namespaces) {
    $this->AutoRegisterNamespace($auto_register_namespaces);
  }
}
|
68 | |||
/**
 * Register a namespace prefix for use in xpath queries.
 *
 * Stores the mapping in $this->ns; an existing entry for the same prefix is
 * overwritten.
 *
 * @param string $prefix
 *   Namespace prefix to register.
 * @param string $url
 *   Canonical URL for this namespace prefix.
 */
public function registerNamespace($prefix, $url) {
  $this->ns[$prefix] = $url;
}
|
80 | |||
81 | /** |
||
82 | * Get the list of registered namespaces as an array. |
||
83 | * |
||
84 | * @return array |
||
85 | * An array in form ['prefix' => 'namespace-uri'] |
||
86 | */ |
||
87 | 7 | public function getNamespaces() { |
|
90 | |||
91 | /** |
||
92 | * Given a namespace URL, get the prefix. |
||
93 | * |
||
94 | * @param string $url |
||
95 | * Canonical URL for this namespace prefix. |
||
96 | * |
||
97 | * @return string|false |
||
98 | * The namespace prefix or FALSE if there is no namespace with that URL. |
||
99 | */ |
||
100 | 1 | public function lookupPrefix($url) { |
|
103 | |||
104 | /** |
||
105 | * Given a namespace prefix, get the URL. |
||
106 | * |
||
107 | * @param string $prefix |
||
108 | * Namespace prefix. |
||
109 | * |
||
110 | * @return string|false |
||
111 | * The namespace URL or FALSE if there is no namespace with that prefix |
||
112 | */ |
||
113 | 1 | public function lookupURL($prefix) { |
|
121 | |||
122 | /** |
||
123 | * Given an xpath, get a list of nodes. |
||
124 | * |
||
125 | * @param string $xpath |
||
126 | * XPath to be used for query. |
||
127 | * |
||
128 | * @param mixed $context |
||
129 | * $context can either be an xpath string, or a DOMElement. |
||
130 | * Provides context for the xpath query. |
||
131 | * |
||
132 | * @return DOMList|false |
||
133 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterability. |
||
134 | */ |
||
135 | 19 | public function xpath($xpath, $context = NULL) { |
|
163 | |||
164 | |||
165 | /** |
||
166 | * Given an xpath, get a single node (first one found) |
||
167 | * |
||
168 | * @param string $xpath |
||
169 | * xpath to be used for query |
||
170 | * |
||
171 | * @param mixed $context |
||
172 | * $context can either be an xpath string, or a DOMElement |
||
173 | * Provides context for the xpath query |
||
174 | * |
||
175 | * @return mixed |
||
176 | * The first node found by the xpath query |
||
177 | */ |
||
178 | 18 | public function xpathSingle($xpath, $context = NULL) { |
|
188 | |||
189 | |||
190 | /** |
||
191 | * Given a CSS selector, get a list of nodes. |
||
192 | * |
||
193 | * @param string $css_selector |
||
194 | * CSS Selector to be used for query |
||
195 | * |
||
196 | * @param mixed $context |
||
197 | * $context can either be an xpath string, or a DOMElement |
||
198 | * Provides context for the CSS selector |
||
199 | * |
||
200 | * @return DOMList|false |
||
201 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterability. |
||
202 | */ |
||
203 | 1 | public function select($css_selector, $context = NULL) { |
|
209 | |||
210 | /** |
||
211 | * Given a CSS selector, get a single node. |
||
212 | * |
||
213 | * @param string $css_selector |
||
214 | * CSS Selector to be used for query |
||
215 | * |
||
216 | * @param mixed $context |
||
217 | * $context can either be an xpath string, or a DOMElement |
||
218 | * Provides context for the CSS selector |
||
219 | * |
||
220 | * @return DOMList |
||
221 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterability. |
||
222 | */ |
||
223 | 1 | public function selectSingle($css_selector, $context = NULL) { |
|
229 | |||
230 | /** |
||
231 | * Get the document (or an element) as an array |
||
232 | * |
||
233 | * @param string $raw |
||
234 | * Can be either FALSE, 'full', or 'inner'. Defaults to FALSE. |
||
235 | * When set to 'full' every node's full XML is also attached to the array |
||
236 | * When set to 'inner' every node's inner XML is attached to the array. |
||
237 | * |
||
238 | * @param mixed $context |
||
239 | * Optional context node. Can pass an DOMElement object or an xpath string. |
||
240 | * If passed, only the given node will be used when generating the array |
||
241 | */ |
||
242 | 1 | public function getArray($raw = FALSE, $context = NULL) { |
|
287 | |||
288 | /** |
||
289 | * Get the inner text of an element |
||
290 | * |
||
291 | * @param mixed $context |
||
292 | * Optional context node. Can pass an DOMElement object or an xpath string. |
||
293 | */ |
||
294 | 1 | public function innerText($context = NULL) { |
|
306 | |||
307 | /** |
||
308 | * Create an DOMElement from XML and attach it to the DOMDocument |
||
309 | * |
||
310 | * Note that this does not place it anywhere in the dom tree, it merely imports it. |
||
311 | * |
||
312 | * @param string $xml |
||
313 | * XML string to import |
||
314 | */ |
||
315 | 5 | public function createElementFromXML($xml) { |
|
347 | |||
348 | /** |
||
349 | * Append a child to the context node, make it the last child |
||
350 | * |
||
351 | * @param mixed $newnode |
||
352 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
353 | * |
||
354 | * @param mixed $context |
||
355 | * $context can either be an xpath string, or a DOMElement |
||
356 | * Omitting $context results in using the root document element as the context |
||
357 | * |
||
358 | * @return DOMElement|false |
||
359 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
360 | * then you should replace your DOMElement with the returned one. |
||
361 | */ |
||
362 | 1 | View Code Duplication | public function append($newnode, $context = NULL) { |
372 | |||
373 | /** |
||
374 | * Append a child to the context node, make it the first child |
||
375 | * |
||
376 | * @param mixed $newnode |
||
377 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
378 | * |
||
379 | * @param mixed $context |
||
380 | * $context can either be an xpath string, or a DOMElement |
||
381 | * Omitting $context results in using the root document element as the context |
||
382 | * |
||
383 | * @return DOMElement|false |
||
384 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
385 | * then you should replace your DOMElement with the returned one. |
||
386 | */ |
||
387 | 1 | View Code Duplication | public function prepend($newnode, $context = NULL) { |
397 | |||
398 | /** |
||
399 | * Prepend a sibling to the context node, put it just before the context node |
||
400 | * |
||
401 | * @param mixed $newnode |
||
402 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
403 | * |
||
404 | * @param mixed $context |
||
405 | * $context can either be an xpath string, or a DOMElement |
||
406 | * Omitting $context results in using the root document element as the context |
||
407 | * |
||
408 | * @return DOMElement|false |
||
409 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
410 | * then you should replace your DOMElement with the returned one. |
||
411 | */ |
||
412 | 1 | View Code Duplication | public function prependSibling($newnode, $context = NULL) { |
422 | |||
423 | /** |
||
424 | * Append a sibling to the context node, put it just after the context node |
||
425 | * |
||
426 | * @param mixed $newnode |
||
427 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
428 | * |
||
429 | * @param mixed $context |
||
430 | * $context can either be an xpath string, or a DOMElement |
||
431 | * Omitting $context results in using the root document element as the context |
||
432 | * |
||
433 | * @return DOMElement|false |
||
434 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
435 | * then you should replace your DOMElement with the returned one. |
||
436 | */ |
||
437 | 1 | public function appendSibling($newnode, $context) { |
|
453 | |||
454 | /** |
||
455 | * Given an xpath or DOMElement, return a new DOMDoc. |
||
456 | * |
||
457 | * @param mixed $node |
||
458 | * $node can either be an xpath string or a DOMElement. |
||
459 | * |
||
460 | * @return DOMDoc |
||
461 | * A new DOMDoc created from the xpath or DOMElement |
||
462 | */ |
||
463 | 7 | public function extract($node, $auto_register_namespaces = TRUE, $error_checking = 'none') { |
|
469 | |||
470 | /** |
||
471 | * Given a pair of nodes, replace the first with the second |
||
472 | * |
||
473 | * @param mixed $node |
||
474 | * Node to be replaced. Can either be an xpath string or a DOMDocument (or even a DOMNode). |
||
475 | * |
||
476 | * @param mixed $replace |
||
477 | * Replace $node with $replace. Replace can be an XML string, or a DOMNode |
||
478 | * |
||
479 | * @return mixed |
||
480 | * The overwritten / replaced node. |
||
481 | */ |
||
482 | 2 | public function replace($node, $replace) { |
|
497 | |||
498 | /** |
||
499 | * Given a node(s), remove / delete them |
||
500 | * |
||
501 | * @param mixed $node |
||
502 | * Can pass a DOMNode, a NodeList, DOMNodeList, an xpath string, or an array of any of these. |
||
503 | */ |
||
504 | 1 | public function remove($node) { |
|
525 | |||
526 | /** |
||
527 | * Given an XSL string, transform the DOMDoc (or a passed context node) |
||
528 | * |
||
529 | * @param string $xsl |
||
530 | * XSL Transformation |
||
531 | * |
||
532 | * @param mixed $context |
||
533 | * $context can either be an xpath string, or a DOMElement. Omitting it |
||
534 | * results in transforming the entire document |
||
535 | * |
||
536 | * @return a new DOMDoc |
||
537 | */ |
||
538 | 4 | public function tranform($xsl, $context = NULL) { |
|
555 | |||
/**
 * Given a node, change its namespace to the specified namespace in situ.
 *
 * The node is resolved through createContext(), the prefix/URL pair is
 * registered on this document, and the element is then rebuilt by an XSL
 * transformation that re-creates every element under the new namespace
 * (attributes are copied unchanged). The rebuilt element replaces the
 * original in the document.
 *
 * @param mixed $node
 *   Node to be changed. Can either be an xpath string or a DOMElement.
 *
 * @param mixed $prefix
 *   Prefix for the new namespace.
 *
 * @param mixed $url
 *   The URL for the new namespace.
 *
 * @return mixed
 *   FALSE when the node could not be resolved; otherwise the result of
 *   replace() for the re-namespaced element. The node is also changed
 *   in situ in the document.
 *
 * @throws \Exception
 *   When the resolved node is not a DOMElement.
 */
public function changeNamespace($node, $prefix, $url) {
  $this->createContext($node, 'xpath');

  if (!$node) {
    return FALSE;
  }

  $this->registerNamespace($prefix, $url);

  // Accept DOMElement subclasses too; a strict class-name comparison would
  // wrongly reject them.
  if (!($node instanceof \DOMElement)) {
    // @@TODO: Report the correct calling file and number
    // get_class() is only valid for objects, so fall back to gettype().
    $type = is_object($node) ? get_class($node) : gettype($node);
    throw new \Exception("Changing the namespace of a " . $type . " is not supported");
  }

  // Both values are spliced into XSL attribute values. Escape XML
  // metacharacters so the stylesheet stays well-formed, and double any curly
  // braces so they are not evaluated as attribute value templates.
  $braces = array('{' => '{{', '}' => '}}');
  $safe_prefix = strtr(htmlspecialchars((string) $prefix, ENT_QUOTES | ENT_XML1, 'UTF-8'), $braces);
  $safe_url = strtr(htmlspecialchars((string) $url, ENT_QUOTES | ENT_XML1, 'UTF-8'), $braces);

  $xsl = '
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:template match="*">
        <xsl:element name="' . $safe_prefix . ':{local-name()}" namespace="' . $safe_url . '">
          <xsl:copy-of select="@*"/>
          <xsl:apply-templates/>
        </xsl:element>
      </xsl:template>
    </xsl:stylesheet>';

  $transformed = $this->tranform($xsl, $node);
  return $this->replace($node, $transformed->documentElement);
}
||
599 | |||
600 | /** |
||
601 | * Get a lossless HTML representation of the XML |
||
602 | * |
||
603 | * Transforms the document (or passed context) into a set of HTML spans. |
||
604 | * The element name becomes the class, all other attributes become HTML5 |
||
605 | * "data-" attributes. |
||
606 | * |
||
607 | * @param mixed $context |
||
608 | * $context can either be an xpath string, or a DOMElement. Omitting it |
||
609 | * results in transforming the entire document |
||
610 | * |
||
611 | * @param array $options |
||
612 | * Options for transforming the XML into HTML. The following options are supported: |
||
613 | * 'xlink' => {TRUE or xpath} |
||
614 | * Transform xlink links into <a href> elements. If you specify 'xlink' => TRUE then |
||
615 | * it will transform all elements with xlink:type = simple into a <a href> element. |
||
616 | * Alternatively you may specify your own xpath for selecting which elements get transformed |
||
617 | * into <a href> tags. |
||
618 | * @return HTML string |
||
619 | */ |
||
620 | 3 | public function asHTML($context = NULL, $options = array()) { |
|
692 | |||
/**
 * Output the DOMDoc (or one node of it) as an XML string.
 *
 * Side effect: when the context is an element, every registered namespace
 * prefix that the element does not already declare is added to it as an
 * "xmlns:prefix" attribute before serializing.
 *
 * @param mixed $context
 *   $context can either be an xpath string, or a DOMElement. Omitting it
 *   results in outputting the entire document.
 *
 * @return string
 *   The serialized XML, or an empty string when the context cannot be
 *   resolved.
 */
public function out($context = NULL) {
  $this->createContext($context, 'xpath');
  if (!$context) {
    return '';
  }

  // Copy namespace prefixes. Only elements can carry attributes, so any
  // other kind of context node is serialized as-is.
  if ($context instanceof \DOMElement) {
    foreach ($this->ns as $prefix => $namespace) {
      if (!empty($namespace) && !$context->hasAttribute('xmlns:' . $prefix)) {
        $context->setAttribute('xmlns:' . $prefix, $namespace);
      }
    }
  }

  $output = $this->saveXML($context, LIBXML_NOEMPTYTAG);

  // Check to see if it's HTML; if it is we need to fix broken HTML void
  // elements. A document without a root element is never treated as HTML.
  $root = $this->documentElement;
  $is_html = $root && ($root->lookupNamespaceURI(NULL) == 'http://www.w3.org/1999/xhtml' || $root->tagName == 'html');
  if (!$is_html) {
    return $output;
  }

  // The types listed are HTML "void" elements.
  // LIBXML_NOEMPTYTAG expands them to <br></br>; find any that have no child
  // nodes and replace them with a self-closed version.
  $pattern = '<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)(\b[^<]*)><\/\1>';
  return preg_replace('/' . $pattern . '/', '<$1$2/>', $output);
}
||
727 | |||
728 | /** |
||
729 | * Magic method for casting a DOMDoc as a string |
||
730 | */ |
||
731 | 1 | public function __toString() { |
|
734 | |||
735 | /** |
||
736 | * Magic method to get good debug info view var_dump |
||
737 | */ |
||
738 | public function __debugInfo() { |
||
747 | |||
748 | 25 | public function setErrorChecking($error_checking) { |
|
760 | |||
761 | 14 | public static function loadFile($file_or_url, $auto_register_namespaces = TRUE) { |
|
769 | |||
770 | 1 | public function loadHTML($source, $options = NULL) { |
|
776 | |||
777 | 15 | public function loadXML($source, $options = NULL) { |
|
783 | |||
784 | /** |
||
785 | * Removes a namespace from the document, moving the |
||
786 | * namespaced nodes to the default namespace. |
||
787 | * |
||
788 | * @param string $prefix |
||
789 | * Namespace prefix. |
||
790 | */ |
||
791 | 1 | public function removeNamespace($prefix) { |
|
802 | |||
/**
 * Discover namespace declarations and register them for xpath queries.
 *
 * Two passes are made:
 *  1. An xpath 'namespace::*' query. Prefixed namespaces are registered
 *     under their own prefix (the reserved 'xml' namespace is skipped).
 *     A namespace without a prefix is recorded as $default_ns and registered
 *     under either the caller-supplied prefix or the root element tag name.
 *  2. Only if nothing has been registered so far and a root element exists,
 *     the root element's 'xmlns' / 'xmlns:*' attributes are read directly.
 *
 * @param bool|string $auto_register_namespaces
 *   When a string, it is used as the prefix for the default namespace.
 *   Otherwise the root element tag name is used, unless that prefix is
 *   already registered.
 */
protected function AutoRegisterNamespace($auto_register_namespaces) {
  $this->auto_ns = TRUE;
  $has_explicit_prefix = is_string($auto_register_namespaces);

  // If it's an "XML" document, then get namespaces via xpath
  $finder = new \DOMXPath($this);
  foreach ($finder->query('namespace::*') as $ns_node) {
    if (empty($ns_node->prefix)) {
      // No prefix: this is the default namespace.
      $this->default_ns = $ns_node->nodeValue;
      if ($has_explicit_prefix) {
        $this->registerNamespace($auto_register_namespaces, $ns_node->nodeValue);
        continue;
      }
      // Otherwise, automatically set-up the root element tag name as the prefix for the default namespace
      $root_tag = $this->documentElement->tagName;
      if (empty($this->ns[$root_tag])) {
        $this->registerNamespace($root_tag, $this->documentElement->getAttribute('xmlns'));
      }
      continue;
    }

    // Never register the reserved "xml" namespace.
    $is_reserved = $ns_node->prefix == 'xml' || $ns_node->nodeValue == 'http://www.w3.org/XML/1998/namespace';
    if (!$is_reserved) {
      $this->registerNamespace($ns_node->prefix, $ns_node->nodeValue);
    }
  }

  // If it's an "HTML" document, we get namespaces via attributes.
  // This fallback only applies when the xpath pass registered nothing.
  if (!empty($this->ns) || empty($this->documentElement)) {
    return;
  }

  foreach ($this->documentElement->attributes as $attribute) {
    if ($attribute->name == 'xmlns') {
      $this->default_ns = $attribute->value;
      if ($has_explicit_prefix) {
        // A prefix string was given, so the default namespace is registered under it.
        $this->registerNamespace($auto_register_namespaces, $attribute->value);
        continue;
      }
      // Otherwise, automatically set-up the root element tag name as the prefix for the default namespace
      $root_tag = $this->documentElement->tagName;
      if (empty($this->ns[$root_tag])) {
        $this->registerNamespace($root_tag, $attribute->value);
      }
      continue;
    }

    if (substr($attribute->name, 0, 6) == 'xmlns:') {
      // Everything after "xmlns:" is the prefix being declared.
      $this->registerNamespace(substr($attribute->name, 6), $attribute->value);
    }
  }
}
|
853 | |||
854 | 23 | protected function createContext(&$context, $type = 'xpath', $createDocument = TRUE) { |
|
898 | |||
899 | 15 | protected function loadFromString($xml) { |
|
907 | } |
||
908 |
There are different options for fixing this problem.
If you want to be on the safe side, you can add an additional type-check:
If you are sure that the expression is traversable, you might want to add a doc comment cast to improve IDE auto-completion and static analysis:
Mark the issue as a false positive: just hover over the remove button in the top-right corner of this issue for more options.