Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like DOMDoc often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOMDoc, and based on these observations, apply Extract Interface, too.
1 | <?php namespace BetterDOMDocument; |
||
11 | class DOMDoc extends \DOMDocument { |
||
12 | |||
13 | private $auto_ns = FALSE; |
||
14 | public $ns = array(); |
||
15 | public $default_ns = FALSE; |
||
16 | public $error_checking = 'strict'; // Can be 'strict', 'warning', 'none' / FALSE |
||
17 | |||
18 | /** |
||
19 | * Create a new DOMDoc |
||
20 | * |
||
21 | * @param mixed $xml |
||
22 | * $xml can either be an XML string, a DOMDocument, or a DOMElement. |
||
23 | * You can also pass FALSE or NULL (or omit it) and load XML later using loadXML or loadHTML |
||
24 | * |
||
25 | * @param mixed $auto_register_namespaces |
||
26 | * Auto-register namespaces. All namespaces in the root element will be registered for use in xpath queries. |
||
27 | * Namespaces that are not declared in the root element will not be auto-registered |
||
28 | * Defaults to TRUE (Meaning it will auto register all auxiliary namespaces but not the default namespace). |
||
29 | * Pass a prefix string to automatically register the default namespace. |
||
30 | * Pass FALSE to disable auto-namespace registration |
||
31 | * |
||
32 | * @param string|false $error_checking |
||
33 | * Can be 'strict', 'warning', or 'none'. Defaults to 'strict'. |
||
34 | * 'none' suppresses all errors |
||
35 | * 'warning' is the default behavior in DOMDocument |
||
36 | * 'strict' corresponds to DOMDocument strictErrorChecking TRUE |
||
37 | */ |
||
38 | 24 | public function __construct($xml = FALSE, $auto_register_namespaces = TRUE, $error_checking = 'strict') { |
|
73 | |||
74 | /** |
||
75 | * Register a namespace to be used in xpath queries |
||
76 | * |
||
77 | * @param string $prefix |
||
78 | * Namespace prefix to register |
||
79 | * |
||
80 | * @param string $url |
||
81 | * Canonical URL for this namespace prefix |
||
82 | */ |
||
83 | 13 | public function registerNamespace($prefix, $url) { |
|
86 | |||
87 | /** |
||
88 | * Get the list of registered namespaces as an array |
||
89 | */ |
||
90 | 7 | public function getNamespaces() { |
|
93 | |||
94 | /** |
||
95 | * Given a namespace URL, get the prefix |
||
96 | * |
||
97 | * @param string $url |
||
98 | * Canonical URL for this namespace prefix |
||
99 | * |
||
100 | * @return string|false |
||
101 | * The namespace prefix or FALSE if there is no namespace with that URL |
||
102 | */ |
||
public function lookupPrefix($url) {
  // $this->ns is keyed by prefix with the namespace URL as the value (see
  // lookupURL), so searching by value yields the prefix.
  // Strict mode prevents loose-comparison false positives: without it a
  // non-string needle such as 0 or TRUE can "match" an unrelated URL string
  // and return the wrong prefix instead of FALSE.
  return array_search($url, $this->ns, TRUE);
}
||
106 | |||
107 | /** |
||
108 | * Given a namespace prefix, get the URL |
||
109 | * |
||
110 | * @param string $prefix |
||
111 | * namespace prefix |
||
112 | * |
||
113 | * @return string|false |
||
114 | * The namespace URL or FALSE if there is no namespace with that prefix |
||
115 | */ |
||
public function lookupURL($prefix) {
  // A prefix that is not registered (or is mapped to NULL) yields FALSE.
  return isset($this->ns[$prefix]) ? $this->ns[$prefix] : FALSE;
}
||
124 | |||
125 | /** |
||
126 | * Given an xpath, get a list of nodes. |
||
127 | * |
||
128 | * @param string $xpath |
||
129 | * xpath to be used for query |
||
130 | * |
||
131 | * @param mixed $context |
||
132 | * $context can either be an xpath string, or a DOMElement |
||
133 | * Provides context for the xpath query |
||
134 | * |
||
135 | * @return DOMList|false |
||
136 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterability. |
||
137 | */ |
||
138 | 18 | public function xpath($xpath, $context = NULL) { |
|
166 | |||
167 | |||
168 | /** |
||
169 | * Given an xpath, get a single node (first one found) |
||
170 | * |
||
171 | * @param string $xpath |
||
172 | * xpath to be used for query |
||
173 | * |
||
174 | * @param mixed $context |
||
175 | * $context can either be an xpath string, or a DOMElement |
||
176 | * Provides context for the xpath query |
||
177 | * |
||
178 | * @return mixed |
||
179 | * The first node found by the xpath query |
||
180 | */ |
||
181 | 18 | public function xpathSingle($xpath, $context = NULL) { |
|
191 | |||
192 | |||
193 | /** |
||
194 | * Given an CSS selector, get a list of nodes. |
||
195 | * |
||
196 | * @param string $css_selector |
||
197 | * CSS Selector to be used for query |
||
198 | * |
||
199 | * @param mixed $context |
||
200 | * $context can either be an xpath string, or a DOMElement |
||
201 | * Provides context for the CSS selector |
||
202 | * |
||
203 | * @return DOMList|false |
||
204 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterability. |
||
205 | */ |
||
public function select($css_selector, $context = NULL) {
  // Translate the CSS selector into an XPath expression, then let xpath()
  // run the query against the (optional) context.
  $translator = new CssSelectorConverter();

  return $this->xpath($translator->toXPath($css_selector), $context);
}
||
212 | |||
213 | /** |
||
214 | * Given an CSS selector, get a single node. |
||
215 | * |
||
216 | * @param string $css_selector |
||
217 | * CSS Selector to be used for query |
||
218 | * |
||
219 | * @param mixed $context |
||
220 | * $context can either be an xpath string, or a DOMElement |
||
221 | * Provides context for the CSS selector |
||
222 | * |
||
223 | * @return mixed |
||
224 | * The first node matched by the CSS selector (the result of xpathSingle). |
||
225 | */ |
||
public function selectSingle($css_selector, $context = NULL) {
  // Translate the CSS selector into an XPath expression, then let
  // xpathSingle() pick the match within the (optional) context.
  $translator = new CssSelectorConverter();

  return $this->xpathSingle($translator->toXPath($css_selector), $context);
}
||
232 | |||
233 | /** |
||
234 | * Get the document (or an element) as an array |
||
235 | * |
||
236 | * @param string $raw |
||
237 | * Can be either FALSE, 'full', or 'inner'. Defaults to FALSE. |
||
238 | * When set to 'full' every node's full XML is also attached to the array |
||
239 | * When set to 'inner' every node's inner XML is attached to the array. |
||
240 | * |
||
241 | * @param mixed $context |
||
242 | * Optional context node. Can pass an DOMElement object or an xpath string. |
||
243 | * If passed, only the given node will be used when generating the array |
||
244 | */ |
||
public function getArray($raw = FALSE, $context = NULL) {
  // Collect into a real array. Writing keys into a variable holding FALSE
  // relies on auto-vivification from false, which is deprecated as of PHP 8.1.
  // The historical "nothing collected => FALSE" result is restored on return.
  $array = array();

  // $context is passed by reference and may be rewritten by createContext().
  // NOTE(review): presumably an xpath string is resolved to a node here and,
  // with the third argument FALSE, an omitted context stays empty -- confirm
  // against createContext().
  $this->createContext($context, 'xpath', FALSE);

  if ($context) {
    // Loose comparisons and two independent checks are kept on purpose so
    // that existing callers passing non-string flags see unchanged results.
    if ($raw == 'full') {
      $array['#raw'] = $this->saveXML($context);
    }
    if ($raw == 'inner') {
      $array['#raw'] = $this->innerText($context);
    }

    // Attributes are exposed under '@'-prefixed keys.
    if ($context->hasAttributes()) {
      foreach ($context->attributes as $attr) {
        $array['@' . $attr->nodeName] = $attr->nodeValue;
      }
    }

    if ($context->hasChildNodes()) {
      if ($context->childNodes->length == 1 && $context->firstChild->nodeType == XML_TEXT_NODE) {
        // A lone text child is stored directly as the element's text.
        $array['#text'] = $context->firstChild->nodeValue;
      }
      else {
        foreach ($context->childNodes as $childNode) {
          if ($childNode->nodeType == XML_ELEMENT_NODE) {
            // Child elements are grouped by name; each entry is the
            // recursively converted child.
            $array[$childNode->nodeName][] = $this->getArray($raw, $childNode);
          }
          elseif ($childNode->nodeType == XML_CDATA_SECTION_NODE) {
            $array['#text'] = $childNode->textContent;
          }
        }
      }
    }
  }
  // Else no node was passed, which means we are processing the entire domDocument
  else {
    foreach ($this->childNodes as $childNode) {
      if ($childNode->nodeType == XML_ELEMENT_NODE) {
        $array[$childNode->nodeName][] = $this->getArray($raw, $childNode);
      }
    }
  }

  // Every write above leaves the array non-empty, so an empty array means
  // nothing was collected; keep returning FALSE in that case.
  return empty($array) ? FALSE : $array;
}
||
290 | |||
291 | /** |
||
292 | * Get the inner text of an element |
||
293 | * |
||
294 | * @param mixed $context |
||
295 | * Optional context node. Can pass an DOMElement object or an xpath string. |
||
296 | */ |
||
public function innerText($context = NULL) {
  // $context is passed by reference; createContext() may replace it.
  $this->createContext($context, 'xpath');

  // Capture everything between the element's opening and closing tag in its
  // serialized form. The /s flag lets the capture span multiple lines.
  $tag = preg_quote($context->nodeName);
  $serialized = $this->saveXML($context);
  $captured = array();

  if (!preg_match("/<" . $tag . "\b[^>]*>(.*)<\/" . $tag . ">/s", $serialized, $captured)) {
    // No open/close tag pair found in the serialization.
    return '';
  }

  return $captured[1];
}
||
309 | |||
310 | /** |
||
311 | * Create an DOMElement from XML and attach it to the DOMDocument |
||
312 | * |
||
313 | * Note that this does not place it anywhere in the dom tree, it merely imports it. |
||
314 | * |
||
315 | * @param string $xml |
||
316 | * XML string to import |
||
317 | */ |
||
318 | 5 | public function createElementFromXML($xml) { |
|
350 | |||
351 | /** |
||
352 | * Append a child to the context node, make it the last child |
||
353 | * |
||
354 | * @param mixed $newnode |
||
355 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
356 | * |
||
357 | * @param mixed $context |
||
358 | * $context can either be an xpath string, or a DOMElement |
||
359 | * Omitting $context results in using the root document element as the context |
||
360 | * |
||
361 | * @return DOMElement|false |
||
362 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
363 | * then you should replace your DOMElement with the returned one. |
||
364 | */ |
||
365 | 1 | public function append($newnode, $context = NULL) { |
|
382 | |||
383 | /** |
||
384 | * Append a child to the context node, make it the first child |
||
385 | * |
||
386 | * @param mixed $newnode |
||
387 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
388 | * |
||
389 | * @param mixed $context |
||
390 | * $context can either be an xpath string, or a DOMElement |
||
391 | * Omitting $context results in using the root document element as the context |
||
392 | * |
||
393 | * @return DOMElement|false |
||
394 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
395 | * then you should replace your DOMElement with the returned one. |
||
396 | */ |
||
397 | 1 | View Code Duplication | public function prepend($newnode, $context = NULL) { |
407 | |||
408 | /** |
||
409 | * Prepend a sibling to the context node, put it just before the context node |
||
410 | * |
||
411 | * @param mixed $newnode |
||
412 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
413 | * |
||
414 | * @param mixed $context |
||
415 | * $context can either be an xpath string, or a DOMElement |
||
416 | * Omitting $context results in using the root document element as the context |
||
417 | * |
||
418 | * @return DOMElement|false |
||
419 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
420 | * then you should replace your DOMElement with the returned one. |
||
421 | */ |
||
422 | 1 | View Code Duplication | public function prependSibling($newnode, $context = NULL) { |
432 | |||
433 | /** |
||
434 | * Append a sibling to the context node, put it just after the context node |
||
435 | * |
||
436 | * @param mixed $newnode |
||
437 | * $newnode can either be an XML string, a DOMDocument, or a DOMElement. |
||
438 | * |
||
439 | * @param mixed $context |
||
440 | * $context can either be an xpath string, or a DOMElement |
||
441 | * Omitting $context results in using the root document element as the context |
||
442 | * |
||
443 | * @return DOMElement|false |
||
444 | * The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement |
||
445 | * then you should replace your DOMElement with the returned one. |
||
446 | */ |
||
447 | 1 | public function appendSibling($newnode, $context) { |
|
470 | |||
471 | /** |
||
472 | * Given an xpath or DOMElement, return a new DOMDoc. |
||
473 | * |
||
474 | * @param mixed $node |
||
475 | * $node can either be an xpath string or a DOMElement. |
||
476 | * |
||
477 | * @return DOMDoc |
||
478 | * A new DOMDoc created from the xpath or DOMElement |
||
479 | */ |
||
480 | 7 | public function extract($node, $auto_register_namespaces = TRUE, $error_checking = 'none') { |
|
486 | |||
487 | /** |
||
488 | * Given a pair of nodes, replace the first with the second |
||
489 | * |
||
490 | * @param mixed $node |
||
491 | * Node to be replaced. Can either be an xpath string or a DOMDocument (or even a DOMNode). |
||
492 | * |
||
493 | * @param mixed $replace |
||
494 | * Replace $node with $replace. Replace can be an XML string, or a DOMNode |
||
495 | * |
||
496 | * @return mixed |
||
497 | * The overwritten / replaced node. |
||
498 | */ |
||
public function replace($node, $replace) {
  // Both arguments are passed by reference and may be rewritten by
  // createContext() ('xpath' for the target, 'xml' for the replacement).
  $this->createContext($node, 'xpath');
  $this->createContext($replace, 'xml');

  // A whole document cannot be inserted as a child (and has no
  // ownerDocument); use its root element instead.
  if ($replace instanceof \DOMDocument) {
    $replace = $replace->documentElement;
  }

  if (!$node || !$replace) {
    return FALSE;
  }

  // A detached node has no parent to perform the swap on; fail softly like
  // the other unusable-input cases instead of calling a method on NULL.
  if (!$node->parentNode) {
    return FALSE;
  }

  // Import the replacement when it belongs to another document. Comparing
  // the document nodes themselves (rather than their root elements) also
  // works when either document has no root element yet.
  if (!$replace->ownerDocument->isSameNode($this)) {
    $replace = $this->importNode($replace, TRUE);
    if (!$replace) {
      return FALSE;
    }
  }

  $node->parentNode->replaceChild($replace, $node);

  // The caller gets the node that now lives in the document.
  return $replace;
}
||
514 | |||
515 | /** |
||
516 | * Given a node(s), remove / delete them |
||
517 | * |
||
518 | * @param mixed $node |
||
519 | * Can pass a DOMNode, a NodeList, DOMNodeList, an xpath string, or an array of any of these. |
||
520 | */ |
||
521 | 1 | public function remove($node) { |
|
542 | |||
543 | /** |
||
544 | * Given an XSL string, transform the DOMDoc (or a passed context node) |
||
545 | * |
||
546 | * @param string $xsl |
||
547 | * XSL Transformation |
||
548 | * |
||
549 | * @param mixed $context |
||
550 | * $context can either be an xpath string, or a DOMElement. Omitting it |
||
551 | * results in transforming the entire document |
||
552 | * |
||
553 | * @return a new DOMDoc |
||
554 | */ |
||
555 | 4 | public function tranform($xsl, $context = NULL) { |
|
572 | |||
573 | /** |
||
574 | * Given a node, change its namespace to the specified namespace in situ |
||
575 | * |
||
576 | * @param mixed $node |
||
577 | * Node to be changed. Can either be an xpath string or a DOMElement. |
||
578 | * |
||
579 | * @param mixed $prefix |
||
580 | * prefix for the new namespace |
||
581 | * |
||
582 | * @param mixed $url |
||
583 | * The URL for the new namespace |
||
584 | * |
||
585 | * @return mixed |
||
586 | * The node with the new namespace. The node will also be changed in-situ in the document as well. |
||
587 | */ |
||
public function changeNamespace($node, $prefix, $url) {
  // $node is passed by reference; createContext() may replace it.
  $this->createContext($node, 'xpath');

  if (!$node) {
    return FALSE;
  }

  // Registration happens before the type check, as it always has, so the
  // namespace is registered even when the node turns out to be unsupported.
  $this->registerNamespace($prefix, $url);

  // instanceof (rather than an exact class-name match) also accepts
  // subclasses of DOMElement.
  if (!($node instanceof \DOMElement)) {
    $type = is_object($node) ? get_class($node) : gettype($node);
    // @@TODO: Report the correct calling file and number
    // NOTE(review): unqualified Exception resolves inside the current
    // namespace unless imported at the top of the file -- confirm.
    throw new Exception("Changing the namespace of a " . $type . " is not supported");
  }

  // Both values land inside XSL attribute value templates: XML-escape them so
  // characters such as & or " cannot break the stylesheet, and double any
  // braces so they are taken literally instead of as XPath expressions.
  $braces = array('{' => '{{', '}' => '}}');
  $xsl_prefix = strtr(htmlspecialchars($prefix, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), $braces);
  $xsl_url = strtr(htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'), $braces);

  // Rebuild every element under the new prefix/namespace, copying attributes
  // and recursing into children.
  $xsl = '
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:template match="*">
        <xsl:element name="' . $xsl_prefix . ':{local-name()}" namespace="' . $xsl_url . '">
          <xsl:copy-of select="@*"/>
          <xsl:apply-templates/>
        </xsl:element>
      </xsl:template>
    </xsl:stylesheet>';

  // tranform() (sic) is the method's actual name in this class.
  $transformed = $this->tranform($xsl, $node);

  return $this->replace($node, $transformed->documentElement);
}
||
616 | |||
617 | /** |
||
618 | * Get a lossless HTML representation of the XML |
||
619 | * |
||
620 | * Transforms the document (or passed context) into a set of HTML spans. |
||
621 | * The element name becomes the class, all other attributes become HTML5 |
||
622 | * "data-" attributes. |
||
623 | * |
||
624 | * @param mixed $context |
||
625 | * $context can either be an xpath string, or a DOMElement. Omitting it |
||
626 | * results in transforming the entire document |
||
627 | * |
||
628 | * @param array $options |
||
629 | * Options for transforming the XML into HTML. The following options are supported: |
||
630 | * 'xlink' => {TRUE or xpath} |
||
631 | * Transform xlink links into <a href> elements. If you specify 'xlink' => TRUE then |
||
632 | * it will transform all elements with xlink:type = simple into a <a href> element. |
||
633 | * Alternatively you may specify your own xpath for selecting which elements get transformed |
||
634 | * into <a href> tags. |
||
635 | * @return HTML string |
||
636 | */ |
||
637 | 3 | public function asHTML($context = NULL, $options = array()) { |
|
709 | |||
710 | /** |
||
711 | * Output the DOMDoc as an XML string |
||
712 | * |
||
713 | * @param mixed $context |
||
714 | * $context can either be an xpath string, or a DOMElement. Omitting it |
||
715 | * results in outputting the entire document |
||
716 | * |
||
717 | * @return XML string |
||
718 | */ |
||
719 | 12 | public function out($context = NULL) { |
|
744 | |||
745 | /** |
||
746 | * Magic method for casting a DOMDoc as a string |
||
747 | */ |
||
748 | 1 | public function __toString() { |
|
751 | |||
752 | 24 | public function setErrorChecking($error_checking) { |
|
764 | |||
765 | 14 | public static function loadFile($file_or_url, $auto_register_namespaces = TRUE) { |
|
773 | |||
public function loadHTML($source, $options = NULL) {
  // The NULL default is kept for backward compatibility, but the parent
  // expects an integer bitmask: forwarding NULL is deprecated on PHP 8.1+.
  // 0 means "no options", which is what NULL was coerced to before.
  $success = parent::loadHTML($source, $options === NULL ? 0 : $options);

  // Namespace auto-registration runs regardless of whether loading succeeded.
  $this->AutoRegisterNamespace(TRUE);

  return (bool) $success;
}
||
780 | |||
781 | 14 | public function loadXML($source, $options = NULL) { |
|
787 | |||
788 | 24 | private function AutoRegisterNamespace($auto_register_namespaces) { |
|
838 | |||
839 | 22 | private function createContext(&$context, $type = 'xpath', $createDocument = TRUE) { |
|
869 | } |
||
870 | |||
871 | |||
873 |