Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like DOMDoc often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOMDoc, and based on these observations, apply Extract Interface, too.
1 | <?php namespace BetterDOMDocument; |
||
11 | class DOMDoc extends \DOMDocument { |
||
12 | |||
13 | private $auto_ns = FALSE; |
||
14 | public $ns = array(); |
||
15 | public $default_ns = FALSE; |
||
16 | public $error_checking = 'strict'; // Can be 'strict', 'warning', 'none' / FALSE |
||
17 | |||
18 | /** |
||
19 | * Create a new DOMDoc |
||
20 | * |
||
21 | * @param mixed $xml |
||
22 | * $xml can either be an XML string, a DOMDocument, or a DOMElement. |
||
23 | * You can also pass FALSE or NULL (or omit it) and load XML later using loadXML or loadHTML |
||
24 | * |
||
25 | * @param mixed $auto_register_namespaces |
||
26 | * Auto-register namespaces. All namespaces in the root element will be registered for use in xpath queries. |
||
27 | * Namespaces that are not declared in the root element will not be auto-registered |
||
28 | * Defaults to TRUE (Meaning it will auto register all auxiliary namespaces but not the default namespace). |
||
29 | * Pass a prefix string to automatically register the default namespace. |
||
30 | * Pass FALSE to disable auto-namespace registration |
||
31 | * |
||
32 | * @param bool $error_checking |
||
33 | * Can be 'strict', 'warning', or 'none'. Defaults to 'strict'. |
||
34 | * 'none' suppresses all errors |
||
35 | * 'warning' is the default behavior in DOMDocument |
||
36 | * 'strict' corresponds to DOMDocument strictErrorChecking TRUE |
||
37 | */ |
||
38 | 19 | public function __construct($xml = FALSE, $auto_register_namespaces = TRUE, $error_checking = 'strict') { |
|
75 | |||
76 | /** |
||
77 | * Register a namespace to be used in xpath queries |
||
78 | * |
||
79 | * @param string $prefix |
||
80 | * Namespace prefix to register |
||
81 | * |
||
82 | * @param string $url |
||
83 | * Canonical URL for this namespace prefix |
||
84 | */ |
||
85 | 10 | public function registerNamespace($prefix, $url) { |
|
88 | |||
89 | /** |
||
90 | * Get the list of registered namespaces as an array |
||
91 | */ |
||
92 | 6 | public function getNamespaces() { |
|
95 | |||
96 | /** |
||
97 | * Given a namespace URL, get the prefix |
||
98 | * |
||
99 | * @param string $url |
||
100 | * Canonical URL for this namespace prefix |
||
101 | * |
||
102 | * @return string|false |
||
103 | * The namespace prefix or FALSE if there is no namespace with that URL |
||
104 | */ |
||
105 | public function lookupPrefix($url) { |
||
108 | |||
109 | /** |
||
110 | * Given a namespace prefix, get the URL |
||
111 | * |
||
112 | * @param string $prefix |
||
113 | * namespace prefix |
||
114 | * |
||
115 | * @return string|false |
||
116 | * The namespace URL or FALSE if there is no namespace with that prefix |
||
117 | */ |
||
118 | public function lookupURL($prefix) { |
||
126 | |||
127 | /** |
||
128 | * Given an xpath, get a list of nodes. |
||
129 | * |
||
130 | * @param string $xpath |
||
131 | * xpath to be used for query |
||
132 | * |
||
133 | * @param mixed $context |
||
134 | * $context can either be an xpath string, or a DOMElement |
||
135 | * Provides context for the xpath query |
||
136 | * |
||
137 | * @return DOMList|false |
||
138 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterability. |
||
139 | */ |
||
140 | 16 | public function xpath($xpath, $context = NULL) { |
|
168 | |||
169 | |||
/**
 * Run an xpath query and hand back only the first matching node.
 *
 * @param string $xpath
 *  xpath to be used for query
 *
 * @param mixed $context
 *  Passed through unchanged to xpath() as the context of the query
 *
 * @return mixed
 *  The first item of the query result, or FALSE when the result is empty
 */
public function xpathSingle($xpath, $context = NULL) {
  $matches = $this->xpath($xpath, $context);

  // A falsy result and a list holding zero items both count as "nothing found".
  $has_match = !empty($matches) && count($matches);

  return $has_match ? $matches->item(0) : FALSE;
}
||
193 | |||
194 | |||
195 | /** |
||
196 | * Given an CSS selector, get a list of nodes. |
||
197 | * |
||
198 | * @param string $css_selector |
||
199 | * CSS Selector to be used for query |
||
200 | * |
||
201 | * @param mixed $context |
||
202 | * $context can either be an xpath string, or a DOMElement |
||
203 | * Provides context for the CSS selector |
||
204 | * |
||
205 | * @return DOMList|false |
||
206 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterabilility. |
||
207 | */ |
||
208 | public function select($css_selector, $context = NULL) { |
||
214 | |||
215 | /** |
||
216 | * Given an CSS selector, get a single node. |
||
217 | * |
||
218 | * @param string $css_selector |
||
219 | * CSS Selector to be used for query |
||
220 | * |
||
221 | * @param mixed $context |
||
222 | * $context can either be an xpath string, or a DOMElement |
||
223 | * Provides context for the CSS selector |
||
224 | * |
||
225 | * @return DOMList |
||
226 | * A DOMList object, which is very similar to a DOMNodeList, but with better iterabilility. |
||
227 | */ |
||
228 | public function selectSingle($css_selector, $context = NULL) { |
||
234 | |||
235 | /** |
||
236 | * Get the document (or an element) as an array |
||
237 | * |
||
238 | * @param string $raw |
||
239 | * Can be either FALSE, 'full', or 'inner'. Defaults to FALSE. |
||
240 | * When set to 'full' every node's full XML is also attached to the array |
||
241 | * When set to 'inner' every node's inner XML is attached to the array. |
||
242 | * |
||
243 | * @param mixed $context |
||
244 | * Optional context node. Can pass an DOMElement object or an xpath string. |
||
245 | * If passed, only the given node will be used when generating the array |
||
246 | */ |
||
247 | public function getArray($raw = FALSE, $context = NULL) { |
||
292 | |||
293 | /** |
||
294 | * Get the inner text of an element |
||
295 | * |
||
296 | * @param mixed $context |
||
297 | * Optional context node. Can pass an DOMElement object or an xpath string. |
||
298 | */ |
||
299 | public function innerText($context = NULL) { |
||
311 | |||
/**
 * Create a DOMElement from XML and attach it to the DOMDocument
 *
 * Note that this does not place it anywhere in the dom tree, it merely imports it.
 * Namespaces found while parsing the fragment are registered on this document.
 *
 * @param string $xml
 *  XML string to import
 *
 * @return mixed
 *  The imported node, or FALSE (after raising an error via trigger_error)
 *  when the XML could not be parsed into a root element
 */
public function createElementFromXML($xml) {

  // To make things easy and make sure namespaces work properly, we add the
  // namespace declaration of the root element's prefix if it is not declared.
  $namespaces = $this->ns;
  $xml = preg_replace_callback('/<[^\?^!].+?>/s', function($root_match) use ($namespaces) {
    $opening_tag = $root_match[0];

    // Capture the tag name together with its byte offset, so the declaration
    // can be inserted right after the name instead of after every occurrence
    // of the same text (e.g. inside an attribute value).
    if (!preg_match('/<([^ <>]+)[\d\s]?.*?>/s', $opening_tag, $root_tag, PREG_OFFSET_CAPTURE)) {
      return $opening_tag;
    }
    list($captured, $offset) = $root_tag[1];

    // A self-closing tag without attributes is captured as "name/"; the
    // declaration has to go in front of that slash.
    $tag_name = rtrim($captured, '/');

    // No prefix (or a leading colon): nothing to declare.
    $colon = strpos($tag_name, ':');
    if (!$colon) {
      return $opening_tag;
    }

    $prefix = substr($tag_name, 0, $colon);
    if (!isset($namespaces[$prefix]) || strpos($opening_tag, "xmlns:$prefix") !== FALSE) {
      return $opening_tag;
    }

    $declaration = " xmlns:$prefix='" . $namespaces[$prefix] . "'";
    return substr_replace($opening_tag, $declaration, $offset + strlen($tag_name), 0);
  }, $xml, 1);

  $dom = new DOMDoc($xml, $this->auto_ns);
  $element = $dom->documentElement;
  if (!$element) {
    trigger_error('BetterDomDocument\DOMDoc Error: Invalid XML: ' . $xml);
    // Without a root element there is nothing to import.
    return FALSE;
  }

  // Merge the namespaces
  foreach ($dom->getNamespaces() as $prefix => $url) {
    $this->registerNamespace($prefix, $url);
  }

  return $this->importNode($element, true);
}
||
352 | |||
/**
 * Add a node as the last child of the context node
 *
 * @param mixed $newnode
 *  The node to add; resolved in place by createContext() using the 'xml' type
 *
 * @param mixed $context
 *  The parent to add to; resolved in place by createContext() using the 'xpath' type
 *
 * @return DOMElement|false
 *  The result of appendChild(), or FALSE when either argument is falsy after
 *  resolution. A node owned by another document is imported first, so callers
 *  should keep using the returned node rather than the one they passed in.
 */
public function append($newnode, $context = NULL) {
  // createContext() takes its first argument by reference.
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if ($context && $newnode) {
    $child = ($newnode->ownerDocument === $this)
      ? $newnode
      : $this->importNode($newnode, true);

    return $context->appendChild($child);
  }

  return FALSE;
}
||
384 | |||
/**
 * Add a node as the first child of the context node
 *
 * @param mixed $newnode
 *  The node to add; resolved in place by createContext() using the 'xml' type
 *
 * @param mixed $context
 *  The parent to add to; resolved in place by createContext() using the 'xpath' type
 *
 * @return DOMElement|false
 *  The result of insertBefore(), or FALSE when either argument is falsy after
 *  resolution. A node owned by another document is imported first, so callers
 *  should keep using the returned node rather than the one they passed in.
 */
public function prepend($newnode, $context = NULL) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  // Same ownership handling as append(): a node that belongs to another
  // document has to be imported before it can be inserted into this one.
  if ($newnode->ownerDocument === $this) {
    $prependnode = $newnode;
  }
  else {
    $prependnode = $this->importNode($newnode, true);
  }

  // firstChild is NULL for an empty context; insertBefore() then appends.
  return $context->insertBefore($prependnode, $context->firstChild);
}
||
409 | |||
/**
 * Insert a node as a sibling directly before the context node
 *
 * @param mixed $newnode
 *  The node to insert; resolved in place by createContext() using the 'xml' type
 *
 * @param mixed $context
 *  The reference node; resolved in place by createContext() using the 'xpath' type
 *
 * @return DOMElement|false
 *  The result of insertBefore(), or FALSE when either argument is falsy after
 *  resolution or the context node has no parent. A node owned by another
 *  document is imported first, so callers should keep using the returned node.
 */
public function prependSibling($newnode, $context = NULL) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  // A detached context node has no parent to insert into.
  $parent = $context->parentNode;
  if (!$parent) {
    return FALSE;
  }

  // Same ownership handling as append(): import nodes from other documents.
  if ($newnode->ownerDocument === $this) {
    $siblingnode = $newnode;
  }
  else {
    $siblingnode = $this->importNode($newnode, true);
  }

  return $parent->insertBefore($siblingnode, $context);
}
||
434 | |||
/**
 * Insert a node as a sibling directly after the context node
 *
 * @param mixed $newnode
 *  The node to insert; resolved in place by createContext() using the 'xml' type
 *
 * @param mixed $context
 *  The reference node; resolved in place by createContext() using the 'xpath' type
 *
 * @return DOMElement|false
 *  The result of insertBefore(), or FALSE when either argument is falsy after
 *  resolution or the context node has no parent. A node owned by another
 *  document is imported first, so callers should keep using the returned node.
 */
public function appendSibling($newnode, $context) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  // A detached context node has no parent to insert into.
  $parent = $context->parentNode;
  if (!$parent) {
    return FALSE;
  }

  // Import once, up front, so both the "has next sibling" and the
  // "is last child" case receive a node owned by this document.
  if ($newnode->ownerDocument === $this) {
    $appendnode = $newnode;
  }
  else {
    $appendnode = $this->importNode($newnode, true);
  }

  // With a next sibling the node goes in front of it; when nextSibling is
  // NULL, insertBefore() appends the node as the last child of the parent.
  return $parent->insertBefore($appendnode, $context->nextSibling);
}
||
472 | |||
/**
 * Build a new DOMDoc from a node of this document.
 *
 * @param mixed $node
 *  The node to extract; resolved in place by createContext() using the 'xpath' type
 *
 * @param mixed $auto_register_namespaces
 *  Forwarded to the constructor of the new DOMDoc
 *
 * @param mixed $error_checking
 *  Forwarded to the constructor of the new DOMDoc
 *
 * @return DOMDoc
 *  The new DOMDoc, carrying a copy of this document's $ns array
 */
public function extract($node, $auto_register_namespaces = TRUE, $error_checking = 'none') {
  $this->createContext($node, 'xpath');

  $extracted = new self($node, $auto_register_namespaces, $error_checking);

  // The namespaces registered here replace whatever the new document holds.
  $extracted->ns = $this->ns;

  return $extracted;
}
||
488 | |||
/**
 * Given a pair of nodes, replace the first with the second
 *
 * @param mixed $node
 *  Node to be replaced; resolved in place by createContext() using the 'xpath' type
 *
 * @param mixed $replace
 *  The replacement; resolved in place by createContext() using the 'xml' type
 *
 * @return mixed
 *  The replacement node as it now sits in this document, or FALSE when either
 *  argument is falsy after resolution or $node has no parent to replace it in.
 */
public function replace($node, $replace) {
  $this->createContext($node, 'xpath');
  $this->createContext($replace, 'xml');

  if (!$node || !$replace) {
    return FALSE;
  }

  // A detached node cannot be replaced: there is no parent to operate on.
  $parent = $node->parentNode;
  if (!$parent) {
    return FALSE;
  }

  // Same ownership test as append(). Comparing the owner document directly
  // also works when the other document has no root element.
  if ($replace->ownerDocument !== $this) {
    $replace = $this->importNode($replace, true);
  }

  $parent->replaceChild($replace, $node);

  return $replace;
}
||
516 | |||
/**
 * Given a node(s), remove / delete them
 *
 * Strings are run through xpath() first. Arrays and DOMList objects are
 * walked item by item, a DOMNodeList is wrapped in a DOMList and walked the
 * same way. Nodes without a parent are left untouched.
 *
 * @param mixed $node
 *  Can pass a DOMNode, a DOMList, a DOMNodeList, an xpath string, or an array of any of these.
 */
public function remove($node) {
  // We can't use createContext here because we want to use the entire nodeList (not just a single element)
  if (is_string($node)) {
    $node = $this->xpath($node);
  }

  if (!$node) {
    return;
  }

  if (is_array($node) || $node instanceof DOMList) {
    foreach ($node as $item) {
      $this->remove($item);
    }
  }
  else if ($node instanceof \DOMNodeList) {
    $this->remove(new DOMList($node, $this));
  }
  else if ($node instanceof \DOMNode && $node->parentNode) {
    // Only attached nodes can be removed; a detached node has no parent.
    $node->parentNode->removeChild($node);
  }
}
|
544 | |||
545 | /** |
||
546 | * Given an XSL string, transform the DOMDoc (or a passed context node) |
||
547 | * |
||
548 | * @param string $xsl |
||
549 | * XSL Transormation |
||
550 | * |
||
551 | * @param mixed $context |
||
552 | * $context can either be an xpath string, or a DOMElement. Ommiting it |
||
553 | * results in transforming the entire document |
||
554 | * |
||
555 | * @return a new DOMDoc |
||
556 | */ |
||
557 | 3 | public function tranform($xsl, $context = NULL) { |
|
574 | |||
575 | /** |
||
576 | * Given a node, change it's namespace to the specified namespace in situ |
||
577 | * |
||
578 | * @param mixed $node |
||
579 | * Node to be changed. Can either be an xpath string or a DOMElement. |
||
580 | * |
||
581 | * @param mixed $prefix |
||
582 | * prefix for the new namespace |
||
583 | * |
||
584 | * @param mixed $url |
||
585 | * The URL for the new namespace |
||
586 | * |
||
587 | * @return mixed |
||
588 | * The node with the new namespace. The node will also be changed in-situ in the document as well. |
||
589 | */ |
||
590 | public function changeNamespace($node, $prefix, $url) { |
||
618 | |||
619 | /** |
||
620 | * Get a lossless HTML representation of the XML |
||
621 | * |
||
622 | * Transforms the document (or passed context) into a set of HTML spans. |
||
623 | * The element name becomes the class, all other attributes become HTML5 |
||
624 | * "data-" attributes. |
||
625 | * |
||
626 | * @param mixed $context |
||
627 | * $context can either be an xpath string, or a DOMElement. Ommiting it |
||
628 | * results in transforming the entire document |
||
629 | * |
||
630 | * @param array $options |
||
631 | * Options for transforming the HTML into XML. The following options are supported: |
||
632 | * 'xlink' => {TRUE or xpath} |
||
633 | * Transform xlink links into <a href> elements. If you specify 'xlink' => TRUE then |
||
634 | * it will transform all elements with xlink:type = simple into a <a href> element. |
||
635 | * Alternatively you may specify your own xpath for selecting which elements get transformed |
||
636 | * into <a href> tags. |
||
637 | * @return HTML string |
||
638 | */ |
||
639 | 3 | public function asHTML($context = NULL, $options = array()) { |
|
711 | |||
/**
 * Serialize the document, or one node of it, to an XML string
 *
 * Before serializing, every prefix in $ns that the context node does not
 * declare yet is added to it as an xmlns:prefix attribute.
 *
 * @param mixed $context
 *  The node to output; resolved in place by createContext() using the 'xpath' type
 *
 * @return XML string
 *  An empty string when the context is falsy after resolution
 */
public function out($context = NULL) {
  $this->createContext($context, 'xpath');
  if (!$context) {
    return '';
  }

  // Copy namespace prefixes
  foreach ($this->ns as $prefix => $namespace) {
    $attribute = 'xmlns:' . $prefix;
    if (!$context->hasAttribute($attribute)) {
      $context->setAttribute($attribute, $namespace);
    }
  }

  // HTML is detected on the root element: XHTML default namespace or an <html> tag.
  $root = $this->documentElement;
  $is_html = $root->lookupNamespaceURI(NULL) == 'http://www.w3.org/1999/xhtml' || $root->tagName == 'html';

  $output = $this->saveXML($context, LIBXML_NOEMPTYTAG);
  if (!$is_html) {
    return $output;
  }

  // LIBXML_NOEMPTYTAG writes <br></br>; the elements listed are html "void"
  // elements, so an empty open/close pair is collapsed to the self-closed form.
  $pattern = '<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)(\b[^<]*)><\/\1>';

  return preg_replace('/' . $pattern . '/', '<$1$2/>', $output);
}
||
746 | |||
/**
 * Magic method for casting a DOMDoc as a string
 *
 * @return string
 *  Whatever out() produces for the whole document, cast to a string
 */
public function __toString() {
  // __toString() must return a string, but out() passes through the results
  // of saveXML() and preg_replace(), which can be FALSE or NULL on failure.
  return (string) $this->out();
}
||
753 | |||
/**
 * Set the error checking mode and keep strictErrorChecking in step with it
 *
 * @param mixed $error_checking
 *  Can be 'strict', 'warning', 'none' or FALSE. Any value loosely equal to
 *  FALSE is stored as 'none'.
 */
public function setErrorChecking($error_checking) {
  if ($error_checking == FALSE) {
    $this->error_checking = 'none';
  }
  else {
    $this->error_checking = $error_checking;
  }

  // Assign in both directions so that switching back to 'strict' re-enables
  // strict checking instead of leaving it off from an earlier call.
  $this->strictErrorChecking = ($this->error_checking == 'strict');
}
|
766 | |||
767 | 10 | public static function loadFile($file_or_url, $auto_register_namespaces = TRUE) { |
|
775 | |||
776 | 19 | private function AutoRegisterNamespace($auto_register_namespaces) { |
|
777 | 19 | $this->auto_ns = TRUE; |
|
778 | |||
779 | // If it's an "XML" document, then get namespaces via xpath |
||
780 | 19 | $xpath = new \DOMXPath($this); |
|
781 | 19 | foreach($xpath->query('namespace::*') as $namespace) { |
|
782 | 18 | if (!empty($namespace->prefix)) { |
|
783 | 18 | if ($namespace->prefix != 'xml' && $namespace->nodeValue != 'http://www.w3.org/XML/1998/namespace') { |
|
784 | 18 | $this->registerNamespace($namespace->prefix, $namespace->nodeValue); |
|
785 | } |
||
786 | } |
||
787 | View Code Duplication | else { |
|
826 | |||
827 | 19 | private function createContext(&$context, $type = 'xpath', $createDocument = TRUE) { |
|
857 | } |
||
858 | |||
861 |