| Total Complexity | 77 |
| Total Lines | 505 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like DOM often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOM, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 17 | abstract class DOM implements Query, \IteratorAggregate, \Countable |
||
| 18 | { |
||
| 19 | |||
| 20 | /** |
||
| 21 | * The array of matches. |
||
| 22 | */ |
||
| 23 | protected $matches = []; |
||
| 24 | |||
| 25 | /** |
||
| 26 | * Default parser flags. |
||
| 27 | * |
||
| 28 | * These are flags that will be used if no global or local flags override them. |
||
| 29 | * |
||
| 30 | * @since 2.0 |
||
| 31 | */ |
||
| 32 | public const DEFAULT_PARSER_FLAGS = NULL; |
||
| 33 | |||
| 34 | public const JS_CSS_ESCAPE_CDATA = '\\1'; |
||
| 35 | public const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */'; |
||
| 36 | public const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1'; |
||
| 37 | public const JS_CSS_ESCAPE_NONE = ''; |
||
| 38 | |||
| 39 | protected $errTypes = 771; //E_ERROR; | E_USER_ERROR; |
||
| 40 | |||
| 41 | protected $document; |
||
| 42 | /** |
||
| 43 | * The base DOMDocument. |
||
| 44 | */ |
||
| 45 | protected $options = [ |
||
| 46 | 'parser_flags' => NULL, |
||
| 47 | 'omit_xml_declaration' => false, |
||
| 48 | 'replace_entities' => false, |
||
| 49 | 'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING |
||
| 50 | 'ignore_parser_warnings' => false, |
||
| 51 | 'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT, |
||
| 52 | ]; |
||
| 53 | |||
/**
 * Constructor.
 *
 * Typically, a new DOMQuery is created by QueryPath::with(), QueryPath::withHTML(),
 * qp(), or htmlqp().
 *
 * The document argument is dispatched on its type:
 * - empty value: a new, empty DOMDocument with an empty match set;
 * - \SplObjectStorage: used directly as the match set;
 * - another instance of this class: its matches are copied;
 * - \DOMDocument / \DOMNode / \SimpleXMLElement: wrapped as the single match;
 * - array whose first item is a \DOMNode: converted into a match set;
 * - string containing both '<' and '>': parsed as markup;
 * - any other string: treated as a filename and parsed from disk.
 *
 * @param mixed $document
 *   A document-like object.
 * @param string $string
 *   A CSS 3 Selector
 * @param array $options
 *   An associative array of options.
 * @see qp()
 * @throws \QueryPath\Exception
 *   Thrown when $document is an object of an unsupported class.
 */
public function __construct($document = NULL, $string = NULL, $options = [])
{
    // Passing null to trim() is deprecated as of PHP 8.1, and null is the default here.
    $string = $string === null ? '' : trim($string);

    // Precedence: local options, then global options, then the class defaults.
    $this->options = $options + Options::get() + $this->options;

    // NOTE(review): only the filename branch below receives these flags; markup
    // strings are parsed without them. Confirm whether that is intentional.
    $parser_flags = $options['parser_flags'] ?? self::DEFAULT_PARSER_FLAGS;

    if (!empty($this->options['ignore_parser_warnings'])) {
        // Don't convert parser warnings into exceptions.
        $this->errTypes = 257; // E_ERROR | E_USER_ERROR
    } elseif (isset($this->options['exception_level'])) {
        // Set the error level at which exceptions will be thrown. By default,
        // QueryPath will throw exceptions for
        // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING.
        $this->errTypes = $this->options['exception_level'];
    }

    if (empty($document)) {
        // Empty: just create an empty QP.
        $this->document = isset($this->options['encoding'])
            ? new \DOMDocument('1.0', $this->options['encoding'])
            : new \DOMDocument();
        $this->setMatches(new \SplObjectStorage());
    } elseif (is_object($document)) {
        // Figure out which kind of document-like object we were handed.
        if ($document instanceof \SplObjectStorage) {
            // This is the most frequent object type.
            $this->matches = $document;
            if ($document->count() !== 0) {
                $first = $this->getFirstMatch();
                if (!empty($first->ownerDocument)) {
                    $this->document = $first->ownerDocument;
                }
            }
        } elseif ($document instanceof self) {
            $this->setMatches($document->get(NULL, true));
            if ($this->matches->count() > 0) {
                $this->document = $this->getFirstMatch()->ownerDocument;
            }
        } elseif ($document instanceof \DOMDocument) {
            // Must be tested before \DOMNode, which \DOMDocument extends.
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof \DOMNode) {
            $this->document = $document->ownerDocument;
            $this->setMatches($document);
        } elseif ($document instanceof \Masterminds\HTML5) {
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof \SimpleXMLElement) {
            $import = dom_import_simplexml($document);
            $this->document = $import->ownerDocument;
            $this->setMatches($import);
        } else {
            throw new \QueryPath\Exception('Unsupported class type: ' . get_class($document));
        }
    } elseif (is_array($document)) {
        // Legacy support: an array of DOM nodes becomes the match set.
        if (!empty($document) && $document[0] instanceof \DOMNode) {
            $found = new \SplObjectStorage();
            foreach ($document as $item) {
                $found->attach($item);
            }
            $this->setMatches($found);
            $this->document = $this->getFirstMatch()->ownerDocument;
        }
    } elseif ($this->isXMLish($document)) {
        // $document is a string with XML
        $this->document = $this->parseXMLString($document);
        $this->setMatches($this->document->documentElement);
    } else {
        // $document is a filename
        $context = empty($options['context']) ? NULL : $options['context'];
        $this->document = $this->parseXMLFile($document, $parser_flags, $context);
        $this->setMatches($this->document->documentElement);
    }

    // Globally set the output option. Several branches above can leave the
    // document unset (an empty SplObjectStorage, or an array that does not start
    // with a DOMNode); assigning a property on null would fail, so guard it.
    if ($this->document !== null) {
        $disableFormatting = isset($this->options['format_output'])
            && $this->options['format_output'] === false;
        $this->document->formatOutput = !$disableFormatting;
    }

    // Do a find if the second param was set.
    if (strlen($string) > 0) {
        // We don't issue a find() on $this because that creates a new DOMQuery.
        $query = new DOMTraverser($this->matches);
        $query->find($string);
        $this->setMatches($query->matches());
    }
}
||
| 166 | |||
/**
 * Parse a string of XML or HTML markup into a new DOMDocument.
 *
 * While parsing, PHP errors matching $this->errTypes are routed to
 * ParseException::initializeFromError(). The previous error handler is
 * always restored, whatever is thrown during parsing.
 *
 * Options consulted: 'convert_to_encoding', 'convert_from_encoding',
 * 'strip_low_ascii', 'use_parser' and 'replace_entities'.
 *
 * @param string $string
 *   The markup to parse.
 * @param int|null $flags
 *   Parser flags handed to DOMDocument::loadXML(). NULL means no flags.
 *   They are not used on the HTML parsing path.
 * @return \DOMDocument
 *   The document the markup was loaded into.
 */
private function parseXMLString($string, $flags = NULL)
{
    $document = new \DOMDocument('1.0');
    $lead = strtolower(substr($string, 0, 5)); // <?xml

    set_error_handler([ParseException::class, 'initializeFromError'], $this->errTypes);
    try {
        if (isset($this->options['convert_to_encoding'])) {
            $from_enc = $this->options['convert_from_encoding'] ?? 'auto';
            $to_enc = $this->options['convert_to_encoding'];

            // Conversion is silently skipped when mbstring is not available.
            if (function_exists('mb_convert_encoding')) {
                $string = mb_convert_encoding($string, $to_enc, $from_enc);
            }
        }

        // This is to avoid cases where low ascii digits have slipped into HTML.
        // AFAIK, it should not adversely affect UTF-8 documents.
        if (!empty($this->options['strip_low_ascii'])) {
            $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW);
        }

        // Allow users to override parser settings.
        $useParser = '';
        if (!empty($this->options['use_parser'])) {
            $useParser = strtolower($this->options['use_parser']);
        }

        if ($useParser === 'html') {
            // If HTML parser is requested, we use it.
            $document->loadHTML($string);
        } elseif ($lead === '<?xml' || $useParser === 'xml') {
            // Parse as XML if it looks like XML, or if XML parser is requested.
            if (!empty($this->options['replace_entities'])) {
                $string = Entities::replaceAllEntities($string);
            }
            // loadXML() takes an int; null is deprecated there since PHP 8.1.
            $document->loadXML($string, $flags ?? 0);
        } else {
            // In all other cases, we try the HTML parser.
            $document->loadHTML($string);
        }
    } finally {
        // Runs for every Throwable, not only the namespaced Exception, so the
        // custom handler never stays installed after a failed parse.
        restore_error_handler();
    }

    return $document;
}
||
| 224 | |||
/**
 * EXPERT: Be very, very careful using this.
 *
 * Replaces the current set of matches. The set being replaced is kept in
 * $this->last first, and $this->length is refreshed afterwards.
 *
 * Accepted inputs:
 * - an \SplObjectStorage, which is used as-is;
 * - an array (legacy), whose items are copied into a new storage after a
 *   notice is triggered;
 * - any other non-null value, which becomes the only member of a new storage;
 * - NULL, which yields an empty storage.
 *
 * @since 2.0
 * @param mixed $matches
 *   The new matches.
 */
public function setMatches($matches)
{
    // Remember the outgoing set before it is overwritten.
    $this->last = $this->matches;

    if ($matches instanceof \SplObjectStorage) {
        $storage = $matches;
    } else {
        $isLegacyArray = is_array($matches);
        if ($isLegacyArray) {
            // This is likely legacy code that needs conversion.
            trigger_error('Legacy array detected.');
        }

        $storage = new \SplObjectStorage();
        if ($isLegacyArray) {
            foreach ($matches as $node) {
                $storage->attach($node);
            }
        } elseif ($matches !== null) {
            // A lone object: wrap it in its own match set.
            $storage->attach($matches);
        }
    }

    $this->matches = $storage;

    // EXPERIMENTAL: Support for qp()->length.
    $this->length = $this->matches->count();
}
||
| 264 | |||
/**
 * A depth-checking function. Typically, it only needs to be
 * invoked with the first parameter. The rest are used for recursion.
 *
 * Only nodes for which hasChildNodes() is false are compared against the
 * running maximum depth; nodes that have children are descended into through
 * their element children only.
 *
 * @see deepest();
 * @param DOMNode $ele
 *   The element.
 * @param int $depth
 *   The depth gauge.
 * @param mixed $current
 *   The current set.
 * @param DOMNode $deepest
 *   A reference to the deepest depth seen so far. It is updated in place.
 * @return array
 *   Returns an array of DOM nodes.
 */
protected function deepestNode(\DOMNode $ele, $depth = 0, $current = NULL, &$deepest = NULL)
{
    // FIXME: Should this use SplObjectStorage?
    // First call: seed the result set and the depth marker.
    $current = $current ?? [$ele];
    $deepest = $deepest ?? $depth;

    if (!$ele->hasChildNodes()) {
        if ($depth > $deepest) {
            // New maximum depth: start the result set over.
            $deepest = $depth;

            return [$ele];
        }
        if ($depth === $deepest) {
            $current[] = $ele;
        }

        return $current;
    }

    foreach ($ele->childNodes as $node) {
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }
        $current = $this->deepestNode($node, $depth + 1, $current, $deepest);
    }

    return $current;
}
||
| 305 | |||
| 306 | /** |
||
| 307 | * Prepare an item for insertion into a DOM. |
||
| 308 | * |
||
| 309 | * This handles a variety of boilerplate tasks that need doing before an |
||
| 310 | * indeterminate object can be inserted into a DOM tree. |
||
| 311 | * - If item is a string, this is converted into a document fragment and returned. |
||
| 312 | * - If item is a DOMQuery, then all items are retrieved and converted into |
||
| 313 | * a document fragment and returned. |
||
| 314 | * - If the item is a DOMNode, it is imported into the current DOM if necessary. |
||
| 315 | * - If the item is a SimpleXMLElement, it is converted into a DOM node and then |
||
| 316 | * imported. |
||
| 317 | * |
||
| 318 | * @param mixed $item |
||
| 319 | * Item to prepare for insert. |
||
| 320 | * @return mixed |
||
| 321 | * Returns the prepared item. |
||
| 322 | * @throws QueryPath::Exception |
||
| 323 | * Thrown if the object passed in is not of a supported object type. |
||
| 324 | * @throws Exception |
||
| 325 | */ |
||
| 326 | protected function prepareInsert($item) |
||
| 382 | } |
||
| 383 | |||
| 384 | /** |
||
| 385 | * Convenience function for getNthMatch(0). |
||
| 386 | */ |
||
| 387 | protected function getFirstMatch() |
||
| 392 | } |
||
| 393 | |||
| 394 | /** |
||
| 395 | * Parse an XML or HTML file. |
||
| 396 | * |
||
| 397 | * This attempts to autodetect the type of file, and then parse it. |
||
| 398 | * |
||
| 399 | * @param string $filename |
||
| 400 | * The file name to parse. |
||
| 401 | * @param int $flags |
||
| 402 | * The OR-combined flags accepted by the DOM parser. See the PHP documentation |
||
| 403 | * for DOM or for libxml. |
||
| 404 | * @param resource $context |
||
| 405 | * The stream context for the file IO. If this is set, then an alternate |
||
| 406 | * parsing path is followed: The file is loaded by PHP's stream-aware IO |
||
| 407 | * facilities, read entirely into memory, and then handed off to |
||
| 408 | * {@link parseXMLString()}. On large files, this can have a performance impact. |
||
| 409 | * @throws \QueryPath\ParseException |
||
| 410 | * Thrown when a file cannot be loaded or parsed. |
||
| 411 | */ |
||
| 412 | private function parseXMLFile($filename, $flags = NULL, $context = NULL) |
||
| 478 | } |
||
| 479 | |||
/**
 * Determine whether a given string looks like XML or not.
 *
 * The check is purely textual: the string must contain at least one '<'
 * and at least one '>'. It is possible to "confuse" this, which may
 * subsequently result in parse errors, but in the vast majority of cases
 * it serves as a valid indicator of whether or not the content looks
 * like XML.
 *
 * Things that are intentionally excluded:
 * - plain text with no markup.
 * - strings that look like filesystem paths.
 *
 * Subclasses SHOULD NOT OVERRIDE THIS. Altering it may be altering
 * core assumptions about how things work. Instead, classes should
 * override the constructor and pass in only one of the parsed types
 * that this class expects.
 *
 * @param string $string
 *   The string to inspect.
 * @return bool
 *   TRUE when both angle brackets occur in the string.
 */
protected function isXMLish($string)
{
    if (strpos($string, '<') === false) {
        return false;
    }

    return strpos($string, '>') !== false;
}
||
| 502 | |||
| 503 | /** |
||
| 504 | * A utility function for retrieving a match by index. |
||
| 505 | * |
||
| 506 | * The internal data structure used in DOMQuery does not have |
||
| 507 | * strong random access support, so we supplement it with this method. |
||
| 508 | * |
||
| 509 | * @param $index |
||
| 510 | * @return object|void |
||
| 511 | */ |
||
| 512 | protected function getNthMatch(int $index) |
||
| 522 | } |
||
| 523 | } |
||
| 524 | } |
||
| 525 | } |