Total Complexity | 77 |
Total Lines | 505 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like DOM often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOM, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | abstract class DOM implements Query, \IteratorAggregate, \Countable |
||
18 | { |
||
19 | |||
/**
 * The current set of matches.
 *
 * Starts out as an empty array; setMatches() replaces it with an
 * \SplObjectStorage.
 */
protected $matches = [];

/**
 * Default parser flags.
 *
 * These are flags that will be used if no global or local flags override them.
 *
 * @since 2.0
 */
public const DEFAULT_PARSER_FLAGS = null;

// Accepted values for the 'escape_xhtml_js_css_sections' option.
public const JS_CSS_ESCAPE_CDATA = '\\1';
public const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */';
public const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1';
public const JS_CSS_ESCAPE_NONE = '';

/**
 * Error levels handed to set_error_handler() while parsing.
 *
 * The default evaluates to 771.
 */
protected $errTypes = E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING;

/**
 * The base DOMDocument.
 */
protected $document;

/**
 * Built-in option defaults.
 *
 * The constructor layers caller-supplied options and Options::get() on top
 * of these values.
 */
protected $options = [
    'parser_flags' => null,
    'omit_xml_declaration' => false,
    'replace_entities' => false,
    'exception_level' => E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING, // 771
    'ignore_parser_warnings' => false,
    'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT,
];
||
53 | |||
/**
 * Constructor.
 *
 * Typically, a new DOMQuery is created by QueryPath::with(), QueryPath::withHTML(),
 * qp(), or htmlqp().
 *
 * @param mixed $document
 *   The source to wrap. Handled forms: an empty value (a blank DOMDocument is
 *   created), an \SplObjectStorage of nodes, another instance of this class,
 *   a \DOMDocument, a \DOMNode, a \Masterminds\HTML5 object, a
 *   \SimpleXMLElement, an array whose first element is a \DOMNode (legacy),
 *   a string containing markup, or otherwise a filename.
 * @param string $string
 *   A CSS 3 Selector. When non-empty, the initial matches are narrowed to
 *   whatever this selector finds.
 * @param array $options
 *   An associative array of options. Caller-supplied values win over the
 *   values from Options::get(), which in turn win over the class defaults.
 * @see qp()
 * @throws Exception
 *   Thrown when $document is an object of an unsupported class.
 */
public function __construct($document = null, $string = null, $options = [])
{
    // Cast before trimming: passing null to trim() is deprecated as of PHP 8.1.
    $string = trim((string) $string);
    $this->options = $options + Options::get() + $this->options;

    // Read from the merged options so that globally configured flags are
    // honoured, not just the ones passed directly to this call.
    $parser_flags = $this->options['parser_flags'] ?? self::DEFAULT_PARSER_FLAGS;

    if (!empty($this->options['ignore_parser_warnings'])) {
        // Don't convert parser warnings into exceptions.
        $this->errTypes = 257; // E_ERROR | E_USER_ERROR
    } elseif (isset($this->options['exception_level'])) {
        // Set the error level at which exceptions will be thrown. By default,
        // QueryPath will throw exceptions for
        // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING.
        $this->errTypes = $this->options['exception_level'];
    }

    if (empty($document)) {
        // Empty: just create an empty QP.
        $this->document = isset($this->options['encoding'])
            ? new \DOMDocument('1.0', $this->options['encoding'])
            : new \DOMDocument();
        $this->setMatches(new \SplObjectStorage());
    } elseif (is_object($document)) {
        if ($document instanceof \SplObjectStorage) {
            // This is the most frequent object type.
            $this->matches = $document;
            if ($document->count() !== 0) {
                $first = $this->getFirstMatch();
                if (!empty($first->ownerDocument)) {
                    $this->document = $first->ownerDocument;
                }
            }
        } elseif ($document instanceof self) {
            $this->setMatches($document->get(null, true));
            if ($this->matches->count() > 0) {
                $this->document = $this->getFirstMatch()->ownerDocument;
            }
        } elseif ($document instanceof \DOMDocument) {
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof \DOMNode) {
            $this->document = $document->ownerDocument;
            $this->setMatches($document);
        } elseif ($document instanceof \Masterminds\HTML5) {
            // NOTE(review): this treats the HTML5 object as if it were a
            // document exposing documentElement - TODO confirm against the
            // Masterminds\HTML5 API.
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof \SimpleXMLElement) {
            $import = dom_import_simplexml($document);
            $this->document = $import->ownerDocument;
            $this->setMatches($import);
        } else {
            throw new \QueryPath\Exception('Unsupported class type: ' . get_class($document));
        }
    } elseif (is_array($document)) {
        // Legacy array support. Arrays that do not start with a DOMNode are
        // accepted but leave the document unset. The null-coalescing lookup
        // avoids an undefined-offset notice for arrays without a 0 key.
        if (($document[0] ?? null) instanceof \DOMNode) {
            $found = new \SplObjectStorage();
            foreach ($document as $item) {
                $found->attach($item);
            }
            $this->setMatches($found);
            $this->document = $this->getFirstMatch()->ownerDocument;
        }
    } elseif ($this->isXMLish($document)) {
        // $document is a string with markup. Forward the parser flags so they
        // apply to strings as well as to files.
        $this->document = $this->parseXMLString($document, $parser_flags);
        $this->setMatches($this->document->documentElement);
    } else {
        // $document is a filename.
        $context = empty($this->options['context']) ? null : $this->options['context'];
        $this->document = $this->parseXMLFile($document, $parser_flags, $context);
        $this->setMatches($this->document->documentElement);
    }

    // Globally set the output option. Some inputs (an empty SplObjectStorage,
    // an empty wrapped query, a non-DOM array) resolve no document at all, so
    // guard against assigning a property on null.
    if (isset($this->document)) {
        $this->document->formatOutput = ($this->options['format_output'] ?? true) !== false;
    }

    // Do a find if the second param was set. find() is not used here because
    // it would create a new DOMQuery.
    if (strlen($string) > 0) {
        $query = new DOMTraverser($this->matches);
        $query->find($string);
        $this->setMatches($query->matches());
    }
}
||
166 | |||
/**
 * Parse a string of markup into a new DOMDocument.
 *
 * While parsing, PHP errors matching $this->errTypes are routed to
 * ParseException::initializeFromError (presumably converting them into
 * exceptions - confirm against that class). The previous error handler is
 * always restored, whatever happens during parsing.
 *
 * Options consulted: 'convert_to_encoding' / 'convert_from_encoding',
 * 'strip_low_ascii', 'use_parser' ('html' or 'xml') and 'replace_entities'.
 *
 * @param string $string
 *   The markup to parse.
 * @param int|null $flags
 *   libxml option flags for the XML parser; null means no flags. They are
 *   not applied when the HTML parser is used.
 * @return \DOMDocument
 *   The document the markup was loaded into.
 */
private function parseXMLString($string, $flags = null)
{
    $document = new \DOMDocument('1.0');
    $lead = strtolower(substr($string, 0, 5)); // <?xml

    set_error_handler([ParseException::class, 'initializeFromError'], $this->errTypes);
    try {
        if (isset($this->options['convert_to_encoding'])) {
            $from_enc = $this->options['convert_from_encoding'] ?? 'auto';
            $to_enc = $this->options['convert_to_encoding'];

            // Conversion is silently skipped when mbstring is unavailable.
            if (function_exists('mb_convert_encoding')) {
                $string = mb_convert_encoding($string, $to_enc, $from_enc);
            }
        }

        // This is to avoid cases where low ascii digits have slipped into HTML.
        // AFAIK, it should not adversely affect UTF-8 documents.
        if (!empty($this->options['strip_low_ascii'])) {
            $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW);
        }

        // Allow users to override parser settings.
        $useParser = '';
        if (!empty($this->options['use_parser'])) {
            $useParser = strtolower($this->options['use_parser']);
        }

        if ($useParser === 'html') {
            // If HTML parser is requested, we use it.
            $document->loadHTML($string);
        } elseif ($lead === '<?xml' || $useParser === 'xml') {
            // Parse as XML if it looks like XML, or if XML parser is requested.
            if ($this->options['replace_entities']) {
                $string = Entities::replaceAllEntities($string);
            }
            // loadXML() expects an int; passing null is deprecated as of PHP 8.1.
            $document->loadXML($string, $flags ?? 0);
        } else {
            // In all other cases, we try the HTML parser.
            $document->loadHTML($string);
        }
    } finally {
        // Runs for every exit path, including Throwables that are not
        // parser exceptions, so the handler can never be left installed.
        restore_error_handler();
    }

    // $document is always an object here, so no emptiness check is needed.
    return $document;
}
||
224 | |||
/**
 * EXPERT: Be very, very careful using this.
 *
 * Replaces the current set of matches. The previous set is stashed in
 * $this->last first (for end() and andSelf()), and $this->length is
 * refreshed afterwards.
 *
 * @since 2.0
 * @param mixed $matches
 *   An \SplObjectStorage is used as-is. An array (legacy) is copied into a
 *   new storage after triggering a notice. Any other non-null value is
 *   wrapped in a new single-item storage; null yields an empty storage.
 */
public function setMatches($matches)
{
    // Remember the outgoing set before anything else happens.
    $this->last = $this->matches;

    if ($matches instanceof \SplObjectStorage) {
        $storage = $matches;
    } else {
        $storage = new \SplObjectStorage();
        if (is_array($matches)) {
            // This is likely legacy code that needs conversion.
            trigger_error('Legacy array detected.');
            foreach ($matches as $node) {
                $storage->attach($node);
            }
        } elseif ($matches !== null) {
            // A single object becomes a one-item match set.
            $storage->attach($matches);
        }
    }

    $this->matches = $storage;

    // EXPERIMENTAL: Support for qp()->length.
    $this->length = $this->matches->count();
}
||
264 | |||
/**
 * A depth-checking function. Typically, it only needs to be
 * invoked with the first parameter. The rest are used for recursion.
 *
 * Walks the element descendants of $ele and collects the leaf elements
 * (elements with no element children) that sit at the greatest depth.
 * Non-element children such as text or comments do not count, so an
 * element that contains only text is still treated as a leaf.
 *
 * @see deepest();
 * @param \DOMNode $ele
 *   The element to inspect.
 * @param int $depth
 *   The depth gauge: the depth of $ele relative to the starting node.
 * @param mixed $current
 *   The nodes collected so far at the deepest level; leave unset on the
 *   initial call.
 * @param int|null $deepest
 *   A reference to the greatest depth seen so far. Initialised to $depth
 *   when unset, and updated as deeper leaves are found.
 * @return array
 *   Returns an array of DOM nodes.
 */
protected function deepestNode(\DOMNode $ele, $depth = 0, $current = null, &$deepest = null)
{
    // Start with an empty set: the starting node is added below like any
    // other leaf, which avoids listing it twice when it has no children.
    if (!isset($current)) {
        $current = [];
    }
    if (!isset($deepest)) {
        $deepest = $depth;
    }

    // Recurse into element children only, and remember whether there were any.
    $hasElementChild = false;
    foreach ($ele->childNodes as $child) {
        if ($child->nodeType === XML_ELEMENT_NODE) {
            $hasElementChild = true;
            $current = $this->deepestNode($child, $depth + 1, $current, $deepest);
        }
    }

    if (!$hasElementChild) {
        if ($depth > $deepest) {
            // A new maximum: discard everything collected at shallower levels.
            $current = [$ele];
            $deepest = $depth;
        } elseif ($depth === $deepest) {
            $current[] = $ele;
        }
    }

    return $current;
}
||
305 | |||
306 | /** |
||
307 | * Prepare an item for insertion into a DOM. |
||
308 | * |
||
309 | * This handles a variety of boilerplate tasks that need doing before an |
||
310 | * indeterminate object can be inserted into a DOM tree. |
||
311 | * - If item is a string, this is converted into a document fragment and returned. |
||
312 | * - If item is a DOMQuery, then all items are retrieved and converted into |
||
313 | * a document fragment and returned. |
||
314 | * - If the item is a DOMNode, it is imported into the current DOM if necessary. |
||
315 | * - If the item is a SimpleXMLElement, it is converted into a DOM node and then |
||
316 | * imported. |
||
317 | * |
||
318 | * @param mixed $item |
||
319 | * Item to prepare for insert. |
||
320 | * @return mixed |
||
321 | * Returns the prepared item. |
||
322 | * @throws QueryPath::Exception |
||
323 | * Thrown if the object passed in is not of a supported object type. |
||
324 | * @throws Exception |
||
325 | */ |
||
326 | protected function prepareInsert($item) |
||
382 | } |
||
383 | |||
384 | /** |
||
385 | * Convenience function for getNthMatch(0). |
||
386 | */ |
||
387 | protected function getFirstMatch() |
||
392 | } |
||
393 | |||
394 | /** |
||
395 | * Parse an XML or HTML file. |
||
396 | * |
||
397 | * This attempts to autodetect the type of file, and then parse it. |
||
398 | * |
||
399 | * @param string $filename |
||
400 | * The file name to parse. |
||
401 | * @param int $flags |
||
402 | * The OR-combined flags accepted by the DOM parser. See the PHP documentation |
||
403 | * for DOM or for libxml. |
||
404 | * @param resource $context |
||
405 | * The stream context for the file IO. If this is set, then an alternate |
||
406 | * parsing path is followed: The file is loaded by PHP's stream-aware IO |
||
407 | * facilities, read entirely into memory, and then handed off to |
||
408 | * {@link parseXMLString()}. On large files, this can have a performance impact. |
||
409 | * @throws \QueryPath\ParseException |
||
410 | * Thrown when a file cannot be loaded or parsed. |
||
411 | */ |
||
412 | private function parseXMLFile($filename, $flags = NULL, $context = NULL) |
||
478 | } |
||
479 | |||
/**
 * Determine whether a given string looks like XML or not.
 *
 * The check is deliberately loose: the string qualifies when it contains
 * both a '<' and a '>' character, in any order and at any position. It is
 * possible to "confuse" this, which may subsequently result in parse
 * errors, but in the vast majority of cases it serves as a valid
 * indicator of whether or not the content looks like XML.
 *
 * Things that are intentionally excluded:
 * - plain text with no markup.
 * - strings that look like filesystem paths.
 *
 * Subclasses SHOULD NOT OVERRIDE THIS. Altering it may be altering
 * core assumptions about how things work. Instead, classes should
 * override the constructor and pass in only one of the parsed types
 * that this class expects.
 *
 * @param string $string
 *   The candidate string.
 * @return bool
 *   True when both angle brackets are present.
 */
protected function isXMLish($string)
{
    // Without an opening bracket there is no need to look any further.
    if (strpos($string, '<') === false) {
        return false;
    }

    return strpos($string, '>') !== false;
}
||
502 | |||
503 | /** |
||
504 | * A utility function for retrieving a match by index. |
||
505 | * |
||
506 | * The internal data structure used in DOMQuery does not have |
||
507 | * strong random access support, so we supplement it with this method. |
||
508 | * |
||
509 | * @param $index |
||
510 | * @return object|void |
||
511 | */ |
||
512 | protected function getNthMatch(int $index) |
||
522 | } |
||
523 | } |
||
524 | } |
||
525 | } |