Total Complexity | 78 |
Total Lines | 506 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like DOM often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use DOM, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
15 | abstract class DOM implements Query, \IteratorAggregate, \Countable |
||
16 | { |
||
17 | |||
18 | /** |
||
19 | * The array of matches. |
||
20 | */ |
||
21 | protected $matches = []; |
||
22 | |||
23 | /** |
||
24 | * Default parser flags. |
||
25 | * |
||
26 | * These are flags that will be used if no global or local flags override them. |
||
27 | * |
||
28 | * @since 2.0 |
||
29 | */ |
||
30 | public const DEFAULT_PARSER_FLAGS = NULL; |
||
31 | |||
32 | public const JS_CSS_ESCAPE_CDATA = '\\1'; |
||
33 | public const JS_CSS_ESCAPE_CDATA_CCOMMENT = '/* \\1 */'; |
||
34 | public const JS_CSS_ESCAPE_CDATA_DOUBLESLASH = '// \\1'; |
||
35 | public const JS_CSS_ESCAPE_NONE = ''; |
||
36 | |||
37 | protected $errTypes = 771; // E_ERROR | E_WARNING | E_USER_ERROR | E_USER_WARNING |
||
38 | |||
39 | protected $document; |
||
40 | /** |
||
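41 | * Default options, merged with global and per-call options in the constructor. (The base DOMDocument is held in $document, declared above.) |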
||
42 | */ |
||
43 | protected $options = [ |
||
44 | 'parser_flags' => NULL, |
||
45 | 'omit_xml_declaration' => false, |
||
46 | 'replace_entities' => false, |
||
47 | 'exception_level' => 771, // E_ERROR | E_USER_ERROR | E_USER_WARNING | E_WARNING |
||
48 | 'ignore_parser_warnings' => false, |
||
49 | 'escape_xhtml_js_css_sections' => self::JS_CSS_ESCAPE_CDATA_CCOMMENT, |
||
50 | ]; |
||
51 | |||
52 | /** |
||
53 | * Constructor. |
||
54 | * |
||
55 | * Typically, a new DOMQuery is created by QueryPath::with(), QueryPath::withHTML(), |
||
56 | * qp(), or htmlqp(). |
||
57 | * |
||
58 | * @param mixed $document |
||
59 | * A document-like object. |
||
60 | * @param string $string |
||
61 | * A CSS 3 Selector |
||
62 | * @param array $options |
||
63 | * An associative array of options. |
||
64 | * @see qp() |
||
65 | * @throws Exception |
||
66 | */ |
||
public function __construct($document = NULL, $string = NULL, $options = [])
{
    // Overview: merge options, pick the error level used while parsing,
    // work out what kind of thing $document is (nothing, an object, an
    // array of nodes, a markup string, or a filename), and finally narrow
    // the match set with the CSS selector in $string, if one was given.

    // $string defaults to NULL; cast first so trim() never receives null
    // (passing null to trim() is deprecated as of PHP 8.1).
    $string = trim((string) $string);

    // Precedence: per-call options, then global options, then class defaults.
    $this->options = $options + Options::get() + $this->options;

    // Read the flags from the MERGED options so that globally registered
    // parser flags are honoured, not only the ones passed to this call.
    $parser_flags = $this->options['parser_flags'] ?? self::DEFAULT_PARSER_FLAGS;

    if (!empty($this->options['ignore_parser_warnings'])) {
        // Don't convert parser warnings into exceptions.
        $this->errTypes = 257; //E_ERROR | E_USER_ERROR;
    } elseif (isset($this->options['exception_level'])) {
        // Set the error level at which exceptions will be thrown. By default,
        // QueryPath will throw exceptions for
        // E_ERROR | E_USER_ERROR | E_WARNING | E_USER_WARNING.
        $this->errTypes = $this->options['exception_level'];
    }

    if (empty($document)) {
        // Empty: just create an empty document with no matches.
        $this->document = isset($this->options['encoding'])
            ? new \DOMDocument('1.0', $this->options['encoding'])
            : new \DOMDocument();
        $this->setMatches(new \SplObjectStorage());
    } elseif (is_object($document)) {
        // This is the most frequent object type.
        if ($document instanceof \SplObjectStorage) {
            $this->matches = $document;
            if ($document->count() !== 0) {
                $first = $this->getFirstMatch();
                if (!empty($first->ownerDocument)) {
                    $this->document = $first->ownerDocument;
                }
            }
        } elseif ($document instanceof self) {
            $this->setMatches($document->get(NULL, true));
            if ($this->matches->count() > 0) {
                $this->document = $this->getFirstMatch()->ownerDocument;
            }
        } elseif ($document instanceof \DOMDocument) {
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof \DOMNode) {
            $this->document = $document->ownerDocument;
            $this->setMatches($document);
        } elseif ($document instanceof \Masterminds\HTML5) {
            // NOTE(review): this stores the HTML5 object itself as the
            // document and reads ->documentElement from it; confirm that
            // the HTML5 class really exposes that property.
            $this->document = $document;
            $this->setMatches($document->documentElement);
        } elseif ($document instanceof \SimpleXMLElement) {
            $import = dom_import_simplexml($document);
            $this->document = $import->ownerDocument;
            $this->setMatches($import);
        } else {
            throw new \QueryPath\Exception('Unsupported class type: ' . get_class($document));
        }
    } elseif (is_array($document)) {
        // Legacy support: a list of DOMNode objects. empty() above already
        // handled the empty array; `?? NULL` avoids an undefined-offset
        // notice when the array has no key 0.
        if (!(($document[0] ?? NULL) instanceof \DOMNode)) {
            // Previously this fell through with no document set and failed
            // later with an unrelated error; report the real problem instead.
            throw new \QueryPath\Exception('Unsupported array content: expected a list of DOMNode objects.');
        }
        $found = new \SplObjectStorage();
        foreach ($document as $item) {
            $found->attach($item);
        }
        $this->setMatches($found);
        $this->document = $this->getFirstMatch()->ownerDocument;
    } elseif ($this->isXMLish($document)) {
        // $document is a string of markup. Hand the parser flags on so
        // they apply to strings as well as to files.
        $this->document = $this->parseXMLString($document, $parser_flags);
        $this->setMatches($this->document->documentElement);
    } else {
        // $document is a filename.
        $context = empty($options['context']) ? NULL : $options['context'];
        $this->document = $this->parseXMLFile($document, $parser_flags, $context);
        $this->setMatches($this->document->documentElement);
    }

    // Globally set the output option: formatted unless 'format_output' is
    // explicitly falsy. The document can still be unset here (an empty
    // SplObjectStorage or an empty query object was passed in), in which
    // case there is nothing to configure.
    if ($this->document !== NULL) {
        $formatDisabled = isset($this->options['format_output'])
            && $this->options['format_output'] == false;
        $this->document->formatOutput = !$formatDisabled;
    }

    // Do a find if the second param was set. The traverser is used
    // directly rather than find(), because find() creates a new query object.
    if ($string !== '') {
        $query = new \QueryPath\CSS\DOMTraverser($this->matches);
        $query->find($string);
        $this->setMatches($query->matches());
    }
}
||
164 | |||
/**
 * Parse a string of markup into a DOMDocument.
 *
 * The HTML parser is used unless the string starts with an XML declaration
 * or the 'use_parser' option is 'xml' (an explicit 'html' always wins).
 * While parsing, PHP errors matching $this->errTypes are routed to
 * \QueryPath\ParseException::initializeFromError; the previous error
 * handler is always restored afterwards, whatever happens.
 *
 * Options consulted: 'convert_to_encoding', 'convert_from_encoding',
 * 'strip_low_ascii', 'use_parser', 'replace_entities'.
 *
 * @param string $string
 *   The markup to parse.
 * @param int|null $flags
 *   libxml option flags for DOMDocument::loadXML(); NULL means none.
 *   Not applied when the HTML parser is used.
 * @return \DOMDocument
 *   The document the markup was loaded into.
 * @throws \Throwable
 *   Whatever the installed error handler or the parser raises
 *   (presumably a \QueryPath\ParseException; confirm against that class).
 */
private function parseXMLString($string, $flags = NULL)
{
    $document = new \DOMDocument('1.0');
    $lead = strtolower(substr($string, 0, 5)); // <?xml

    // Install the handler BEFORE entering try, so that the finally block
    // only ever restores a handler that was actually installed here.
    set_error_handler(['\QueryPath\ParseException', 'initializeFromError'], $this->errTypes);
    try {
        if (isset($this->options['convert_to_encoding'])) {
            $from_enc = $this->options['convert_from_encoding'] ?? 'auto';
            $to_enc = $this->options['convert_to_encoding'];

            // mbstring is optional; without it the string is left as-is.
            if (function_exists('mb_convert_encoding')) {
                $string = mb_convert_encoding($string, $to_enc, $from_enc);
            }
        }

        // This is to avoid cases where low ascii digits have slipped into HTML.
        // AFAIK, it should not adversely affect UTF-8 documents.
        if (!empty($this->options['strip_low_ascii'])) {
            $string = filter_var($string, FILTER_UNSAFE_RAW, FILTER_FLAG_ENCODE_LOW);
        }

        // Allow users to override parser settings.
        $useParser = empty($this->options['use_parser'])
            ? ''
            : strtolower($this->options['use_parser']);

        // XML only if it looks like XML or XML was requested, and the HTML
        // parser was not explicitly asked for. Everything else is HTML.
        $parseAsXml = $useParser !== 'html'
            && ($lead === '<?xml' || $useParser === 'xml');

        if ($parseAsXml) {
            if (!empty($this->options['replace_entities'])) {
                $string = \QueryPath\Entities::replaceAllEntities($string);
            }
            // loadXML() expects an int; NULL is deprecated as of PHP 8.1.
            $document->loadXML($string, $flags ?? 0);
        } else {
            $document->loadHTML($string);
        }
    } finally {
        // Runs on success and on ANY throwable, so the custom handler can
        // never leak into the caller's code.
        restore_error_handler();
    }

    // The former `empty($document)` check was removed: an object is never
    // empty(), so that branch could not run.
    return $document;
}
||
223 | |||
224 | /** |
||
225 | * EXPERT: Be very, very careful using this. |
||
226 | * A utility function for setting the current set of matches. |
||
227 | * It makes sure the last matches buffer is set (for end() and andSelf()). |
||
228 | * |
||
229 | * @since 2.0 |
||
230 | * @param $matches |
||
231 | */ |
||
public function setMatches($matches)
{
    // Keep the outgoing set around before anything is replaced.
    // (De-duplicating here was tried and dropped: too much overhead.)
    $this->last = $this->matches;

    if ($matches instanceof \SplObjectStorage) {
        // Already in the native representation: use it directly.
        $storage = $matches;
    } else {
        $storage = new \SplObjectStorage();
        if (is_array($matches)) {
            // Legacy callers still hand over plain arrays; convert them.
            trigger_error('Legacy array detected.');
            foreach ($matches as $node) {
                $storage->attach($node);
            }
        } elseif (isset($matches)) {
            // Any other non-null value becomes a one-item set.
            $storage->attach($matches);
        }
        // NULL leaves the new set empty.
    }

    $this->matches = $storage;

    // EXPERIMENTAL: Support for qp()->length.
    $this->length = $this->matches->count();
}
||
263 | |||
264 | /** |
||
265 | * A depth-checking function. Typically, it only needs to be |
||
266 | * invoked with the first parameter. The rest are used for recursion. |
||
267 | * |
||
268 | * @see deepest(); |
||
269 | * @param DOMNode $ele |
||
270 | * The element. |
||
271 | * @param int $depth |
||
272 | * The depth gauge |
||
273 | * @param mixed $current |
||
274 | * The current set. |
||
275 | * @param int $deepest |
||
276 | * A reference to the greatest depth found so far. |
||
277 | * @return array |
||
278 | * Returns an array of DOM nodes. |
||
279 | */ |
||
protected function deepestNode(\DOMNode $ele, $depth = 0, $current = NULL, &$deepest = NULL)
{
    // Walks the element tree below $ele and collects the elements that sit
    // at the greatest depth. $current carries the winners found so far and
    // $deepest (by reference) the depth they were found at; both are
    // threaded through the recursion.
    //
    // FIXME: Should this use SplObjectStorage?

    // The outermost call seeds the result with the starting element.
    $isSeed = !isset($current);
    if ($isSeed) {
        $current = [$ele];
    }
    if (!isset($deepest)) {
        $deepest = $depth;
    }

    // Recurse into element children only. hasChildNodes() alone is not a
    // usable leaf test: it is also true for elements that contain nothing
    // but text or comments, which used to make such elements invisible to
    // the depth comparison below.
    $hasElementChild = false;
    if ($ele->hasChildNodes()) {
        foreach ($ele->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE) {
                $hasElementChild = true;
                $current = $this->deepestNode($child, $depth + 1, $current, $deepest);
            }
        }
    }
    if ($hasElementChild) {
        return $current;
    }

    // $ele is a leaf as far as elements are concerned.
    if ($depth > $deepest) {
        // New record: start the winners list over.
        $current = [$ele];
        $deepest = $depth;
    } elseif ($depth === $deepest && !$isSeed) {
        // Tie with the current record. The seed element is already in the
        // list, so it is not appended a second time.
        $current[] = $ele;
    }

    return $current;
}
||
304 | |||
305 | /** |
||
306 | * Prepare an item for insertion into a DOM. |
||
307 | * |
||
308 | * This handles a variety of boilerplate tasks that need doing before an |
||
309 | * indeterminate object can be inserted into a DOM tree. |
||
310 | * - If item is a string, this is converted into a document fragment and returned. |
||
311 | * - If item is a DOMQuery, then all items are retrieved and converted into |
||
312 | * a document fragment and returned. |
||
313 | * - If the item is a DOMNode, it is imported into the current DOM if necessary. |
||
314 | * - If the item is a SimpleXMLElement, it is converted into a DOM node and then |
||
315 | * imported. |
||
316 | * |
||
317 | * @param mixed $item |
||
318 | * Item to prepare for insert. |
||
319 | * @return mixed |
||
320 | * Returns the prepared item. |
||
321 | * @throws \QueryPath\Exception |
||
322 | * Thrown if the object passed in is not of a supported object type. |
||
323 | * @throws Exception |
||
324 | */ |
||
325 | protected function prepareInsert($item) |
||
381 | } |
||
382 | |||
383 | /** |
||
384 | * Convenience function for getNthMatch(0). |
||
385 | */ |
||
386 | protected function getFirstMatch() |
||
391 | } |
||
392 | |||
393 | /** |
||
394 | * Parse an XML or HTML file. |
||
395 | * |
||
396 | * This attempts to autodetect the type of file, and then parse it. |
||
397 | * |
||
398 | * @param string $filename |
||
399 | * The file name to parse. |
||
400 | * @param int $flags |
||
401 | * The OR-combined flags accepted by the DOM parser. See the PHP documentation |
||
402 | * for DOM or for libxml. |
||
403 | * @param resource $context |
||
404 | * The stream context for the file IO. If this is set, then an alternate |
||
405 | * parsing path is followed: The file is loaded by PHP's stream-aware IO |
||
406 | * facilities, read entirely into memory, and then handed off to |
||
407 | * {@link parseXMLString()}. On large files, this can have a performance impact. |
||
408 | * @throws \QueryPath\ParseException |
||
409 | * Thrown when a file cannot be loaded or parsed. |
||
410 | */ |
||
411 | private function parseXMLFile($filename, $flags = NULL, $context = NULL) |
||
477 | } |
||
478 | |||
479 | /** |
||
480 | * Determine whether a given string looks like XML or not. |
||
481 | * |
||
482 | * Basically, this scans a portion of the supplied string, checking to see |
||
483 | * if it has a tag-like structure. It is possible to "confuse" this, which |
||
484 | * may subsequently result in parse errors, but in the vast majority of |
||
485 | * cases, this method serves as a valid indicator of whether or not the |
||
486 | * content looks like XML. |
||
487 | * |
||
488 | * Things that are intentionally excluded: |
||
489 | * - plain text with no markup. |
||
490 | * - strings that look like filesystem paths. |
||
491 | * |
||
492 | * Subclasses SHOULD NOT OVERRIDE THIS. Altering it may be altering |
||
493 | * core assumptions about how things work. Instead, classes should |
||
494 | * override the constructor and pass in only one of the parsed types |
||
495 | * that this class expects. |
||
496 | */ |
||
protected function isXMLish($string)
{
    // Markup needs an opening angle bracket somewhere...
    if (strpos($string, '<') === false) {
        return false;
    }

    // ...and a closing one. Positions and ordering are not checked.
    return strpos($string, '>') !== false;
}
||
501 | |||
502 | /** |
||
503 | * A utility function for retrieving a match by index. |
||
504 | * |
||
505 | * The internal data structure used in DOMQuery does not have |
||
506 | * strong random access support, so we supplement it with this method. |
||
507 | * |
||
508 | * @param $index |
||
509 | * @return object|void |
||
510 | */ |
||
511 | protected function getNthMatch(int $index) |
||
521 | } |
||
522 | } |
||
524 | } |