Total Complexity | 322 |
Total Lines | 1500 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like HtmlNode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlNode, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
38 | class HtmlNode |
||
39 | { |
||
40 | const HDOM_TYPE_ELEMENT = 1; |
||
41 | const HDOM_TYPE_COMMENT = 2; |
||
42 | const HDOM_TYPE_TEXT = 3; |
||
43 | const HDOM_TYPE_ROOT = 5; |
||
44 | const HDOM_TYPE_UNKNOWN = 6; |
||
45 | const HDOM_TYPE_CDATA = 7; |
||
46 | |||
47 | const HDOM_QUOTE_DOUBLE = 0; |
||
48 | const HDOM_QUOTE_SINGLE = 1; |
||
49 | const HDOM_QUOTE_NO = 3; |
||
50 | |||
51 | const HDOM_INFO_BEGIN = 0; |
||
52 | const HDOM_INFO_END = 1; |
||
53 | const HDOM_INFO_QUOTE = 2; |
||
54 | const HDOM_INFO_SPACE = 3; |
||
55 | const HDOM_INFO_TEXT = 4; |
||
56 | const HDOM_INFO_INNER = 5; |
||
57 | const HDOM_INFO_OUTER = 6; |
||
58 | const HDOM_INFO_ENDSPACE = 7; |
||
59 | |||
60 | public $nodetype = self::HDOM_TYPE_TEXT; |
||
61 | public $tag = 'text'; |
||
62 | public $attr = []; |
||
63 | public $children = []; |
||
64 | public $nodes = []; |
||
65 | public $parent = null; |
||
66 | public $_ = []; |
||
67 | private $dom = null; |
||
68 | |||
/**
 * Forwards deprecated lower_case method names to their camelCase successors.
 *
 * Supported aliases: children, first_child, has_child, last_child,
 * next_sibling and prev_sibling. Every forwarded call is reported through
 * Debug::log() as deprecated.
 *
 * @param string $func Name of the inaccessible method that was called.
 * @param array $args Arguments passed to that method.
 * @return mixed Result of the forwarded call, or null when $func is not a
 * known alias and the active error handler lets execution continue.
 */
public function __call($func, $args)
{
    // Deprecated lower_case name => current method name
    $aliases = [
        'children' => 'childNodes',
        'first_child' => 'firstChild',
        'has_child' => 'hasChildNodes',
        'last_child' => 'lastChild',
        'next_sibling' => 'nextSibling',
        'prev_sibling' => 'previousSibling',
    ];

    if (!isset($aliases[$func])) {
        trigger_error(
            'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
            E_USER_ERROR
        );

        // A user-defined error handler may swallow the error and resume
        // execution. There is no method to forward to, so stop here instead
        // of invoking an undefined target.
        return null;
    }

    $actual_function = $aliases[$func];

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array([$this, $actual_function], $args);
}
||
103 | |||
/**
 * Creates a node and, when a DOM is supplied, registers the node with it.
 *
 * The node is appended to $dom->nodes. Passing null creates a detached node
 * that belongs to no DOM.
 *
 * @param object|null $dom Owning DOM object, or null for a detached node.
 */
public function __construct($dom)
{
    // A constructor has no meaningful return value, so simply skip the
    // registration for detached nodes.
    if (null !== $dom) {
        $this->dom = $dom;
        $dom->nodes[] = $this;
    }
}
||
113 | |||
/**
 * Supplies the data shown when the node is passed to var_dump().
 *
 * @return array Keys 'nodetype', 'tag', 'attributes' and 'nodes'. Empty
 * attribute and node lists are reported as the string 'none'.
 */
public function __debugInfo()
{
    // Human-readable names, listed in the order in which they are tested.
    // Anything that matches none of them is reported as unknown.
    $type_names = [
        'HDOM_TYPE_ELEMENT' => self::HDOM_TYPE_ELEMENT,
        'HDOM_TYPE_COMMENT' => self::HDOM_TYPE_COMMENT,
        'HDOM_TYPE_TEXT' => self::HDOM_TYPE_TEXT,
        'HDOM_TYPE_ROOT' => self::HDOM_TYPE_ROOT,
        'HDOM_TYPE_CDATA' => self::HDOM_TYPE_CDATA,
    ];

    // Deliberately a loose search (same comparison a switch performs)
    $label = array_search($this->nodetype, $type_names);

    if (false === $label) {
        $label = 'HDOM_TYPE_UNKNOWN';
    }

    $attributes = $this->attr;
    if (empty($attributes)) {
        $attributes = 'none';
    }

    $nodes = $this->nodes;
    if (empty($nodes)) {
        $nodes = 'none';
    }

    return [
        'nodetype' => $label . " ($this->nodetype)",
        'tag' => $this->tag,
        'attributes' => $attributes,
        'nodes' => $nodes,
    ];
}
||
145 | |||
/**
 * Casting a node to a string yields the result of outertext().
 *
 * @return string
 */
public function __toString()
{
    $html = $this->outertext();

    return $html;
}
||
150 | |||
/**
 * Detaches this node from its DOM and from its parent node.
 *
 * The properties are reset to null rather than removed with unset(), so
 * they stay declared. Other methods of this class read them directly (for
 * example `null === $this->parent` and `$this->dom && ...`) and would
 * otherwise access an undefined property once clear() has been called.
 *
 * @return void
 */
public function clear()
{
    $this->dom = null; // Break link to origin
    $this->parent = null; // Break link to branch
}
||
156 | |||
/**
 * Prints this node and its subtree, one node per line.
 *
 * Each line holds the tag name, preceded by one tab per nesting level and
 * optionally followed by the attribute list.
 *
 * @param bool $show_attr Whether to print the attributes after the tag.
 * @param int $depth Number of tabs written in front of the tag.
 * @return void
 *
 * @codeCoverageIgnore
 */
public function dump($show_attr = true, $depth = 0)
{
    $line = str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $line .= '(';
        foreach ($this->attr as $name => $value) {
            $line .= "[$name]=>\"$value\", ";
        }
        $line .= ')';
    }

    echo $line . "\n";

    if (!$this->nodes) {
        return;
    }

    // Children are printed one level deeper
    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $depth + 1);
    }
}
||
178 | |||
/**
 * Builds a one-line debug description of this node.
 *
 * The line contains the tag, the attributes, the raw contents of the
 * internal info array ($this->_), the stored inner text and the number of
 * children and nodes.
 *
 * @param bool $echo When true the line is printed and nothing is returned;
 * when false the line is returned instead.
 * @return string|null The description when $echo is false, otherwise null.
 *
 * @codeCoverageIgnore
 */
public function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    // Entries can themselves be arrays (makeup() reads
                    // $this->_[HDOM_INFO_SPACE][$key][0..2]); encode those
                    // instead of triggering "Array to string conversion".
                    if (is_array($v2)) {
                        $v2 = json_encode($v2);
                    }
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    // NOTE(review): "text" is not a declared property in the visible part of
    // the class; presumably resolved through magic accessors - confirm.
    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    // Report the inner text stored on *this* node. The previous code tested
    // an undefined local variable, so this section never printed anything.
    $string .= ' HDOM_INNER_INFO: ';
    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[self::HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= "\n";

    if ($echo) {
        echo $string;

        return;
    }

    return $string;
}
||
236 | |||
/**
 * Returns the parent node, optionally re-parenting this node first.
 *
 * When $parent is given, the node is removed from the node and child lists
 * of its previous parent (if any) and appended to the lists of the new
 * parent. Re-assigning the current parent does not create duplicates.
 *
 * @param HtmlNode|null $parent New parent node, or null to only read.
 * @return HtmlNode|null The (possibly new) parent of this node.
 */
public function parent($parent = null)
{
    if (null !== $parent) {
        $previous = isset($this->parent) ? $this->parent : null;

        // Leave the old parent first, otherwise the node would be listed
        // under two parents at once.
        if (is_object($previous) && $previous !== $parent) {
            $position = array_search($this, $previous->nodes, true);
            if (false !== $position) {
                unset($previous->nodes[$position]);
                $previous->nodes = array_values($previous->nodes);
            }

            $position = array_search($this, $previous->children, true);
            if (false !== $position) {
                unset($previous->children[$position]);
                // Keep the list contiguous; sibling lookups index into it
                $previous->children = array_values($previous->children);
            }
        }

        $this->parent = $parent;

        if (!in_array($this, $parent->nodes, true)) {
            $parent->nodes[] = $this;
        }

        if (!in_array($this, $parent->children, true)) {
            $parent->children[] = $this;
        }
    }

    return $this->parent;
}
||
250 | |||
/**
 * Walks up the parent chain and returns the closest ancestor with the
 * given tag name.
 *
 * The comparison is strict and case-sensitive; the node itself is not
 * considered.
 *
 * @param string $tag Tag name to look for.
 * @return HtmlNode|null The matching ancestor, or null when there is none.
 */
public function find_ancestor_tag($tag)
{
    for ($candidate = $this->parent; null !== $candidate; $candidate = $candidate->parent) {
        if ($candidate->tag === $tag) {
            return $candidate;
        }
    }

    return null;
}
||
269 | |||
/**
 * Returns the markup inside this node.
 *
 * Starts from the stored inner text (or, failing that, the stored text),
 * appends the outer markup of every entry in $this->nodes and passes the
 * result through convert_text().
 *
 * @return string
 */
public function innertext()
{
    $html = '';

    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $html = $this->_[self::HDOM_INFO_INNER];
    } elseif (isset($this->_[self::HDOM_INFO_TEXT])) {
        $html = $this->_[self::HDOM_INFO_TEXT];
    }

    foreach ($this->nodes as $child) {
        $html .= $child->outertext();
    }

    return $this->convert_text($html);
}
||
286 | |||
/**
 * Returns the markup of this node including its own tags.
 *
 * Resolution order: the root node delegates to innertext(); a stored outer
 * text or stored text is returned as is; otherwise the markup is assembled
 * from the start tag, the stored inner text, the child nodes and the end
 * tag. Every result is passed through convert_text().
 *
 * @return string
 */
public function outertext()
{
    if ('root' === $this->tag) {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    // The callback runs before any stored text is read.
    if ($this->dom && null !== $this->dom->callback) {
        call_user_func_array($this->dom->callback, [$this]);
    }

    // Pre-rendered content wins over assembling the markup
    foreach ([self::HDOM_INFO_OUTER, self::HDOM_INFO_TEXT] as $slot) {
        if (isset($this->_[$slot])) {
            return $this->convert_text($this->_[$slot]);
        }
    }

    $html = isset($this->_[self::HDOM_INFO_BEGIN]) ? $this->makeup() : '';

    // todo: <br> should either never have self::HDOM_INFO_INNER or always
    if ('br' !== $this->tag && isset($this->_[self::HDOM_INFO_INNER])) {
        $html .= $this->_[self::HDOM_INFO_INNER];
    }

    if ($this->nodes) {
        foreach ($this->nodes as $child) {
            $html .= $child->outertext();
        }
    }

    // An end position of 0 marks an element without a closing tag
    $has_end_tag = isset($this->_[self::HDOM_INFO_END]) && 0 != $this->_[self::HDOM_INFO_END];

    if ($has_end_tag) {
        $html .= '</' . $this->tag . '>';
    }

    return $this->convert_text($html);
}
||
331 | |||
/**
 * Tells whether the tag of the given node is a block level element.
 *
 * The tag name is compared case-insensitively against a fixed list.
 *
 * @see https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param HtmlNode $node Node whose tag is inspected.
 * @return bool
 */
protected function is_block_element($node)
{
    // todo: When we have the utility class this should be moved there
    static $block_tags = [
        'p',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'ol', 'ul',
        'pre',
        'address',
        'blockquote',
        'dl',
        'div',
        'fieldset',
        'form',
        'hr',
        'noscript',
        'table',
    ];

    $tag = strtolower($node->tag);

    return in_array($tag, $block_tags, true);
}
||
356 | |||
/**
 * Tells whether the tag of the given node is an inline level element.
 *
 * The tag name is compared case-insensitively against a fixed list.
 *
 * @see https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param HtmlNode $node Node whose tag is inspected.
 * @return bool
 */
protected function is_inline_element($node)
{
    // todo: When we have the utility class this should be moved there
    static $inline_tags = [
        'b', 'big', 'i', 'small', 'tt',
        'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
        'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'script', 'span', 'sub', 'sup',
        'button', 'input', 'label', 'select', 'textarea',
    ];

    $tag = strtolower($node->tag);

    return in_array($tag, $inline_tags, true);
}
||
372 | |||
/**
 * Returns the plain text of this node and its descendants.
 *
 * Script, style, comment and unknown nodes contribute no text of their own.
 * Block level children are separated from preceding text by a blank line,
 * <br> is replaced by the configured line break text, and all other
 * children are appended as they are.
 *
 * @param bool $trim When true, leading and trailing whitespace is removed;
 * when false, each run is reduced to a single space.
 * @return string
 */
public function text($trim = true)
{
    $ret = '';
    $tag = strtolower($this->tag);

    if ('script' === $tag || 'style' === $tag || self::HDOM_TYPE_COMMENT === $this->nodetype) {
        $ret = '';
    } elseif (self::HDOM_TYPE_CDATA === $this->nodetype) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif (self::HDOM_TYPE_UNKNOWN === $this->nodetype) {
        $ret = '';
    } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif (self::HDOM_TYPE_TEXT === $this->nodetype) {
        $ret = $this->_[self::HDOM_INFO_TEXT];
    }

    if (is_null($this->nodes)) {
        return '';
    }

    foreach ($this->nodes as $n) {
        if ($this->is_block_element($n)) {
            $block = ltrim($this->convert_text($n->text(false)));

            // Compare against '' explicitly: empty() would also discard a
            // block whose whole text is "0".
            if ('' === $block) {
                continue;
            }

            $ret = rtrim($ret) . "\n\n" . $block;
        } elseif ($this->is_inline_element($n)) {
            // todo: <br> introduces code smell because no space but \n
            if ('br' === strtolower($n->tag)) {
                // Detached nodes have no DOM to ask for a custom value
                $ret .= ($this->dom && $this->dom->default_br_text)
                    ? $this->dom->default_br_text
                    : DEFAULT_BR_TEXT;
            } else {
                // Render the child once; doing it twice doubles the work at
                // every nesting level of inline elements.
                $inline = $this->convert_text($n->text(false));

                if ('' === ltrim($inline)) {
                    continue;
                }

                $ret .= $inline;
            }
        } else {
            $ret .= $this->convert_text($n->text(false));
        }
    }

    // Reduce whitespace at start/end to a single (or none) space.
    // \x{A0} is the no-break space. With the "u" modifier a byte pair such
    // as \xC2\xA0 would be read as the two characters U+00C2 and U+00A0 and
    // strip a leading/trailing "Â" as well.
    $whitespace = '[ \t\n\r\0\x0B\x{A0}]+';
    $replacement = $trim ? '' : ' ';

    foreach (['/' . $whitespace . '$/u', '/^' . $whitespace . '/u'] as $pattern) {
        $cleaned = preg_replace($pattern, $replacement, $ret);

        // preg_replace() yields null when the text is not valid UTF-8; keep
        // the text untouched in that case instead of losing it.
        if (null !== $cleaned) {
            $ret = $cleaned;
        }
    }

    return $ret;
}
||
430 | |||
/**
 * Returns the inner markup of this node with CDATA markers removed.
 *
 * The opening marker "<![CDATA[" is removed case-insensitively, the closing
 * marker "]]>" case-sensitively.
 *
 * @return string
 */
public function xmltext()
{
    $inner = $this->innertext();
    $without_opening = str_ireplace('<![CDATA[', '', $inner);

    return str_replace(']]>', '', $without_opening);
}
||
439 | |||
/**
 * Builds the start tag of this node, including its attributes.
 *
 * Nodes that carry stored text (text, comment, unknown) return that text
 * instead. Attributes whose value is null or false are omitted, attributes
 * whose value is true are written without a value. Quoting and the
 * whitespace around each attribute are taken from the stored node info
 * when available.
 *
 * @return string
 */
public function makeup()
{
    // text, comment, unknown
    if (isset($this->_[self::HDOM_INFO_TEXT])) {
        return $this->_[self::HDOM_INFO_TEXT];
    }

    $html = '<' . $this->tag;

    foreach ($this->attr as $name => $value) {
        // skip removed attribute
        if (null === $value || false === $value) {
            continue;
        }

        // [0] before the name, [1] before "=", [2] after "="
        $spacing = isset($this->_[self::HDOM_INFO_SPACE][$name])
            ? $this->_[self::HDOM_INFO_SPACE][$name]
            : [' ', '', ''];

        $html .= $spacing[0];

        //no value attr: nowrap, checked selected...
        if (true === $value) {
            $html .= $name;
            continue;
        }

        $quote_type = isset($this->_[self::HDOM_INFO_QUOTE][$name])
            ? $this->_[self::HDOM_INFO_QUOTE][$name]
            : self::HDOM_QUOTE_DOUBLE;

        if (self::HDOM_QUOTE_SINGLE == $quote_type) {
            $quote = '\'';
            $value = htmlentities($value, ENT_QUOTES, $this->dom->target_charset);
        } elseif (self::HDOM_QUOTE_NO == $quote_type) {
            // Unquoted values are written verbatim
            $quote = '';
        } else {
            // Double quotes are also the fallback for unknown quote types
            $quote = '"';
            $value = htmlentities($value, ENT_COMPAT, $this->dom->target_charset);
        }

        $html .= $name . $spacing[1] . '=' . $spacing[2] . $quote . $value . $quote;
    }

    if (isset($this->_[self::HDOM_INFO_ENDSPACE])) {
        $html .= $this->_[self::HDOM_INFO_ENDSPACE];
    }

    return $html . '>';
}
||
501 | |||
/**
 * Finds the nodes below this node that match a selector.
 *
 * A selector list (separated by commas) yields the union of all matches.
 * Matches are ordered by their key in the DOM node list.
 *
 * @param string $selector Selector, or comma separated selector list.
 * @param int|null $idx Null returns all matches; otherwise the zero-based
 * position of the match to return. Negative values count from the end.
 * @param bool $lowercase Passed on to seek() for case-insensitive matching.
 * @return HtmlNode[]|HtmlNode|null Array of matches when $idx is null
 * (empty when nothing matches or the selector is invalid); otherwise the
 * selected node, or null when that position does not exist.
 */
public function find($selector, $idx = null, $lowercase = false)
{
    $selectors = $this->parse_selector($selector);
    $selector_count = count($selectors);

    if (0 === $selector_count) {
        return [];
    }

    $found_keys = [];

    // find each selector
    for ($s = 0; $s < $selector_count; ++$s) {
        // The change on the below line was documented on the sourceforge
        // code tracker id 2788009
        // used to be: if (($levle=count($selectors[0]))===0) return array();
        $depth = count($selectors[$s]);

        if (0 === $depth) {
            Debug::log_once('Empty selector (' . $selector . ') matches nothing.');

            return [];
        }

        if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
            Debug::log_once('Invalid operation. The current node has no start tag.');

            return [];
        }

        // Keys are node positions; the search starts at this node
        $head = [$this->_[self::HDOM_INFO_BEGIN] => 1];
        $combinator = ' ';

        // handle descendant selectors, no recursive!
        for ($level = 0; $level < $depth; ++$level) {
            $matches = [];

            foreach (array_keys($head) as $key) {
                $start = (-1 === $key) ? $this->dom->root : $this->dom->nodes[$key];
                //PaperG - Pass this optional parameter on to the seek function.
                $start->seek($selectors[$s][$level], $matches, $combinator, $lowercase);
            }

            $head = $matches;
            $combinator = $selectors[$s][$level][6]; // Next Combinator
        }

        // Union: keys that are already known keep their entry
        $found_keys += $head;
    }

    // sort keys
    ksort($found_keys);

    $found = [];
    foreach (array_keys($found_keys) as $key) {
        $found[] = $this->dom->nodes[$key];
    }

    // return nth-element or array
    if (is_null($idx)) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
||
576 | |||
/**
 * Same as find(), except that an empty result is reported as null.
 *
 * @param string $selector Selector, or comma separated selector list.
 * @param int|null $idx See find().
 * @param bool $lowercase See find().
 * @return HtmlNode[]|HtmlNode|null The result of find(), or null when that
 * result is empty.
 */
public function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->find($selector, $idx, $lowercase);

    if (!$result) {
        return null;
    }

    return $result;
}
||
581 | |||
/**
 * Collects the nodes that match one parsed selector, relative to this node.
 *
 * The combinator decides which nodes are candidates:
 *  - ' ' all nodes between the start and end position of this node
 *  - '>' the children of this node
 *  - '+' the sibling directly following this node
 *  - '~' all siblings following this node
 *
 * Each candidate is then tested against tag, id, classes and attributes.
 *
 * @param array $selector Parsed selector: [pseudo selector, tag, pseudo
 * element, id, classes, attributes, combinator].
 * @param array $ret Receives the matches as array keys (the value is always
 * 1). Passed by reference; existing entries are kept.
 * @param string $parent_cmd Combinator that links this node to the
 * candidates.
 * @param bool $lowercase When true, class names and attribute values are
 * lower-cased before they are compared.
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes) = $selector;
    $nodes = [];

    if (' ' === $parent_cmd) { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a closing
        // tag (i.e. void element)
        $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;
        if (0 == $end && $this->parent) {
            $parent = $this->parent;
            while (null !== $parent && !isset($parent->_[self::HDOM_INFO_END])) {
                --$end;
                $parent = $parent->parent;
            }

            if (null !== $parent) {
                $end += $parent->_[self::HDOM_INFO_END];
            } else {
                // No ancestor knows its end position: search up to the end
                // of the document rather than reading from null.
                $end = 0;
            }
        }

        if (0 === $end) {
            $end = count($this->dom->nodes);
        }

        // Get list of target nodes
        $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

        // remove() makes $this->dom->nodes non-contiguous; use what is left.
        $nodes = array_intersect_key(
            $this->dom->nodes,
            array_flip(range($nodes_start, $end))
        );
    } elseif ('>' === $parent_cmd) { // Child Combinator
        $nodes = $this->children;
    } elseif ('+' === $parent_cmd && $this->parent) { // Next-Sibling Combinator
        // Identity search only: a loose comparison would recursively compare
        // the (cyclic) object graphs and may match a different node.
        $index = array_search($this, $this->parent->children, true);

        if (false !== $index && $index + 1 < count($this->parent->children)) {
            $nodes[] = $this->parent->children[$index + 1];
        }
    } elseif ('~' === $parent_cmd && $this->parent) { // Subsequent Sibling Combinator
        $index = array_search($this, $this->parent->children, true);

        if (false !== $index) {
            // Only siblings *after* this node qualify, not the node itself
            $nodes = array_slice($this->parent->children, $index + 1);
        }
    }

    // Go through each element starting at this element until the end tag
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // Handle 'text' selector
        if ('text' === $tag) {
            if ('text' === $node->tag) {
                $ret[array_search($node, $this->dom->nodes, true)] = 1;
            }

            if (isset($node->_[self::HDOM_INFO_INNER])) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'cdata' selector
        if ('cdata' === $tag) {
            if ('cdata' === $node->tag) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'comment'
        if ('comment' === $tag && 'comment' === $node->tag) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        $pass = true;

        // Skip if tag doesn't match
        if ('' !== $tag && $tag !== $node->tag && '*' !== $tag) {
            $pass = false;
        }

        // Check ID
        if ($pass && '' !== $id) {
            if (!isset($node->attr['id'])) {
                $pass = false;
            } else {
                // Note: Only consider the first ID (as browsers do)
                $node_id = explode(' ', trim($node->attr['id']))[0];

                if ($id !== $node_id) {
                    $pass = false;
                }
            }
        }

        // Check if all class(es) exist
        if ($pass && '' !== $class && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                // Apply the same rules for the pattern and attribute value
                // Attribute values must not contain control characters other than space
                // https://www.w3.org/TR/html/dom.html#text-content
                // https://www.w3.org/TR/html/syntax.html#attribute-values
                // https://www.w3.org/TR/xml/#AVNormalize
                $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
                $node_classes = trim($node_classes);
                $node_classes = explode(' ', $node_classes);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $wanted) {
                    // Strict: numeric looking names such as "1e1" and "10"
                    // must not be treated as equal.
                    if (!in_array($wanted, $node_classes, true)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if (
            $pass
            && '' !== $attributes
            && is_array($attributes)
            && !empty($attributes)
        ) {
            foreach ($attributes as $a) {
                list(
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /*
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within it's parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (
                    is_numeric($att_name)
                    && '' === $att_expr
                    && '' === $att_val
                ) {
                    $position = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $sibling) {
                        if ($sibling->tag === $node->tag) {
                            ++$position;
                        }
                        if ($sibling === $node) {
                            break;
                        }
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($position === (int) $att_name) {
                        continue;
                    }
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if (
                        'plaintext' !== $att_name
                        && !isset($node->attr[$att_name])
                    ) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ('' === $att_expr) {
                    continue;
                }

                // If they have told us that this is a "plaintext"
                // search then we want the plaintext of the node - right?
                // todo "plaintext" is not a valid CSS selector!
                if ('plaintext' === $att_name) {
                    $nodeKeyValue = $node->text();
                } elseif (isset($node->attr[$att_name])) {
                    $nodeKeyValue = $node->attr[$att_name];
                } else {
                    // Negated attribute combined with an expression: the
                    // attribute is absent here, compare against ''.
                    $nodeKeyValue = '';
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                $check = 'not' === $ps_element ? !$check : $check;

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // Found a match. Add to list
        $pass = 'not' === $ps_selector ? !$pass : $pass;
        if ($pass) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }
    }
}
||
843 | |||
/**
 * Tests an attribute value against an attribute selector expression.
 *
 * Pattern and value are whitespace-normalised first (runs of whitespace
 * become one space, both ends are trimmed).
 *
 * @param string $exp Operator: '=', '!=', '^=', '$=', '*=', '|=' or '~='.
 * @param string $pattern Value given in the selector.
 * @param string $value Value found on the node.
 * @param string $case_sensitivity 'i' compares case-insensitively; any
 * other value compares the strings as given.
 * @return bool True when the value satisfies the expression. Unknown
 * operators are logged through Debug::log() and yield false.
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    if ('i' === $case_sensitivity) {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    // Apply the same rules for the pattern and attribute value
    // Attribute values must not contain control characters other than space
    // https://www.w3.org/TR/html/dom.html#text-content
    // https://www.w3.org/TR/html/syntax.html#attribute-values
    // https://www.w3.org/TR/xml/#AVNormalize
    // The casts cover preg_replace() returning null for invalid UTF-8.
    $pattern = trim((string) preg_replace("/[\r\n\t\s]+/u", ' ', $pattern));
    $value = trim((string) preg_replace("/[\r\n\t\s]+/u", ' ', $value));

    switch ($exp) {
        case '=':
            return $value === $pattern;
        case '!=':
            return $value !== $pattern;
        case '^=':
            return 1 === preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
        case '$=':
            return 1 === preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
        case '*=':
            return 1 === preg_match('/' . preg_quote($pattern, '/') . '/', $value);
        case '|=':
            /*
             * [att|=val]
             *
             * Represents an element with the att attribute, its value
             * either being exactly "val" or beginning with "val"
             * immediately followed by "-" (U+002D).
             *
             * A plain prefix test is not enough: "en" must match "en" and
             * "en-US" but not "english".
             */
            return $value === $pattern || 0 === strpos($value, $pattern . '-');
        case '~=':
            /*
             * [att~=val]
             *
             * Represents an element with the att attribute whose value is a
             * whitespace-separated list of words, one of which is exactly
             * "val". If "val" contains whitespace, it will never represent
             * anything (since the words are separated by spaces). Also if
             * "val" is the empty string, it will never represent anything.
             *
             * The explicit '' check is required because splitting an empty
             * value yields a single empty word.
             */
            return '' !== $pattern && in_array($pattern, explode(' ', $value), true);
    }

    Debug::log('Unhandled attribute selector: ' . $exp . '!');

    return false;
}
||
899 | |||
900 | protected function parse_selector($selector_string) |
||
901 | { |
||
902 | /** |
||
903 | * Pattern of CSS selectors, modified from mootools (https://mootools.net/). |
||
904 | * |
||
905 | * Paperg: Add the colon to the attribute, so that it properly finds |
||
906 | * <tag attr:ibute="something" > like google does. |
||
907 | * |
||
908 | * Note: if you try to look at this attribute, you MUST use getAttribute |
||
909 | * since $dom->x:y will fail the php syntax check. |
||
910 | * |
||
911 | * Notice the \[ starting the attribute? and the @? following? This |
||
912 | * implies that an attribute can begin with an @ sign that is not |
||
913 | * captured. This implies that an html attribute specifier may start |
||
914 | * with an @ sign that is NOT captured by the expression. Farther study |
||
915 | * is required to determine of this should be documented or removed. |
||
916 | * |
||
917 | * Matches selectors in this order: |
||
918 | * |
||
919 | * [0] - full match |
||
920 | * |
||
921 | * [1] - pseudo selector |
||
922 | * (?:\:(\w+)\()? |
||
923 | * Matches the pseudo selector (optional) |
||
924 | * |
||
925 | * [2] - tag name |
||
926 | * ([\w:\*-]*) |
||
927 | * Matches the tag name consisting of zero or more words, colons, |
||
928 | * asterisks and hyphens. |
||
929 | * |
||
930 | * [3] - pseudo selector |
||
931 | * (?:\:(\w+)\()? |
||
932 | * Matches the pseudo selector (optional) |
||
933 | * |
||
934 | * [4] - id name |
||
935 | * (?:\#([\w-]+)) |
||
936 | * Optionally matches a id name, consisting of an "#" followed by |
||
937 | * the id name (one or more words and hyphens). |
||
938 | * |
||
939 | * [5] - class names (including dots) |
||
940 | * (?:\.([\w\.-]+))? |
||
941 | * Optionally matches a list of classs, consisting of an "." |
||
942 | * followed by the class name (one or more words and hyphens) |
||
943 | * where multiple classes can be chained (i.e. ".foo.bar.baz") |
||
944 | * |
||
945 | * [6] - attributes |
||
946 | * ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)? |
||
947 | * Optionally matches the attributes list |
||
948 | * |
||
949 | * [7] - separator |
||
950 | * ([\/, >+~]+) |
||
951 | * Matches the selector list separator |
||
952 | */ |
||
953 | // phpcs:ignore Generic.Files.LineLength |
||
954 | $pattern = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is"; |
||
955 | |||
956 | preg_match_all( |
||
957 | $pattern, |
||
958 | trim($selector_string) . ' ', // Add final ' ' as pseudo separator |
||
959 | $matches, |
||
960 | PREG_SET_ORDER |
||
961 | ); |
||
962 | |||
963 | $selectors = []; |
||
964 | $result = []; |
||
965 | |||
966 | foreach ($matches as $m) { |
||
967 | $m[0] = trim($m[0]); |
||
968 | |||
969 | // Skip NoOps |
||
970 | if ('' === $m[0] || '/' === $m[0] || '//' === $m[0]) { |
||
971 | continue; |
||
972 | } |
||
973 | |||
974 | array_shift($m); |
||
975 | |||
976 | // Convert to lowercase |
||
977 | if ($this->dom->lowercase) { |
||
978 | $m[1] = strtolower($m[1]); |
||
979 | } |
||
980 | |||
981 | // Extract classes |
||
982 | if ('' !== $m[4]) { |
||
983 | $m[4] = explode('.', $m[4]); |
||
984 | } |
||
985 | |||
986 | /* Extract attributes (pattern based on the pattern above!) |
||
987 | |||
988 | * [0] - full match |
||
989 | * [1] - attribute name |
||
990 | * [2] - attribute expression |
||
991 | * [3] - attribute value |
||
992 | * [4] - case sensitivity |
||
993 | * |
||
994 | * Note: Attributes can be negated with a "!" prefix to their name |
||
995 | */ |
||
996 | if ('' !== $m[5]) { |
||
997 | preg_match_all( |
||
998 | "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is", |
||
999 | trim($m[5]), |
||
1000 | $attributes, |
||
1001 | PREG_SET_ORDER |
||
1002 | ); |
||
1003 | |||
1004 | // Replace element by array |
||
1005 | $m[5] = []; |
||
1006 | |||
1007 | foreach ($attributes as $att) { |
||
1008 | // Skip empty matches |
||
1009 | if ('' === trim($att[0])) { |
||
1010 | continue; |
||
1011 | } |
||
1012 | |||
1013 | $inverted = (isset($att[1][0]) && '!' === $att[1][0]); |
||
1014 | $m[5][] = [ |
||
1015 | $inverted ? substr($att[1], 1) : $att[1], // Name |
||
1016 | (isset($att[2])) ? $att[2] : '', // Expression |
||
1017 | (isset($att[3])) ? $att[3] : '', // Value |
||
1018 | $inverted, // Inverted Flag |
||
1019 | (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity |
||
1020 | ]; |
||
1021 | } |
||
1022 | } |
||
1023 | |||
1024 | // Sanitize Separator |
||
1025 | if ('' !== $m[6] && '' === trim($m[6])) { // Descendant Separator |
||
1026 | $m[6] = ' '; |
||
1027 | } else { // Other Separator |
||
1028 | $m[6] = trim($m[6]); |
||
1029 | } |
||
1030 | |||
1031 | // Clear Separator if it's a Selector List |
||
1032 | if ($is_list = (',' === $m[6])) { |
||
1033 | $m[6] = ''; |
||
1034 | } |
||
1035 | |||
1036 | $result[] = $m; |
||
1037 | |||
1038 | if ($is_list) { // Selector List |
||
1039 | $selectors[] = $result; |
||
1040 | $result = []; |
||
1041 | } |
||
1042 | } |
||
1043 | |||
1044 | if (count($result) > 0) { |
||
1045 | $selectors[] = $result; |
||
1046 | } |
||
1047 | |||
1048 | return $selectors; |
||
1049 | } |
||
1050 | |||
/**
 * Magic getter for attributes and the virtual text properties.
 *
 * An entry in the attribute list wins and is returned after being passed
 * through convert_text(). Otherwise the names "outertext", "innertext",
 * "plaintext" and "xmltext" are served by the matching method of this node.
 *
 * @param string $name Name of the property being read.
 *
 * @return mixed The converted attribute value, the requested text, or
 *               false when the name matches neither.
 */
public function __get($name)
{
    if (isset($this->attr[$name])) {
        $raw = $this->attr[$name];

        return $this->convert_text($raw);
    }

    // Virtual properties; every other name resolves to false.
    switch ($name) {
        case 'outertext':
            $value = $this->outertext();
            break;
        case 'innertext':
            $value = $this->innertext();
            break;
        case 'plaintext':
            $value = $this->text();
            break;
        case 'xmltext':
            $value = $this->xmltext();
            break;
        default:
            $value = false;
    }

    return $value;
}
||
1070 | |||
/**
 * Magic setter for attributes and the writable virtual text properties.
 *
 * - "outertext" stores the value in the HDOM_INFO_OUTER info slot.
 * - "innertext" stores the value in the HDOM_INFO_INNER info slot and, if
 *   an HDOM_INFO_TEXT slot exists, blanks it.
 * - any other name is written to the attribute list.
 *
 * @param string $name  Name of the property being written.
 * @param mixed  $value Value to store.
 *
 * @return void
 */
public function __set($name, $value)
{
    switch ($name) {
        case 'outertext':
            $this->_[self::HDOM_INFO_OUTER] = $value;

            return;
        case 'innertext':
            $hasTextSlot = isset($this->_[self::HDOM_INFO_TEXT]);

            if ($hasTextSlot) {
                $this->_[self::HDOM_INFO_TEXT] = '';
            }

            $this->_[self::HDOM_INFO_INNER] = $value;

            return;
    }

    // Not a virtual property: treat it as a plain attribute.
    $this->attr[$name] = $value;
}
||
1087 | |||
/**
 * Magic isset() handler.
 *
 * "outertext", "innertext" and "plaintext" are always reported as set;
 * any other name is looked up in the attribute list.
 *
 * @param string $name Name of the property being tested.
 *
 * @return bool
 */
public function __isset($name)
{
    switch ($name) {
        case 'outertext':
        case 'innertext':
        case 'plaintext':
            return true;
        default:
            return isset($this->attr[$name]);
    }
}
||
1101 | |||
/**
 * Magic unset() handler: drops an entry from the attribute list.
 *
 * Nothing happens when isset() reports the attribute as absent (which
 * includes an attribute whose stored value is null).
 *
 * @param string $name Name of the attribute to remove.
 *
 * @return void
 */
public function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    unset($this->attr[$name]);
}
||
1108 | |||
/**
 * Converts text from the document's source charset to its target charset.
 *
 * The charsets are read from the owning DOM object ($this->dom->_charset
 * and $this->dom->_target_charset). The text is returned unconverted when
 * - there is no DOM object or either charset is empty,
 * - both charsets are equal,
 * - the target is UTF-8 and the text already is valid UTF-8 (the source
 *   charset is then assumed to have been detected incorrectly), or
 * - iconv() fails to convert the text.
 *
 * When the target charset is UTF-8, a byte order mark at the very start
 * and/or very end of the result is removed.
 *
 * @param string $text Text to convert.
 *
 * @return string The converted (or untouched) text.
 */
public function convert_text($text)
{
    $sourceCharset = '';
    $targetCharset = '';

    if ($this->dom) {
        // Normalised once here so that all later comparisons are case-insensitive.
        $sourceCharset = strtoupper($this->dom->_charset);
        $targetCharset = strtoupper($this->dom->_target_charset);
    }

    $converted_text = $text;

    $needsConversion = !empty($sourceCharset)
        && !empty($targetCharset)
        && $sourceCharset !== $targetCharset;

    if ($needsConversion) {
        if ('UTF-8' === $targetCharset && self::is_utf8($text)) {
            Debug::log_once('The source charset was incorrectly detected as ' . $sourceCharset . ' but should have been UTF-8');
        } else {
            $result = iconv($sourceCharset, $targetCharset, $text);

            if (false !== $result) {
                $converted_text = $result;
            } else {
                // iconv() signals failure with false; keep the original
                // text instead of handing false back to the caller.
                Debug::log_once('Failed to convert text from ' . $sourceCharset . ' to ' . $targetCharset . '; returning it unconverted');
            }
        }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ('UTF-8' === $targetCharset) {
        if ("\xef\xbb\xbf" === substr($converted_text, 0, 3)) {
            $converted_text = substr($converted_text, 3);
        }

        if ("\xef\xbb\xbf" === substr($converted_text, -3)) {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}
||
1145 | |||
/**
 * Checks whether a byte string is structurally valid UTF-8.
 *
 * Every byte >= 0x80 must be a lead byte followed by the number of
 * continuation bytes (0x80-0xBF) that the lead byte announces. The check
 * is structural only: the historical 5- and 6-byte forms are accepted and
 * overlong encodings are not rejected.
 *
 * @param string $str Bytes to inspect.
 *
 * @return bool True when the string is well-formed, false otherwise. An
 *              empty string is considered valid.
 */
public static function is_utf8($str)
{
    $len = strlen($str);

    for ($i = 0; $i < $len; ++$i) {
        $c = ord($str[$i]);

        // 0x00-0x7F is plain ASCII. 0x80 itself is NOT ASCII and must go
        // through the multi-byte checks below.
        if ($c < 128) {
            continue;
        }

        if ($c >= 254) {
            return false; // 0xFE / 0xFF never occur in UTF-8
        } elseif ($c >= 252) {
            $bits = 6;
        } elseif ($c >= 248) {
            $bits = 5;
        } elseif ($c >= 240) {
            $bits = 4;
        } elseif ($c >= 224) {
            $bits = 3;
        } elseif ($c >= 192) {
            $bits = 2;
        } else {
            return false; // continuation byte without a lead byte
        }

        // The sequence must not run past the end of the string.
        if (($i + $bits) > $len) {
            return false;
        }

        // Consume the announced continuation bytes.
        while ($bits > 1) {
            ++$i;
            $b = ord($str[$i]);

            if ($b < 128 || $b > 191) {
                return false;
            }

            --$bits;
        }
    }

    return true;
}
||
1186 | |||
/**
 * Determines the display size of an <img> element.
 *
 * The "width" and "height" attributes are used when present. A dimension
 * that is still unknown (-1) afterwards is taken from the inline "style"
 * attribute, but only when it is declared as a whole number of pixels
 * (e.g. "width: 120px"). Only the tag's own attributes are inspected;
 * stylesheet rules are not considered.
 *
 * @return array|false False when this node is not an "img" tag, otherwise
 *                     ['height' => ..., 'width' => ...] where -1 means
 *                     the dimension could not be determined.
 */
public function get_display_size()
{
    if ('img' !== $this->tag) {
        return false;
    }

    // Start with the height/width attributes of the tag itself.
    $sizes = [
        'width' => isset($this->attr['width']) ? $this->attr['width'] : -1,
        'height' => isset($this->attr['height']) ? $this->attr['height'] : -1,
    ];

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        $declarations = [];

        // Thanks to user gnarf from stackoverflow for this regular expression.
        preg_match_all(
            '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
            $this->attr['style'],
            $matches,
            PREG_SET_ORDER
        );

        foreach ($matches as $match) {
            // The value group is greedy and swallows whitespace in front
            // of the ";", which would hide the "px" suffix below.
            $declarations[$match[1]] = trim($match[2]);
        }

        foreach ($sizes as $dimension => $current) {
            // Only fill in dimensions the attributes did not provide.
            if (!isset($declarations[$dimension]) || -1 != $current) {
                continue;
            }

            $declared = $declarations[$dimension];

            // Check that the last two characters are px (pixels).
            if ('px' !== strtolower(substr($declared, -2))) {
                continue;
            }

            $proposed = substr($declared, 0, -2);

            // Accept integers only. Compare against false explicitly,
            // because a valid "0" is returned as the falsy integer 0.
            if (false !== filter_var($proposed, FILTER_VALIDATE_INT)) {
                $sizes[$dimension] = $proposed;
            }
        }
    }

    return [
        'height' => $sizes['height'],
        'width' => $sizes['width'],
    ];
}
||
1268 | |||
/**
 * Returns the outer text of this node and optionally writes it to a file.
 *
 * @param string $filepath Destination file. With the default empty string
 *                         nothing is written. The file is written with an
 *                         exclusive lock (LOCK_EX); the result of the
 *                         write is not checked.
 *
 * @return string The outer text of this node.
 */
public function save($filepath = '')
{
    $markup = $this->outertext();

    if ('' === $filepath) {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
||
1279 | |||
/**
 * Adds one or more class names to the "class" attribute of this node.
 *
 * Names that are already present (according to hasClass()) are not added
 * again. Empty names - e.g. produced by repeated, leading or trailing
 * whitespace in the argument - are ignored, so they can neither create an
 * empty "class" attribute nor append stray spaces to it.
 *
 * @param string|array $class Whitespace-separated list of class names or
 *                            an array of class names. Any other type is
 *                            ignored.
 *
 * @return void
 */
public function addClass($class)
{
    if (is_string($class)) {
        // Split on any run of whitespace and drop empty tokens.
        $class = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!is_array($class)) {
        return;
    }

    foreach ($class as $c) {
        $c = trim((string) $c);

        if ('' === $c) {
            continue;
        }

        if (!isset($this->class)) {
            // First class name on this node.
            $this->class = $c;
        } elseif (!$this->hasClass($c)) {
            $this->class .= ' ' . $c;
        }
    }
}
||
1300 | |||
/**
 * Tells whether the "class" attribute of this node contains a class name.
 *
 * The attribute is split on single spaces and compared strictly.
 *
 * @param string $class Class name to look for. Non-string values always
 *                      yield false.
 *
 * @return bool
 */
public function hasClass($class)
{
    if (!is_string($class) || !isset($this->class)) {
        return false;
    }

    $assigned = explode(' ', $this->class);

    return in_array($class, $assigned, true);
}
||
1311 | |||
/**
 * Removes class names from the "class" attribute of this node.
 *
 * @param string|array|null $class Space-separated list or array of class
 *                                 names to remove. With null the whole
 *                                 "class" attribute is removed. Any other
 *                                 type leaves the node untouched.
 *
 * @return void
 */
public function removeClass($class = null)
{
    // Nothing to do for nodes without a class attribute.
    if (!isset($this->class)) {
        return;
    }

    if (null === $class) {
        $this->removeAttribute('class');

        return;
    }

    $unwanted = is_string($class) ? explode(' ', $class) : $class;

    if (!is_array($unwanted)) {
        return;
    }

    $remaining = array_diff(explode(' ', $this->class), $unwanted);

    if (empty($remaining)) {
        // No class left: drop the attribute entirely.
        $this->removeAttribute('class');

        return;
    }

    $this->class = implode(' ', $remaining);
}
||
1337 | |||
/**
 * Returns the complete attribute list of this node.
 *
 * @return array The attribute list as stored on the node.
 */
public function getAllAttributes()
{
    $attributes = $this->attr;

    return $attributes;
}
||
1342 | |||
/**
 * Reads a value through dynamic property access on this node.
 *
 * Names that are not declared properties of the class are resolved by
 * __get().
 *
 * @param string $name Property/attribute name.
 *
 * @return mixed Whatever the property access yields.
 */
public function getAttribute($name)
{
    $value = $this->{$name};

    return $value;
}
||
1347 | |||
/**
 * Writes a value through dynamic property access on this node.
 *
 * Names that are not declared properties of the class are handled by
 * __set().
 *
 * @param string $name  Property/attribute name.
 * @param mixed  $value Value to assign.
 *
 * @return void
 */
public function setAttribute($name, $value)
{
    $this->{$name} = $value;
}
||
1352 | |||
/**
 * Tests a name with isset() through dynamic property access on this node.
 *
 * Names that are not declared properties of the class are answered by
 * __isset().
 *
 * @param string $name Property/attribute name.
 *
 * @return bool
 */
public function hasAttribute($name)
{
    $exists = isset($this->{$name});

    return $exists;
}
||
1357 | |||
/**
 * Unsets a name through dynamic property access on this node.
 *
 * Names that are not declared properties of the class are handled by
 * __unset().
 *
 * @param string $name Property/attribute name.
 *
 * @return void
 */
public function removeAttribute($name)
{
    unset($this->{$name});
}
||
1362 | |||
/**
 * Removes this node from its parent.
 *
 * Delegates to removeChild() of the parent; a node without a parent is
 * left untouched.
 *
 * @return void
 */
public function remove()
{
    if (!$this->parent) {
        return;
    }

    $this->parent->removeChild($this);
}
||
1369 | |||
/**
 * Removes a node, together with everything below it, from this node.
 *
 * Steps, in order:
 *  1. every child of $node is removed recursively,
 *  2. the entries still listed in $node->nodes are dropped from that list
 *     and from the node list of $node's DOM object,
 *  3. $node is dropped from this node's "nodes" and "children" lists (both
 *     are re-indexed) and from the node list of this node's DOM object,
 *  4. $node->clear() is called.
 *
 * @param HtmlNode $node Node to remove.
 *
 * @return void
 */
public function removeChild($node)
{
    // 1. Depth first: take the subtree apart before the node itself.
    foreach ($node->children as $descendant) {
        $node->removeChild($descendant);
    }

    // $node->children is not re-indexed here; $node is removed below.

    // 2. Remaining entries of the node.
    foreach ($node->nodes as $leftover) {
        $ownKey = array_search($leftover, $node->nodes, true);
        $domKey = array_search($leftover, $node->dom->nodes, true);

        if (false !== $ownKey) {
            unset($node->nodes[$ownKey]);
        }

        if (false !== $domKey) {
            unset($node->dom->nodes[$domKey]);
        }
    }

    // $node->nodes is not re-indexed either, for the same reason.

    // 3. Locate the node in all three lists before touching any of them.
    $keyInNodes = array_search($node, $this->nodes, true);
    $keyInChildren = array_search($node, $this->children, true);
    $keyInDom = array_search($node, $this->dom->nodes, true);

    if (false !== $keyInNodes) {
        unset($this->nodes[$keyInNodes]);
    }

    $this->nodes = array_values($this->nodes);

    if (false !== $keyInChildren) {
        unset($this->children[$keyInChildren]);
    }

    $this->children = array_values($this->children);

    if (false !== $keyInDom) {
        unset($this->dom->nodes[$keyInDom]);
    }

    // dom->nodes deliberately keeps its keys (no re-indexing): per the
    // original note, nodes point to other nodes in that array explicitly.

    // 4. Let the removed node release its own state.
    $node->clear();
}
||
1418 | |||
/**
 * Finds the first element matching the given id.
 *
 * Shorthand for find() with the selector "#<id>" and index 0.
 *
 * @param string $id Element id (without the leading "#").
 *
 * @return mixed Result of find() for index 0.
 */
public function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
||
1423 | |||
/**
 * Finds elements matching the given id.
 *
 * Shorthand for find() with the selector "#<id>".
 *
 * @param string   $id  Element id (without the leading "#").
 * @param int|null $idx Index forwarded to find(); null by default.
 *
 * @return mixed Result of find().
 */
public function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
||
1428 | |||
/**
 * Finds the first element with the given tag name.
 *
 * Shorthand for find() with the tag name as selector and index 0.
 *
 * @param string $name Tag name, passed to find() unchanged.
 *
 * @return mixed Result of find() for index 0.
 */
public function getElementByTagName($name)
{
    $first = $this->find($name, 0);

    return $first;
}
||
1433 | |||
/**
 * Finds elements with the given tag name.
 *
 * Shorthand for find() with the tag name as selector.
 *
 * @param string   $name Tag name, passed to find() unchanged.
 * @param int|null $idx  Index forwarded to find(); null by default.
 *
 * @return mixed Result of find().
 */
public function getElementsByTagName($name, $idx = null)
{
    $found = $this->find($name, $idx);

    return $found;
}
||
1438 | |||
/**
 * Alias of parent().
 *
 * @return mixed Whatever parent() returns.
 */
public function parentNode()
{
    $parent = $this->parent();

    return $parent;
}
||
1443 | |||
/**
 * Returns the child elements of this node, or a single one of them.
 *
 * @param int $idx Index of the wanted child; -1 (default) requests the
 *                 whole list.
 *
 * @return array|HtmlNode|null The list of children for -1, otherwise the
 *                             child at $idx or null if there is none.
 */
public function childNodes($idx = -1)
{
    if (-1 === $idx) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
||
1456 | |||
/**
 * Returns the first child element of this node.
 *
 * @return HtmlNode|null The entry at index 0 of the children list, or
 *                       null when the list is empty.
 */
public function firstChild()
{
    $hasChildren = count($this->children) > 0;

    return $hasChildren ? $this->children[0] : null;
}
||
1465 | |||
/**
 * Returns the last child element of this node.
 *
 * Uses end(), so the internal pointer of the children list is moved to
 * its last entry.
 *
 * @return HtmlNode|null The last entry of the children list, or null when
 *                       the list is empty.
 */
public function lastChild()
{
    if (0 >= count($this->children)) {
        return null;
    }

    return end($this->children);
}
||
1474 | |||
1475 | public function nextSibling() |
||
1488 | } |
||
1489 | |||
/**
 * Returns the element that precedes this node in its parent's children.
 *
 * @return HtmlNode|null The preceding child of the parent, or null when
 *                       this node has no parent, is not found among the
 *                       parent's children, or is the first of them.
 */
public function previousSibling()
{
    $parent = $this->parent;

    if (null === $parent) {
        return null;
    }

    $position = array_search($this, $parent->children, true);

    // Not listed at all, or already the first child.
    if (false === $position || !($position > 0)) {
        return null;
    }

    return $parent->children[$position - 1];
}
||
1504 | |||
1505 | public function hasChildNodes() |
||
1508 | } |
||
1509 | |||
1510 | public function nodeName() |
||
1513 | } |
||
1514 | |||
1515 | public function appendChild($node) |
||
1516 | { |
||
1517 | $node->parent = $this; |
||
1518 | $this->nodes[] = $node; |
||
1538 | } |
||
1539 | } |
||
1540 |