These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | namespace PHPHtmlParser\Dom; |
||
3 | |||
4 | use PHPHtmlParser\Selector; |
||
5 | use PHPHtmlParser\Exceptions\ChildNotFoundException; |
||
6 | use PHPHtmlParser\Exceptions\CircularException; |
||
7 | use PHPHtmlParser\Exceptions\ParentNotFoundException; |
||
8 | use stringEncode\Encode; |
||
9 | |||
10 | /** |
||
11 | * Dom node object. |
||
12 | * |
||
13 | * @property string outerhtml |
||
14 | * @property string innerhtml |
||
15 | * @property string text |
||
16 | */ |
||
17 | abstract class AbstractNode |
||
18 | { |
||
19 | |||
/**
 * Tag object holding the name/type of this node.
 *
 * @var \PHPHtmlParser\Dom\Tag
 */
protected $tag;

/**
 * List of attributes on this tag.
 *
 * @var array
 */
protected $attr = [];

/**
 * The children of this node, keyed by child id. Each entry is an array
 * with the keys 'node' (the child), 'next' and 'prev' (ids of the
 * neighbouring children, or null at either end).
 *
 * @var array
 */
protected $children = [];

/**
 * The parent node, or null while no parent has been set.
 *
 * @var AbstractNode|null
 */
protected $parent = null;

/**
 * Unique id of this object, produced by spl_object_hash() in the
 * constructor.
 *
 * @var string
 */
protected $id;

/**
 * The encoding class used to encode strings.
 *
 * @var mixed
 */
protected $encode;
||
61 | |||
/**
 * Gives the node its identifier: the spl object hash of the instance.
 */
public function __construct()
{
    $hash = spl_object_hash($this);

    $this->id = $hash;
}
||
69 | |||
/**
 * Magic getter: resolves a tag attribute first and, failing that, one of
 * the virtual properties "outerhtml", "innerhtml" or "text"
 * (matched case-insensitively).
 *
 * @param string $key
 * @return mixed The attribute value, the rendered string, or null when
 *               the key matches neither.
 */
public function __get($key)
{
    // Attributes take precedence. Look the attribute up once and reuse
    // the result rather than asking the tag for it a second time.
    $attribute = $this->getAttribute($key);
    if ( ! is_null($attribute)) {
        return $attribute;
    }

    switch (strtolower($key)) {
        case 'outerhtml':
            return $this->outerHtml();
        case 'innerhtml':
            return $this->innerHtml();
        case 'text':
            return $this->text();
    }

    return null;
}
||
93 | |||
/**
 * Drops the references this node holds (parent, children, tag and
 * attributes) when the object is destroyed.
 */
public function __destruct()
{
    $this->parent   = null;
    $this->children = [];
    $this->tag      = null;
    $this->attr     = [];
}
||
104 | |||
/**
 * String conversion: yields the outer html of this node.
 *
 * @return string
 */
public function __toString()
{
    $html = $this->outerHtml();

    return $html;
}
||
114 | |||
/**
 * Gives the unique id assigned to this node at construction time.
 *
 * @return string
 */
public function id()
{
    return $this->id;
}
||
122 | |||
/**
 * Gives the parent of this node.
 *
 * @return AbstractNode|null null while no parent has been set
 */
public function getParent()
{
    return $this->parent;
}
||
132 | |||
/**
 * Sets the parent node, detaching this node from its previous parent and
 * registering it as a child of the new one.
 *
 * @param AbstractNode $parent
 * @return $this
 * @throws CircularException when $parent is this node itself or one of
 *                           its descendants
 */
public function setParent(AbstractNode $parent)
{
    // A node can never be its own parent. Reject that before any state is
    // touched; otherwise the failure only surfaces inside addChild(), after
    // this node was already removed from its old parent and pointed at
    // itself.
    if ($parent->id() === $this->id) {
        throw new CircularException('Can not set itself as a child.');
    }

    // check integrity
    if ($this->isDescendant($parent->id())) {
        throw new CircularException('Can not add descendant "'.$parent->id().'" as my parent.');
    }

    // remove from old parent
    if ( ! is_null($this->parent)) {
        // Ids are hash strings: compare them strictly, since "==" treats
        // two numeric-looking strings (e.g. "0e1" and "0e2") as equal.
        if ($this->parent->id() === $parent->id()) {
            // already the parent
            return $this;
        }

        $this->parent->removeChild($this->id);
    }

    $this->parent = $parent;

    // assign child to parent
    $this->parent->addChild($this);

    //clear any cache
    $this->clear();

    return $this;
}
||
167 | |||
/**
 * Stores the encoding class on this node and its tag, then hands it down
 * to every child so the whole subtree shares it.
 *
 * @param Encode $encode
 */
public function propagateEncoding(Encode $encode)
{
    $this->encode = $encode;
    $this->tag->setEncoding($encode);

    // recurse into the children
    foreach ($this->children as $entry) {
        /** @var AbstractNode $childNode */
        $childNode = $entry['node'];
        $childNode->propagateEncoding($encode);
    }
}
||
185 | |||
/**
 * Tells whether at least one child is attached to this node.
 *
 * @return bool
 */
public function hasChildren()
{
    return count($this->children) > 0;
}
||
195 | |||
/**
 * Looks up a direct child by its id.
 *
 * @param string $id id of the child, as returned by its id() method
 * @return AbstractNode
 * @throws ChildNotFoundException when no child is stored under that id
 */
public function getChild($id)
{
    if (isset($this->children[$id])) {
        return $this->children[$id]['node'];
    }

    throw new ChildNotFoundException("Child '$id' not found in this node.");
}
||
211 | |||
/**
 * Collects the child nodes into a fresh list, starting at the first child
 * and following the "next" links.
 *
 * @return array list of AbstractNode, empty when there are no children
 */
public function getChildren()
{
    $nodes = [];

    try {
        // nextChild() throws once the last child has been passed, which is
        // what ends this walk (firstChild() throws right away when empty).
        for ($child = $this->firstChild(); ! is_null($child); $child = $this->nextChild($child->id())) {
            $nodes[] = $child;
        }
    } catch (ChildNotFoundException $e) {
        // we are done looking for children
    }

    return $nodes;
}
||
232 | |||
/**
 * Reports how many direct children this node has.
 *
 * @return int
 */
public function countChildren()
{
    $total = count($this->children);

    return $total;
}
||
242 | |||
/**
 * Appends a child node to this node and makes this node its parent.
 *
 * @param AbstractNode $child
 * @return bool true when the child was added, false when it already is a
 *              child of this node
 * @throws CircularException when $child is this node or one of its
 *                           ancestors
 */
public function addChild(AbstractNode $child)
{
    $key = null;

    // check integrity
    if ($this->isAncestor($child->id())) {
        throw new CircularException('Can not add child. It is my ancestor.');
    }

    // check if child is itself. Ids are hash strings, so compare strictly:
    // "==" treats two numeric-looking strings (e.g. "0e1" and "0e2") as
    // equal and would reject a perfectly distinct node.
    if ($child->id() === $this->id) {
        throw new CircularException('Can not set itself as a child.');
    }

    if ($this->hasChildren()) {
        if (isset($this->children[$child->id()])) {
            // we already have this child
            return false;
        }
        // link the current last child to the new one
        $sibling                      = $this->lastChild();
        $key                          = $sibling->id();
        $this->children[$key]['next'] = $child->id();
    }

    // add the child
    $this->children[$child->id()] = [
        'node' => $child,
        'next' => null,
        'prev' => $key,
    ];

    // tell child I am the new parent
    $child->setParent($this);

    //clear any cache
    $this->clear();

    return true;
}
||
290 | |||
/**
 * Detaches the child stored under the given id and re-links its
 * neighbours to each other. Unknown ids are ignored.
 *
 * @param string $id
 * @return $this
 */
public function removeChild($id)
{
    if (isset($this->children[$id])) {
        $entry  = $this->children[$id];
        $after  = $entry['next'];
        $before = $entry['prev'];

        // close the gap in the next/prev chain
        if ($after !== null) {
            $this->children[$after]['prev'] = $before;
        }
        if ($before !== null) {
            $this->children[$before]['next'] = $after;
        }

        // remove the child
        unset($this->children[$id]);

        //clear any cache
        $this->clear();
    }

    return $this;
}
||
321 | |||
/**
 * Gives the child that follows the child with the given id.
 *
 * @param string $id
 * @return AbstractNode
 * @throws ChildNotFoundException when $id is not a child, or when that
 *                                child has no successor
 * @uses $this->getChild()
 */
public function nextChild($id)
{
    $current = $this->getChild($id);
    $nextId  = $this->children[$current->id()]['next'];

    // getChild() throws for the null id stored on the last child
    return $this->getChild($nextId);
}
||
336 | |||
/**
 * Gives the child that precedes the child with the given id.
 *
 * @param string $id
 * @return AbstractNode
 * @throws ChildNotFoundException when $id is not a child, or when that
 *                                child has no predecessor
 * @uses $this->getChild()
 */
public function previousChild($id)
{
    $current    = $this->getChild($id);
    $previousId = $this->children[$current->id()]['prev'];

    // getChild() throws for the null id stored on the first child
    return $this->getChild($previousId);
}
||
351 | |||
/**
 * Checks if the given node id belongs to a direct child of the current
 * node.
 *
 * @param string $id
 * @return bool
 */
public function isChild($id)
{
    // Children are keyed by id, so a keyed lookup answers this directly.
    // It also matches ids exactly: scanning with "==" reports a false hit
    // for numeric-looking hash strings such as "0e1" vs "0e2".
    if ( ! is_string($id) && ! is_int($id)) {
        return false;
    }

    return isset($this->children[$id]);
}
||
369 | |||
/**
 * Checks whether the given node id sits anywhere below the current node:
 * as a direct child or deeper in the subtree.
 *
 * @param string $id
 * @return bool
 */
public function isDescendant($id)
{
    if ($this->isChild($id)) {
        return true;
    }

    foreach ($this->children as $entry) {
        /** @var AbstractNode $branch */
        $branch = $entry['node'];
        if ( ! $branch->hasChildren()) {
            // leaf: nothing below it to search
            continue;
        }
        if ($branch->isDescendant($id)) {
            return true;
        }
    }

    return false;
}
||
395 | |||
/**
 * Checks whether the given node id belongs to an ancestor of the current
 * node, i.e. whether getAncestor() finds it.
 *
 * @param string $id
 * @return bool
 */
public function isAncestor($id)
{
    return ! is_null($this->getAncestor($id));
}
||
411 | |||
/**
 * Attempts to get an ancestor node by the given id, walking up the chain
 * of parents until a match is found or the root is passed.
 *
 * @param string $id
 * @return null|AbstractNode the matching ancestor, or null when there is
 *                           none
 */
public function getAncestor($id)
{
    // Iterate rather than recurse so a deep tree does not grow the call
    // stack. Ids are hash strings and must match exactly: "==" treats
    // numeric-looking strings such as "0e1" and "0e2" as equal.
    for ($node = $this->parent; ! is_null($node); $node = $node->parent) {
        if ($node->id() === $id) {
            return $node;
        }
    }

    return null;
}
||
430 | |||
/**
 * Shortcut for the first entry of the children list.
 *
 * @return AbstractNode
 * @throws ChildNotFoundException when this node has no children
 * @uses $this->getChild()
 */
public function firstChild()
{
    // move the array pointer to the front and read the key found there
    reset($this->children);

    return $this->getChild(key($this->children));
}
||
444 | |||
/**
 * Shortcut for the last entry of the children list.
 *
 * @return AbstractNode
 * @throws ChildNotFoundException when this node has no children
 */
public function lastChild()
{
    // move the array pointer to the back and read the key found there
    end($this->children);

    return $this->getChild(key($this->children));
}
||
457 | |||
/**
 * Asks the parent for the child that follows this node.
 *
 * @return AbstractNode
 * @throws ParentNotFoundException when this node has no parent
 */
public function nextSibling()
{
    if ( ! is_null($this->parent)) {
        return $this->parent->nextChild($this->id);
    }

    throw new ParentNotFoundException('Parent is not set for this node.');
}
||
472 | |||
/**
 * Asks the parent for the child that precedes this node.
 *
 * @return AbstractNode
 * @throws ParentNotFoundException when this node has no parent
 */
public function previousSibling()
{
    if ( ! is_null($this->parent)) {
        return $this->parent->previousChild($this->id);
    }

    throw new ParentNotFoundException('Parent is not set for this node.');
}
||
487 | |||
/**
 * Accessor for the tag object attached to this node.
 *
 * @return Tag
 */
public function getTag()
{
    return $this->tag;
}
||
497 | |||
/**
 * Fetches all attributes from the tag of this node and reduces each
 * attribute entry to its 'value'.
 *
 * @return array attribute values keyed by attribute name
 */
public function getAttributes()
{
    $values = [];
    foreach ($this->tag->getAttributes() as $name => $info) {
        $values[$name] = $info['value'];
    }

    return $values;
}
||
513 | |||
/**
 * Fetches a single attribute from the tag of this node and reduces it to
 * its 'value'.
 *
 * @param string $key
 * @return mixed the attribute value, or null when the tag reports no such
 *               attribute
 */
public function getAttribute($key)
{
    $info = $this->tag->getAttribute($key);

    return is_null($info) ? null : $info['value'];
}
||
530 | |||
/**
 * Forwards the attribute to the setAttribute method of this node's tag.
 *
 * @param string $key
 * @param string $value
 * @return $this
 */
public function setAttribute($key, $value)
{
    $tag = $this->tag;
    $tag->setAttribute($key, $value);

    return $this;
}
||
545 | |||
/**
 * Locates the closest node with the given tag name on the path from this
 * node up to the root. The node itself is the first candidate.
 *
 * @param string $tag
 * @return AbstractNode
 * @throws ParentNotFoundException when no node on the path has that tag
 */
public function ancestorByTag($tag)
{
    // Start with ourselves, then climb parent by parent.
    for ($node = $this; ! is_null($node); $node = $node->getParent()) {
        if ($node->tag->name() == $tag) {
            return $node;
        }
    }

    throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
}
||
568 | |||
/**
 * Runs a css selector against this node.
 *
 * @param string   $selector
 * @param int|null $nth when given, only the match at this index is returned
 * @return mixed all matches when $nth is null; otherwise the match at
 *               index $nth, or null when there is no such match
 */
public function find($selector, $nth = null)
{
    $matches = (new Selector($selector))->find($this);

    if (is_null($nth)) {
        return $matches;
    }

    // return nth-element or null
    return isset($matches[$nth]) ? $matches[$nth] : null;
}
||
592 | |||
/**
 * Function to try a few tricks to determine the displayed size of an img on the page.
 * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
 *
 * The width/height attributes of the tag are used first; a dimension that
 * is still unknown afterwards is taken from a pixel value in the inline
 * style attribute.
 *
 * Future enhancement:
 * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
 *
 * Far future enhancement
 * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
 * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
 *
 * ridiculously far future development
 * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
 *
 * @author John Schlick
 * @return array|bool an array containing the 'height' and 'width' of the image on the page (-1 for a
 *                    dimension we can't figure out), or false when this node is not an img.
 */
public function get_display_size()
{
    if ($this->tag->name() != 'img') {
        return false;
    }

    $width  = -1;
    $height = -1;

    // See if there is a height or width attribute in the tag itself.
    // Read them through $this->getAttribute(): the tag's own getAttribute()
    // hands back the attribute's info array, not its value.
    $widthAttribute = $this->getAttribute('width');
    if ( ! is_null($widthAttribute)) {
        $width = $widthAttribute;
    }

    $heightAttribute = $this->getAttribute('height');
    if ( ! is_null($heightAttribute)) {
        $height = $heightAttribute;
    }

    // Now look for an inline style.
    $style = $this->getAttribute('style');
    if ( ! is_null($style)) {
        // Thanks to user 'gnarf' from stackoverflow for this regular expression.
        $attributes = [];
        preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $style, $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            $attributes[$match[1]] = $match[2];
        }

        $width = $this->getLength($attributes, $width, 'width');
        // pass the current height (not the width) as the fallback
        $height = $this->getLength($attributes, $height, 'height');
    }

    return [
        'height' => $height,
        'width'  => $width,
    ];
}
||
649 | |||
/**
 * If there is a pixel length for $key in the style attributes, use it,
 * unless a length is already known.
 *
 * @param array  $attributes style declarations keyed by property name
 * @param mixed  $length     the length known so far; -1 means "unknown"
 * @param string $key        style property to read, e.g. 'width'
 * @return mixed the digits of the pixel value (as a string) when one is
 *               taken from the style, otherwise $length unchanged
 */
protected function getLength(array $attributes, $length, $key)
{
    // Only consult the style when the property is present and no length
    // has been determined yet.
    if ( ! isset($attributes[$key]) || $length != -1) {
        return $length;
    }

    // The style regex can leave whitespace around the value ("70px ;").
    $value = trim($attributes[$key]);

    // check that the last two characters are px (pixels)
    if (strtolower(substr($value, -2)) != 'px') {
        return $length;
    }

    $proposed_length = substr($value, 0, -2);

    // Now make sure that it's an integer and not something stupid.
    // filter_var() returns false on failure, so compare against false
    // explicitly: a plain truthiness test would also reject a valid "0".
    if (filter_var($proposed_length, FILTER_VALIDATE_INT) === false) {
        return $length;
    }

    return $proposed_length;
}
||
673 | |||
/**
 * Produces the inner html of this node.
 *
 * @return string
 */
abstract public function innerHtml();

/**
 * Produces the html of this node, its own tag included.
 *
 * @return string
 */
abstract public function outerHtml();

/**
 * Produces the text of this node (if there is any text).
 *
 * @return string
 */
abstract public function text();

/**
 * Hook invoked whenever the node tree has changed, e.g. after a child was
 * added or removed or the parent was changed.
 *
 * @return void
 */
abstract protected function clear();
||
703 | } |
||
704 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.