Completed
Push — master ( 3c077b...77d3a9 )
by Gilles
06:36
created

src/PHPHtmlParser/Dom/AbstractNode.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
namespace PHPHtmlParser\Dom;
3
4
use PHPHtmlParser\Selector;
5
use PHPHtmlParser\Exceptions\ChildNotFoundException;
6
use PHPHtmlParser\Exceptions\CircularException;
7
use PHPHtmlParser\Exceptions\ParentNotFoundException;
8
use stringEncode\Encode;
9
10
/**
 * Dom node object.
 *
 * @property string $outerhtml
 * @property string $innerhtml
 * @property string $text
 */
17
abstract class AbstractNode
18
{
19
20
    /**
     * The tag object of this node. The methods in this class read the tag
     * name and the attributes through it.
     *
     * @var \PHPHtmlParser\Dom\Tag
     */
25
    protected $tag;
26
27
    /**
     * Contains a list of attributes on this tag.
     *
     * NOTE(review): within this class the attribute accessors delegate to
     * $this->tag; this array is only reset in __destruct() here.
     *
     * @var array
     */
32
    protected $attr = [];
33
34
    /**
     * All children of this node, keyed by child id. Each entry is an array
     * with the keys 'node' (the child), 'next' and 'prev' (ids of the
     * neighbouring children, or null at either end).
     *
     * @var array
     */
39
    protected $children = [];
40
41
    /**
     * The parent node, or null while no parent has been set.
     *
     * @var AbstractNode|null
     */
46
    protected $parent = null;
47
48
    /**
     * The unique id of this node, taken from spl_object_hash() in the
     * constructor.
     *
     * @var string
     */
53
    protected $id;
54
55
    /**
     * The encoding object handed to propagateEncoding().
     *
     * @var mixed
     */
60
    protected $encode;
61
62
    /**
     * Initialises the node by giving it a unique identifier.
     *
     * The identifier is the spl_object_hash() of this instance; it is the
     * key used by the parent/child bookkeeping of this class.
     */
    public function __construct()
    {
        $hash     = spl_object_hash($this);
        $this->id = $hash;
    }
69
70
    /**
     * Magic getter: resolves tag attributes first, then the virtual
     * properties "outerhtml", "innerhtml" and "text" (case-insensitive).
     *
     * @param string $key
     * @return mixed the attribute value, the requested html/text, or null
     *               when the key matches neither
     */
    public function __get($key)
    {
        // an attribute with this name wins over the virtual properties
        if ($this->getAttribute($key) !== null) {
            return $this->getAttribute($key);
        }

        $property = strtolower($key);

        if ($property === 'outerhtml') {
            return $this->outerHtml();
        }
        if ($property === 'innerhtml') {
            return $this->innerHtml();
        }
        if ($property === 'text') {
            return $this->text();
        }

        return null;
    }
93
94
    /**
     * Drops the references this node holds (parent, children, tag and
     * attributes) when the object is destroyed.
     */
    public function __destruct()
    {
        $this->parent   = null;
        $this->children = [];
        $this->attr     = [];
        $this->tag      = null;
    }
104
105
    /**
     * String conversion of the node: delegates to outerHtml().
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->outerHtml();

        return $html;
    }
114
115
    /**
     * Gives back the unique id assigned to this node in the constructor.
     *
     * @return string
     */
    public function id()
    {
        return $this->id;
    }
122
123
    /**
     * Gives back the parent of this node.
     *
     * @return AbstractNode|null null while no parent has been set
     */
    public function getParent()
    {
        return $this->parent;
    }
132
133
    /**
     * Attaches this node to the given parent.
     *
     * The node is detached from its previous parent (if any), registered as
     * a child of the new one, and clear() is called afterwards. Nothing
     * happens when the given node already is the parent.
     *
     * @param AbstractNode $parent
     * @return $this
     * @throws CircularException when the proposed parent is one of this
     *                           node's descendants
     */
    public function setParent(AbstractNode $parent)
    {
        // a descendant can never become the parent
        if ($this->isDescendant($parent->id())) {
            throw new CircularException(sprintf('Can not add descendant "%s" as my parent.', $parent->id()));
        }

        $current = $this->parent;
        if ($current !== null) {
            if ($current->id() == $parent->id()) {
                // nothing to do, it already is the parent
                return $this;
            }

            // detach from the previous parent
            $current->removeChild($this->id);
        }

        $this->parent = $parent;

        // register with the new parent
        $parent->addChild($this);

        // invalidate anything cached for the old position
        $this->clear();

        return $this;
    }
167
168
    /**
     * Stores the encoding object on this node and its tag, then hands it
     * down recursively to every child node.
     *
     * @param Encode $encode
     */
    public function propagateEncoding(Encode $encode)
    {
        $this->encode = $encode;
        $this->tag->setEncoding($encode);

        // recurse into the children
        foreach ($this->children as $entry) {
            /** @var AbstractNode $childNode */
            $childNode = $entry['node'];
            $childNode->propagateEncoding($encode);
        }
    }
185
186
    /**
     * Tells whether at least one child is registered on this node.
     *
     * @return bool
     */
    public function hasChildren()
    {
        return (bool) $this->children;
    }
195
196
    /**
     * Looks up a direct child by its id.
     *
     * @param string $id id of the child, as returned by AbstractNode::id()
     * @return AbstractNode
     * @throws ChildNotFoundException when no child is registered under $id
     */
    public function getChild($id)
    {
        if (isset($this->children[$id])) {
            return $this->children[$id]['node'];
        }

        throw new ChildNotFoundException("Child '$id' not found in this node.");
    }
211
212
    /**
     * Builds a fresh array with all child nodes, starting at the first
     * child and following the 'next' links.
     *
     * @return array
     */
    public function getChildren()
    {
        $collected = [];

        try {
            $current = $this->firstChild();
            while (true) {
                $collected[] = $current;
                $current     = $this->nextChild($current->id());
                if (is_null($current)) {
                    break;
                }
            }
        } catch (ChildNotFoundException $e) {
            // thrown when there is no (further) child: the walk is finished
        }

        return $collected;
    }
232
233
    /**
     * Gives the number of direct children of this node.
     *
     * @return int
     */
    public function countChildren()
    {
        $total = count($this->children);

        return $total;
    }
242
243
    /**
     * Appends a node to the end of this node's child list and makes this
     * node its parent.
     *
     * @param AbstractNode $child
     * @return bool true when the child was added, false when it already
     *              was a child of this node
     * @throws CircularException when the child is this node itself or one
     *                           of its ancestors
     */
    public function addChild(AbstractNode $child)
    {
        // an ancestor can not become a child
        if ($this->isAncestor($child->id())) {
            throw new CircularException('Can not add child. It is my ancestor.');
        }

        // neither can the node itself
        if ($child->id() == $this->id) {
            throw new CircularException('Can not set itself as a child.');
        }

        $previousId = null;
        if ($this->hasChildren()) {
            if (isset($this->children[$child->id()])) {
                // already registered, nothing to add
                return false;
            }

            // link the current last child to the new one
            $previousId                          = $this->lastChild()->id();
            $this->children[$previousId]['next'] = $child->id();
        }

        // register the new child at the end of the list
        $this->children[$child->id()] = [
            'node' => $child,
            'next' => null,
            'prev' => $previousId,
        ];

        // let the child know about its new parent
        $child->setParent($this);

        // invalidate anything cached
        $this->clear();

        return true;
    }
290
291
    /**
     * Unregisters the child with the given id and re-links its neighbours
     * to each other. Unknown ids are ignored.
     *
     * @param string $id id of the child, as returned by AbstractNode::id()
     * @return $this
     */
    public function removeChild($id)
    {
        if (isset($this->children[$id])) {
            $entry  = $this->children[$id];
            $nextId = $entry['next'];
            $prevId = $entry['prev'];

            // close the gap in the next/prev chain
            if ($nextId !== null) {
                $this->children[$nextId]['prev'] = $prevId;
            }
            if ($prevId !== null) {
                $this->children[$prevId]['next'] = $nextId;
            }

            unset($this->children[$id]);

            // invalidate anything cached
            $this->clear();
        }

        return $this;
    }
321
322
    /**
     * Attempts to get the child that follows the child with the given id.
     *
     * @param string $id id of the reference child
     * @return AbstractNode
     * @throws ChildNotFoundException when $id is not a child of this node
     *                                or no following child is recorded
     * @uses $this->getChild()
     */
    public function nextChild($id)
    {
        return $this->adjacentChild($id, 'next');
    }

    /**
     * Attempts to get the child that precedes the child with the given id.
     *
     * @param string $id id of the reference child
     * @return AbstractNode
     * @throws ChildNotFoundException when $id is not a child of this node
     *                                or no preceding child is recorded
     * @uses $this->getChild()
     */
    public function previousChild($id)
    {
        return $this->adjacentChild($id, 'prev');
    }

    /**
     * Shared lookup behind nextChild() and previousChild(): resolves the
     * reference child, reads the requested link from its entry and returns
     * the child registered under that id.
     *
     * @param string $id   id of the reference child
     * @param string $link which link to follow, 'next' or 'prev'
     * @return AbstractNode
     * @throws ChildNotFoundException when either lookup fails
     */
    private function adjacentChild($id, $link)
    {
        $child      = $this->getChild($id);
        $adjacentId = $this->children[$child->id()][$link];

        // a null link is not a registered id, so getChild() throws here
        return $this->getChild($adjacentId);
    }
351
352
    /**
     * Tells whether the given id belongs to a direct child of this node.
     * Ids are compared loosely (==).
     *
     * @param string $id
     * @return bool
     */
    public function isChild($id)
    {
        $childIds = array_keys($this->children);

        return in_array($id, $childIds);
    }
369
370
    /**
     * Tells whether the given id belongs to a node somewhere below this
     * node: a direct child, or a descendant of one of the children.
     *
     * @param string $id
     * @return bool
     */
    public function isDescendant($id)
    {
        if ($this->isChild($id)) {
            return true;
        }

        // not a direct child: search each subtree
        foreach ($this->children as $entry) {
            /** @var AbstractNode $childNode */
            $childNode = $entry['node'];
            if ( ! $childNode->hasChildren()) {
                continue;
            }
            if ($childNode->isDescendant($id)) {
                return true;
            }
        }

        return false;
    }
395
396
    /**
     * Tells whether the given id belongs to one of this node's ancestors.
     *
     * @param string $id
     * @return bool
     */
    public function isAncestor($id)
    {
        return ! is_null($this->getAncestor($id));
    }
411
412
    /**
     * Walks up the parent chain looking for the node with the given id.
     *
     * @param string $id
     * @return null|AbstractNode the matching ancestor, or null when the
     *                           chain ends without a match
     */
    public function getAncestor($id)
    {
        if (is_null($this->parent)) {
            return null;
        }

        if ($this->parent->id() == $id) {
            return $this->parent;
        }

        // keep climbing
        return $this->parent->getAncestor($id);
    }
430
431
    /**
     * Shortcut for the first entry of the child list.
     *
     * @return AbstractNode
     * @uses $this->getChild()
     */
    public function firstChild()
    {
        // move the internal array pointer to the first entry and read its key
        reset($this->children);

        return $this->getChild(key($this->children));
    }
444
445
    /**
     * Shortcut for the last entry of the child list.
     *
     * @return AbstractNode
     */
    public function lastChild()
    {
        // move the internal array pointer to the last entry and read its key
        end($this->children);

        return $this->getChild(key($this->children));
    }
457
458
    /**
     * Asks the parent for the child that follows this node.
     *
     * @return AbstractNode
     * @throws ParentNotFoundException when this node has no parent
     */
    public function nextSibling()
    {
        if ($this->parent !== null) {
            return $this->parent->nextChild($this->id);
        }

        throw new ParentNotFoundException('Parent is not set for this node.');
    }
472
473
    /**
     * Asks the parent for the child that precedes this node.
     *
     * @return AbstractNode
     * @throws ParentNotFoundException when this node has no parent
     */
    public function previousSibling()
    {
        if ($this->parent !== null) {
            return $this->parent->previousChild($this->id);
        }

        throw new ParentNotFoundException('Parent is not set for this node.');
    }
487
488
    /**
     * Gives back the tag object held by this node.
     *
     * @return Tag
     */
    public function getTag()
    {
        return $this->tag;
    }
497
498
    /**
     * Fetches all attributes from the tag of this node and reduces each
     * entry to its 'value'.
     *
     * @return array attribute values keyed by attribute name
     */
    public function getAttributes()
    {
        $values = [];
        foreach ($this->tag->getAttributes() as $name => $info) {
            $values[$name] = $info['value'];
        }

        return $values;
    }
513
514
    /**
     * Fetches a single attribute from the tag of this node and reduces it
     * to its 'value'.
     *
     * @param string $key
     * @return mixed the attribute value, or null when the tag reports no
     *               such attribute
     */
    public function getAttribute($key)
    {
        $info = $this->tag->getAttribute($key);

        return is_null($info) ? null : $info['value'];
    }
530
531
    /**
     * Forwards the attribute to setAttribute() on the tag of this node.
     *
     * @param string $key
     * @param string $value
     * @return $this
     */
    public function setAttribute($key, $value)
    {
        $tag = $this->tag;
        $tag->setAttribute($key, $value);

        return $this;
    }
545
546
    /**
     * Searches the path from this node up to the root for the first node
     * whose tag name equals the given one. The node itself is checked
     * first.
     *
     * @param  string $tag
     * @return AbstractNode
     * @throws ParentNotFoundException when no node on the path matches
     */
    public function ancestorByTag($tag)
    {
        // the walk starts at this node and climbs one parent per step
        for ($node = $this; ! is_null($node); $node = $node->getParent()) {
            if ($node->tag->name() == $tag) {
                return $node;
            }
        }

        throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
    }
568
569
    /**
     * Runs a css selector against this node.
     *
     * @param string $selector
     * @param int $nth optional index into the result set
     * @return array|AbstractNode|null all matches when $nth is null,
     *                                 otherwise the match at index $nth
     *                                 or null when that index is not set
     */
    public function find($selector, $nth = null)
    {
        $matcher = new Selector($selector);
        $nodes   = $matcher->find($this);

        if (is_null($nth)) {
            return $nodes;
        }

        // a single element was requested
        return isset($nodes[$nth]) ? $nodes[$nth] : null;
    }
592
593
    /**
     * Function to try a few tricks to determine the displayed size of an img on the page.
     * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
     *
     * The width/height attributes of the tag are used first. For a
     * dimension that has no attribute, a pixel length ("NNpx") from the
     * inline style attribute is used instead.
     *
     * Future enhancement:
     * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
     *
     * Far future enhancement
     * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
     * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
     *
     * ridiculously far future development
     * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
     *
     * @author John Schlick
     * @return array|bool an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out;
     *                    false when this node is not an img tag.
     */
    public function get_display_size()
    {
        if ($this->tag->name() != 'img') {
            return false;
        }

        $width  = -1;
        $height = -1;

        // See if there is a height or width attribute in the tag itself.
        // The node-level getAttribute() is used because it unwraps the
        // attribute info array of the tag into its plain value.
        $widthAttribute = $this->getAttribute('width');
        if ( ! is_null($widthAttribute)) {
            $width = $widthAttribute;
        }

        $heightAttribute = $this->getAttribute('height');
        if ( ! is_null($heightAttribute)) {
            $height = $heightAttribute;
        }

        // Now look for an inline style.
        $style = $this->getAttribute('style');
        if (is_string($style)) {
            // Thanks to user 'gnarf' from stackoverflow for this regular expression.
            $attributes = [];
            preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $style, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                // the value group is greedy and may carry trailing whitespace
                $attributes[$match[1]] = trim($match[2]);
            }

            // each dimension is resolved against its own current value
            $width  = $this->getLength($attributes, $width, 'width');
            $height = $this->getLength($attributes, $height, 'height');
        }

        return [
            'height' => $height,
            'width'  => $width,
        ];
    }
649
650
    /**
     * If there is a pixel length in the style attributes use it.
     *
     * The style value is only considered while $length still is -1 (not
     * yet determined) and only when it is an integer followed by "px".
     *
     * @param array $attributes style properties keyed by property name
     * @param int $length the length determined so far, -1 when unknown
     * @param string $key the style property to read, e.g. 'width'
     * @return int|string the numeric part of the style value when it is
     *                    usable, otherwise $length unchanged
     */
    protected function getLength(array $attributes, $length, $key)
    {
        if ( ! isset($attributes[$key]) || $length != -1) {
            return $length;
        }

        $value = $attributes[$key];

        // check that the last two characters are px (pixels)
        if (strtolower(substr($value, -2)) != 'px') {
            return $length;
        }

        $proposed_length = substr($value, 0, -2);

        // Now make sure that it's an integer. The comparison has to be
        // against false: filter_var() returns the integer itself, so a
        // truthiness check would reject a valid length of 0.
        if (filter_var($proposed_length, FILTER_VALIDATE_INT) !== false) {
            return $proposed_length;
        }

        return $length;
    }
673
674
    /**
     * Gets the inner html of this node. Also reachable through the magic
     * "innerhtml" property handled by __get().
     *
     * @return string
     */
679
    abstract public function innerHtml();
680
681
    /**
     * Gets the html of this node, including its own tag. Used by
     * __toString() and by the magic "outerhtml" property handled by __get().
     *
     * @return string
     */
687
    abstract public function outerHtml();
688
689
    /**
     * Gets the text of this node (if there is any text). Also reachable
     * through the magic "text" property handled by __get().
     *
     * @return string
     */
694
    abstract public function text();
695
696
    /**
     * Call this when something in the node tree has changed. Like a child has been added
     * or a parent has been changed. Within this class it is invoked by
     * setParent(), addChild() and removeChild().
     *
     * @return void
     */
702
    abstract protected function clear();
703
}
704