Completed
Push — master ( 160bf7...a06a0a )
by Gilles
02:49
created

AbstractNode::delete()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 1
Metric Value
c 1
b 1
f 1
dl 0
loc 8
rs 9.4285
cc 2
eloc 4
nc 2
nop 0
1
<?php
2
namespace PHPHtmlParser\Dom;
3
4
use PHPHtmlParser\Selector;
5
use PHPHtmlParser\Exceptions\CircularException;
6
use PHPHtmlParser\Exceptions\ParentNotFoundException;
7
use stringEncode\Encode;
8
9
/**
10
 * Dom node object.
11
 *
12
 * @property string outerhtml
13
 * @property string innerhtml
14
 * @property string text
15
 */
16
abstract class AbstractNode
17
{
18
19
    /**
20
     * Contains the tag name/type
21
     *
22
     * @var \PHPHtmlParser\Dom\Tag
23
     */
24
    protected $tag;
25
26
    /**
27
     * Contains a list of attributes on this tag.
28
     *
29
     * @var array
30
     */
31
    protected $attr = [];
32
33
    /**
34
     * Contains the parent Node.
35
     *
36
     * @var InnerNode
37
     */
38
    protected $parent = null;
39
40
    /**
41
     * The unique id of this node object, as generated by PHP (spl_object_hash).
42
     *
43
     * @var string
44
     */
45
    protected $id;
46
47
    /**
48
     * The encoding class used to encode strings.
49
     *
50
     * @var mixed
51
     */
52
    protected $encode;
53
54
    /**
     * Initialises the node by storing the spl object hash of this
     * instance as its id.
     */
    public function __construct()
    {
        $hash     = spl_object_hash($this);
        $this->id = $hash;
    }
61
62
    /**
     * Magic getter for attributes and a few virtual properties.
     *
     * An attribute with the given name always wins. When no such
     * attribute is set, the case-insensitive names "outerhtml",
     * "innerhtml" and "text" are mapped to the matching method.
     *
     * @param string $key
     * @return mixed the attribute value, the result of the mapped method,
     *               or null when the key is not recognised
     */
    public function __get($key)
    {
        // look the attribute up once and reuse the result
        $attribute = $this->getAttribute($key);
        if ( ! is_null($attribute)) {
            return $attribute;
        }

        switch (strtolower($key)) {
            case 'outerhtml':
                return $this->outerHtml();
            case 'innerhtml':
                return $this->innerHtml();
            case 'text':
                return $this->text();
        }

        return null;
    }
85
86
    /**
     * Attempts to clear out any object references held by this node.
     */
    public function __destruct()
    {
        $this->tag    = null;
        $this->attr   = [];
        $this->parent = null;

        // This class does not declare $children; only reset it when the
        // concrete node type actually has it, so that no undeclared
        // (dynamic) property is created on nodes without children.
        if (property_exists($this, 'children')) {
            $this->children = [];
        }
    }
96
97
    /**
     * String conversion of the node: yields the result of outerHtml().
     *
     * @return string
     */
    public function __toString()
    {
        $html = $this->outerHtml();

        return $html;
    }
106
107
    /**
     * Gives back the id that was assigned to this node on construction.
     *
     * @return string
     */
    public function id()
    {
        return $this->id;
    }
114
115
    /**
     * Gives back the node currently stored as parent.
     *
     * @return InnerNode|null null when no parent has been set
     */
    public function getParent()
    {
        return $this->parent;
    }
124
125
    /**
     * Moves this node under the given parent.
     *
     * Nothing happens when the given node already is the parent.
     * Otherwise the node is removed from its previous parent (if any),
     * added as a child of the new one, and clear() is called.
     *
     * @param InnerNode $parent
     * @return $this
     * @throws CircularException presumably raised by addChild(); not visible in this class
     */
    public function setParent(InnerNode $parent)
    {
        $previous = $this->parent;

        if ($previous !== null) {
            // same parent as before: nothing to do
            if ($previous->id() == $parent->id()) {
                return $this;
            }

            // detach from the old parent first
            $previous->removeChild($this->id);
        }

        $this->parent = $parent;

        // register this node with its new parent
        $parent->addChild($this);

        // the position in the tree changed, drop any cache
        $this->clear();

        return $this;
    }
154
155
    /**
     * Detaches this node from its parent: the parent (if any) is asked
     * to remove this node by id, then the parent reference is dropped.
     *
     * @return void
     */
    public function delete()
    {
        $owner = $this->parent;

        if ($owner !== null) {
            $owner->removeChild($this->id);
        }

        $this->parent = null;
    }
169
170
    /**
     * Stores the encoding object on this node and hands it on to the
     * node's tag.
     *
     * @param Encode $encode
     * @return void
     */
    public function propagateEncoding(Encode $encode)
    {
        $this->encode = $encode;

        $tag = $this->tag;
        $tag->setEncoding($encode);
    }
181
182
    /**
     * Tells whether a node with the given id is found among the
     * ancestors of this node.
     *
     * @param string $id node id as returned by id()
     * @return bool
     */
    public function isAncestor($id)
    {
        $ancestor = $this->getAncestor($id);

        return ! is_null($ancestor);
    }
197
198
    /**
     * Walks up the parent chain looking for the node with the given id.
     *
     * @param string $id node id as returned by id()
     * @return null|AbstractNode the matching ancestor, or null when the
     *                           chain ends without a match
     */
    public function getAncestor($id)
    {
        // no parent: the chain ends here
        if (is_null($this->parent)) {
            return null;
        }

        if ($this->parent->id() == $id) {
            return $this->parent;
        }

        // keep looking further up
        return $this->parent->getAncestor($id);
    }
216
217
    /**
     * Shortcut to return the first child of this node.
     *
     * This class itself declares neither the children list nor
     * getChild(); only node types that provide getChild() can answer
     * this call, all others are rejected with an explicit exception.
     *
     * @return AbstractNode
     * @throws \LogicException when this node type does not provide getChild()
     * @uses $this->getChild()
     */
    public function firstChild()
    {
        if ( ! method_exists($this, 'getChild')) {
            throw new \LogicException(get_class($this).' does not support child nodes.');
        }

        // move the internal array pointer to the first entry and read its key
        reset($this->children);
        $key = key($this->children);

        return $this->getChild($key);
    }
230
231
    /**
     * Shortcut to return the last child of this node.
     *
     * This class itself declares neither the children list nor
     * getChild(); only node types that provide getChild() can answer
     * this call, all others are rejected with an explicit exception.
     *
     * @return AbstractNode
     * @throws \LogicException when this node type does not provide getChild()
     * @uses $this->getChild()
     */
    public function lastChild()
    {
        if ( ! method_exists($this, 'getChild')) {
            throw new \LogicException(get_class($this).' does not support child nodes.');
        }

        // move the internal array pointer to the last entry and read its key
        end($this->children);
        $key = key($this->children);

        return $this->getChild($key);
    }
243
244
    /**
     * Asks the parent for the child that follows this node.
     *
     * @return AbstractNode
     * @throws ParentNotFoundException when this node has no parent
     */
    public function nextSibling()
    {
        $owner = $this->parent;

        if ($owner === null) {
            throw new ParentNotFoundException('Parent is not set for this node.');
        }

        return $owner->nextChild($this->id);
    }
258
259
    /**
     * Asks the parent for the child that precedes this node.
     *
     * @return AbstractNode
     * @throws ParentNotFoundException when this node has no parent
     */
    public function previousSibling()
    {
        $owner = $this->parent;

        if ($owner === null) {
            throw new ParentNotFoundException('Parent is not set for this node.');
        }

        return $owner->previousChild($this->id);
    }
273
274
    /**
     * Gives back the tag object stored on this node.
     *
     * @return Tag
     */
    public function getTag()
    {
        return $this->tag;
    }
283
284
    /**
     * Fetches all attributes from the tag of this node and reduces each
     * entry to its 'value' element.
     *
     * @return array attribute name => attribute value
     */
    public function getAttributes()
    {
        $values = [];

        foreach ($this->tag->getAttributes() as $name => $info) {
            $values[$name] = $info['value'];
        }

        return $values;
    }
299
300
    /**
     * Fetches a single attribute from the tag of this node and returns
     * its 'value' element.
     *
     * @param string $key
     * @return mixed the attribute value, or null when the tag reports
     *               no such attribute
     */
    public function getAttribute($key)
    {
        $info = $this->tag->getAttribute($key);

        if (is_null($info)) {
            return null;
        }

        return $info['value'];
    }
316
317
    /**
     * Passes the attribute on to the tag of this node; returns the node
     * itself so calls can be chained.
     *
     * @param string $key
     * @param string $value
     * @return $this
     */
    public function setAttribute($key, $value)
    {
        $tag = $this->tag;
        $tag->setAttribute($key, $value);

        return $this;
    }
331
332
    /**
     * Searches the path from this node up to the root for a node whose
     * tag has the given name. The node itself is checked first.
     *
     * @param  string $tag
     * @return AbstractNode
     * @throws ParentNotFoundException when no node on the path matches
     */
    public function ancestorByTag($tag)
    {
        // start at this node and climb until there is no parent left
        for ($node = $this; ! is_null($node); $node = $node->getParent()) {
            if ($node->tag->name() == $tag) {
                return $node;
            }
        }

        throw new ParentNotFoundException('Could not find an ancestor with "'.$tag.'" tag');
    }
354
355
    /**
     * Find elements by css selector.
     *
     * Without $nth the complete result of the selector is returned.
     * With $nth only the entry at that index is returned, or null when
     * the result has no such entry.
     *
     * @param string $selector
     * @param int|null $nth
     * @return Collection|AbstractNode|null
     */
    public function find($selector, $nth = null)
    {
        $selector = new Selector($selector);
        $nodes    = $selector->find($this);

        if (is_null($nth)) {
            return $nodes;
        }

        // a single entry was requested; a missing index yields null
        return isset($nodes[$nth]) ? $nodes[$nth] : null;
    }
378
379
    /**
     * Function to try a few tricks to determine the displayed size of an img on the page.
     * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
     *
     * The width/height attributes of the tag are used first; a dimension that
     * is still unknown afterwards is taken from a pixel value in the inline
     * style attribute.
     *
     * Future enhancement:
     * Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
     *
     * Far future enhancement
     * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
     * Note that in this case, the class or id will have the img sub-selector for it to apply to the image.
     *
     * ridiculously far future development
     * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page.
     *
     * @author John Schlick
     * @return array|bool an array containing the 'height' and 'width' of the image on the page
     *                    (-1 for a dimension we can't figure out), or false when this is not an img tag.
     */
    public function get_display_size()
    {
        if ($this->tag->name() != 'img') {
            return false;
        }

        $width  = -1;
        $height = -1;

        // Read the attributes through getAttribute(): it unwraps the
        // ['value' => ...] structure the tag keeps per attribute, so we
        // work with the plain value instead of that array.
        $widthAttribute = $this->getAttribute('width');
        if ( ! is_null($widthAttribute)) {
            $width = $widthAttribute;
        }

        $heightAttribute = $this->getAttribute('height');
        if ( ! is_null($heightAttribute)) {
            $height = $heightAttribute;
        }

        // Now look for an inline style.
        $style = $this->getAttribute('style');
        if ( ! is_null($style)) {
            // Thanks to user 'gnarf' from stackoverflow for this regular expression.
            $attributes = [];
            $matches    = [];
            preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $style, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                // the value group is greedy and may carry trailing whitespace
                $attributes[$match[1]] = trim($match[2]);
            }

            // each dimension is resolved against its own current value
            $width  = $this->getLength($attributes, $width, 'width');
            $height = $this->getLength($attributes, $height, 'height');
        }

        return [
            'height' => $height,
            'width'  => $width,
        ];
    }
435
436
    /**
     * If there is a usable length in the style attributes use it.
     *
     * The style value is only considered while the current length is
     * still -1, and only when it is a whole number followed by "px"
     * (case-insensitive). In every other case the given length is
     * returned unchanged.
     *
     * @param array $attributes style properties, name => value
     * @param int|string $length the length found so far, -1 when unknown
     * @param string $key name of the style property to read
     * @return int|string the given length, or the number taken from the style value
     */
    protected function getLength(array $attributes, $length, $key)
    {
        // a length that is already known wins over the style value
        if ( ! isset($attributes[$key]) || $length != -1) {
            return $length;
        }

        $value = $attributes[$key];

        // check that the last two characters are px (pixels)
        if (strtolower(substr($value, -2)) != 'px') {
            return $length;
        }

        $proposed_length = substr($value, 0, -2);

        // filter_var() returns false on failure; compare strictly so that
        // a valid length of 0 is not mistaken for a failed validation.
        if (filter_var($proposed_length, FILTER_VALIDATE_INT) !== false) {
            $length = $proposed_length;
        }

        return $length;
    }
459
460
    /**
461
     * Gets the inner html of this node.
462
     *
463
     * @return string
464
     */
465
    abstract public function innerHtml();
466
467
    /**
468
     * Gets the html of this node, including its own
469
     * tag.
470
     *
471
     * @return string
472
     */
473
    abstract public function outerHtml();
474
475
    /**
476
     * Gets the text of this node (if there is any text).
477
     *
478
     * @return string
479
     */
480
    abstract public function text();
481
482
    /**
483
     * Call this when something in the node tree has changed. Like a child has been added
484
     * or a parent has been changed.
485
     *
486
     * @return void
487
     */
488
    abstract protected function clear();
489
}
490