Issues (994)

src/simplehtmldom/HtmlNode.php (15 issues)

1
<?php
2
3
namespace simplehtmldom;
4
5
/**
6
 * Website: http://sourceforge.net/projects/simplehtmldom/
7
 * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/).
8
 *
9
 * Licensed under The MIT License
10
 * See the LICENSE file in the project root for more information.
11
 *
12
 * Authors:
13
 *   S.C. Chen
14
 *   John Schlick
15
 *   Rus Carroll
16
 *   logmanoriginal
17
 *
18
 * Contributors:
19
 *   Yousuke Kumakura
20
 *   Vadim Voituk
21
 *   Antcs
22
 *
23
 * Version Rev. 2.0-RC2 (415)
24
 */
25
include_once __DIR__ . '/constants.php';
26
include_once __DIR__ . '/Debug.php';
27
28
/**
29
 * HTMLNode class
30
 * @property string $innertext
31
 * @property string|null $title
32
 * @property string|null $alt
33
 * @property string|null $src
34
 * @property string|null $href
35
 * @property string|null $async
36
 * @property string|null $defer
37
 */
38
class HtmlNode
39
{
40
  const HDOM_TYPE_ELEMENT = 1;
41
  const HDOM_TYPE_COMMENT = 2;
42
  const HDOM_TYPE_TEXT = 3;
43
  const HDOM_TYPE_ROOT = 5;
44
  const HDOM_TYPE_UNKNOWN = 6;
45
  const HDOM_TYPE_CDATA = 7;
46
47
  const HDOM_QUOTE_DOUBLE = 0;
48
  const HDOM_QUOTE_SINGLE = 1;
49
  const HDOM_QUOTE_NO = 3;
50
51
  const HDOM_INFO_BEGIN = 0;
52
  const HDOM_INFO_END = 1;
53
  const HDOM_INFO_QUOTE = 2;
54
  const HDOM_INFO_SPACE = 3;
55
  const HDOM_INFO_TEXT = 4;
56
  const HDOM_INFO_INNER = 5;
57
  const HDOM_INFO_OUTER = 6;
58
  const HDOM_INFO_ENDSPACE = 7;
59
60
  public $nodetype = self::HDOM_TYPE_TEXT;
61
  public $tag = 'text';
62
  public $attr = [];
63
  public $children = [];
64
  public $nodes = [];
65
  public $parent = null;
66
  public $_ = [];
67
  private $dom = null;
68
69
  /**
   * Forwards deprecated lower_case method names to their camelCase
   * replacements and logs a deprecation notice.
   *
   * @param string $func Name of the method that was called.
   * @param array $args Arguments passed to that method.
   * @return mixed Result of the replacement method, or null if $func is not
   * a known alias (after raising E_USER_ERROR).
   */
  public function __call($func, $args)
  {
    // Allow users to call methods with lower_case syntax
    $aliases = [
      'children' => 'childNodes',
      'first_child' => 'firstChild',
      'has_child' => 'hasChildNodes',
      'last_child' => 'lastChild',
      'next_sibling' => 'nextSibling',
      'prev_sibling' => 'previousSibling',
    ];

    if (!isset($aliases[$func])) {
      trigger_error(
        'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
        E_USER_ERROR
      );

      // trigger_error() returns when a custom error handler is installed.
      // Stop here instead of continuing with an undefined target method.
      return null;
    }

    $actual_function = $aliases[$func];

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array([$this, $actual_function], $args);
  }
103
104
  /**
   * Creates a node and, when a document is given, registers the node in it.
   *
   * @param object|null $dom Document the node belongs to. When null the node
   * is created detached and is not added to any node list.
   */
  public function __construct($dom)
  {
    // A constructor's return value is discarded by "new", so just bail out.
    if (null === $dom) {
      return;
    }

    $this->dom = $dom;
    $dom->nodes[] = $this;
  }
113
114
  /**
   * Returns the data shown by var_dump() for this node.
   *
   * @return array Node type (name and value), tag, attributes and child
   * nodes; empty attribute/node lists are reported as the string 'none'.
   */
  public function __debugInfo()
  {
    // Translate node type to human-readable form
    $type_names = [
      self::HDOM_TYPE_ELEMENT => 'HDOM_TYPE_ELEMENT',
      self::HDOM_TYPE_COMMENT => 'HDOM_TYPE_COMMENT',
      self::HDOM_TYPE_TEXT => 'HDOM_TYPE_TEXT',
      self::HDOM_TYPE_ROOT => 'HDOM_TYPE_ROOT',
      self::HDOM_TYPE_CDATA => 'HDOM_TYPE_CDATA',
    ];

    $type_name = 'HDOM_TYPE_UNKNOWN';

    foreach ($type_names as $type => $name) {
      // Loose comparison on purpose: same matching rules as a switch
      if ($this->nodetype == $type) {
        $type_name = $name;
        break;
      }
    }

    $info = [];
    $info['nodetype'] = $type_name . ' (' . $this->nodetype . ')';
    $info['tag'] = $this->tag;
    $info['attributes'] = empty($this->attr) ? 'none' : $this->attr;
    $info['nodes'] = empty($this->nodes) ? 'none' : $this->nodes;

    return $info;
  }
145
146
  /**
   * String conversion: a node converts to its outer text.
   *
   * @return string Result of outertext().
   */
  public function __toString()
  {
    $markup = $this->outertext();

    return $markup;
  }
150
151
  /**
   * Drops this node's references to its document and to its parent.
   *
   * NOTE(review): unset() removes the properties rather than setting them to
   * null, so later reads of $this->dom / $this->parent are routed through
   * the class's magic accessors - confirm that this is intended.
   */
  public function clear()
  {
    // Break link to origin
    unset($this->dom);
    unset($this->parent);
  }
156
157
  /**
   * Prints this node and all nodes below it as a tab-indented tree, one
   * node per line.
   *
   * @param bool $show_attr Whether to print each node's attributes.
   * @param int $depth Indentation level (number of leading tabs).
   *
   * @codeCoverageIgnore
   */
  public function dump($show_attr = true, $depth = 0)
  {
    echo str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
      echo '(';
      foreach ($this->attr as $name => $value) {
        echo "[$name]=>\"$value\", ";
      }
      echo ')';
    }

    echo "\n";

    if (empty($this->nodes)) {
      return;
    }

    // Children are printed one level deeper
    $child_depth = $depth + 1;
    foreach ($this->nodes as $child) {
      $child->dump($show_attr, $child_depth);
    }
  }
178
179
  /**
   * Builds a one-line debug description of this node: tag, attributes, the
   * content of $_, inner info and the number of children and nodes.
   *
   * @param bool $echo True to print the description, false to return it.
   * @return string|null The description when $echo is false, null otherwise.
   *
   * @codeCoverageIgnore
   */
  public function dump_node($echo = true)
  {
    $string = $this->tag;

    if (count($this->attr) > 0) {
      $string .= '(';
      foreach ($this->attr as $k => $v) {
        $string .= "[$k]=>\"$v\", ";
      }
      $string .= ')';
    }

    if (count($this->_) > 0) {
      $string .= ' $_ (';
      foreach ($this->_ as $k => $v) {
        if (is_array($v)) {
          // One level of nesting is expanded
          $string .= "[$k]=>(";
          foreach ($v as $k2 => $v2) {
            $string .= "[$k2]=>\"$v2\", ";
          }
          $string .= ')';
        } else {
          $string .= "[$k]=>\"$v\", ";
        }
      }
      $string .= ')';
    }

    // "text" is not a declared property; this goes through the class's
    // magic accessors.
    if (isset($this->text)) {
      $string .= " text: ({$this->text})";
    }

    // This used to test an undefined $node variable and therefore never
    // printed anything; the inner info of the current node is meant.
    $string .= ' HDOM_INNER_INFO: ';
    if (isset($this->_[self::HDOM_INFO_INNER])) {
      $string .= "'" . $this->_[self::HDOM_INFO_INNER] . "'";
    } else {
      $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= "\n";

    if ($echo) {
      echo $string;

      return;
    }

    return $string;
  }
236
237
  /**
   * Returns the parent node, optionally moving this node below a new parent
   * first.
   *
   * When a new parent is given the node is removed from the nodes/children
   * lists of its previous parent and appended to the lists of the new
   * parent (unless it is already listed there).
   *
   * @param HtmlNode|null $parent New parent, or null to only read the
   * current one.
   * @return HtmlNode|null The (new) parent of this node.
   */
  public function parent($parent = null)
  {
    if (null !== $parent) {
      $previous = $this->parent;

      // Detach from the previous parent first, otherwise this node would be
      // listed as a child of two different nodes.
      if ($previous instanceof self && $previous !== $parent) {
        $position = array_search($this, $previous->nodes, true);
        if (false !== $position) {
          // array_splice keeps the list contiguous for index based lookups
          array_splice($previous->nodes, $position, 1);
        }

        $position = array_search($this, $previous->children, true);
        if (false !== $position) {
          array_splice($previous->children, $position, 1);
        }
      }

      $this->parent = $parent;

      // Strict checks (identity) so repeated calls don't add duplicates
      if (!in_array($this, $parent->nodes, true)) {
        $parent->nodes[] = $this;
      }

      if (!in_array($this, $parent->children, true)) {
        $parent->children[] = $this;
      }
    }

    return $this->parent;
  }
250
251
  /**
   * Walks up the parent chain and returns the closest ancestor with the
   * given tag.
   *
   * @param string $tag Tag to look for (compared with ===).
   * @return HtmlNode|null The matching ancestor, or null if there is none.
   */
  public function find_ancestor_tag($tag)
  {
    for ($candidate = $this->parent; null !== $candidate; $candidate = $candidate->parent) {
      if ($candidate->tag === $tag) {
        return $candidate;
      }
    }

    // Ran out of parents without a match
    return null;
  }
269
270
  /**
   * Returns the content of this node: its stored inner (or else text) info
   * followed by the outer text of every node in $nodes.
   *
   * @return string The content after convert_text().
   */
  public function innertext()
  {
    $content = '';

    // Stored inner info wins over stored text info
    foreach ([self::HDOM_INFO_INNER, self::HDOM_INFO_TEXT] as $slot) {
      if (isset($this->_[$slot])) {
        $content = $this->_[$slot];
        break;
      }
    }

    foreach ($this->nodes as $child) {
      $content .= $child->outertext();
    }

    return $this->convert_text($content);
  }
286
287
  /**
   * Returns the markup of this node including its own tags.
   *
   * The root node returns its inner text. Stored outer or text info is
   * returned as is (after convert_text()); otherwise the markup is
   * assembled from the start tag, inner info, child nodes and end tag.
   *
   * @return string
   */
  public function outertext()
  {
    if ('root' === $this->tag) {
      return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && null !== $this->dom->callback) {
      call_user_func_array($this->dom->callback, [$this]);
    }

    // Stored outer info wins over stored text info
    foreach ([self::HDOM_INFO_OUTER, self::HDOM_INFO_TEXT] as $slot) {
      if (isset($this->_[$slot])) {
        return $this->convert_text($this->_[$slot]);
      }
    }

    $markup = isset($this->_[self::HDOM_INFO_BEGIN]) ? $this->makeup() : '';

    // todo: <br> should either never have self::HDOM_INFO_INNER or always
    if (isset($this->_[self::HDOM_INFO_INNER]) && 'br' !== $this->tag) {
      $markup .= $this->_[self::HDOM_INFO_INNER];
    }

    if (!empty($this->nodes)) {
      foreach ($this->nodes as $child) {
        $markup .= $child->outertext();
      }
    }

    // An end value of 0 means there is no end tag to write
    $has_end_tag = isset($this->_[self::HDOM_INFO_END])
      && 0 != $this->_[self::HDOM_INFO_END];

    if ($has_end_tag) {
      $markup .= '</' . $this->tag . '>';
    }

    return $this->convert_text($markup);
  }
331
332
  /**
   * Returns true if the provided element is a block level element.
   *
   * The tag is compared case-insensitively against a fixed list.
   *
   * @see https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
   *
   * @param HtmlNode $node Node whose tag is checked.
   * @return bool
   */
  protected function is_block_element($node)
  {
    // todo: When we have the utility class this should be moved there
    $block_tags = [
      'p' => true,
      'h1' => true,
      'h2' => true,
      'h3' => true,
      'h4' => true,
      'h5' => true,
      'h6' => true,
      'ol' => true,
      'ul' => true,
      'pre' => true,
      'address' => true,
      'blockquote' => true,
      'dl' => true,
      'div' => true,
      'fieldset' => true,
      'form' => true,
      'hr' => true,
      'noscript' => true,
      'table' => true,
    ];

    $tag = strtolower($node->tag);

    return isset($block_tags[$tag]);
  }
356
357
  /**
   * Returns true if the provided element is an inline level element.
   *
   * The tag is compared case-insensitively against a fixed list.
   *
   * @see https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
   *
   * @param HtmlNode $node Node whose tag is checked.
   * @return bool
   */
  protected function is_inline_element($node)
  {
    // todo: When we have the utility class this should be moved there
    $font_style = ['b', 'big', 'i', 'small', 'tt'];
    $phrase = ['abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var'];
    $special = ['a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'script', 'span', 'sub', 'sup'];
    $form_control = ['button', 'input', 'label', 'select', 'textarea'];

    $inline_tags = array_merge($font_style, $phrase, $special, $form_control);
    $tag = strtolower($node->tag);

    return in_array($tag, $inline_tags);
  }
372
373
  /**
   * Returns the plain text of this node and of the nodes below it.
   *
   * script and style elements, comments and unknown nodes contribute no
   * text of their own. Block level children are separated from the
   * preceding text by an empty line, <br> is replaced by the document's
   * br text (or DEFAULT_BR_TEXT).
   *
   * @param bool $trim True to strip whitespace at the start and end of the
   * result, false to reduce it to a single space.
   * @return string
   */
  public function text($trim = true)
  {
    $ret = '';
    $own_tag = strtolower($this->tag);

    $has_no_text = 'script' === $own_tag
      || 'style' === $own_tag
      || self::HDOM_TYPE_COMMENT === $this->nodetype
      || self::HDOM_TYPE_UNKNOWN === $this->nodetype;

    if (!$has_no_text) {
      if (self::HDOM_TYPE_CDATA === $this->nodetype) {
        $ret = $this->_[self::HDOM_INFO_INNER];
      } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
        $ret = $this->_[self::HDOM_INFO_INNER];
      } elseif (self::HDOM_TYPE_TEXT === $this->nodetype) {
        $ret = $this->_[self::HDOM_INFO_TEXT];
      }
    }

    // $nodes is public; guard against it having been replaced by null
    if (is_null($this->nodes)) {
      return '';
    }

    foreach ($this->nodes as $n) {
      if ($this->is_block_element($n)) {
        $block = ltrim($this->convert_text($n->text(false)));

        // Compare against '' instead of using empty(): a block that only
        // contains "0" is valid text and must not be dropped.
        if ('' === $block) {
          continue;
        }

        $ret = rtrim($ret) . "\n\n" . $block;
      } elseif ($this->is_inline_element($n)) {
        // todo: <br> introduces code smell because no space but \n
        if ('br' === strtolower($n->tag)) {
          $ret .= $this->dom->default_br_text ?: DEFAULT_BR_TEXT;
        } else {
          // Evaluate the child only once; calling text() twice per level
          // multiplies the work with every level of nesting.
          $inline = $this->convert_text($n->text(false));

          if ('' === ltrim($inline)) {
            continue;
          }

          $ret .= $inline;
        }
      } else {
        $ret .= $this->convert_text($n->text(false));
      }
    }

    // Reduce whitespace at start/end to a single (or none) space.
    // With the "u" modifier \xC2 and \xA0 are code points, not bytes, so the
    // former "\xC2\xA0" also stripped the letter U+00C2. Only the
    // non-breaking space (U+00A0) is meant.
    $ret = preg_replace('/[ \t\n\r\0\x0B\x{A0}]+$/u', $trim ? '' : ' ', $ret);
    $ret = preg_replace('/^[ \t\n\r\0\x0B\x{A0}]+/u', $trim ? '' : ' ', $ret);

    return $ret;
  }
430
431
  /**
   * Returns the inner text with CDATA section markers removed.
   *
   * @return string innertext() without "<![CDATA[" (matched in any letter
   * case) and without "]]>".
   */
  public function xmltext()
  {
    $without_opening = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $without_opening);
  }
439
440
  /**
   * Builds the start tag of this node from its tag name and attributes.
   *
   * Nodes with stored text info return that text instead. Attributes set to
   * null or false are left out, attributes set to true are written without
   * a value. Recorded whitespace and quote style are reproduced; values are
   * passed through htmlentities() unless the attribute is unquoted.
   *
   * @return string
   */
  public function makeup()
  {
    // text, comment, unknown
    if (isset($this->_[self::HDOM_INFO_TEXT])) {
      return $this->_[self::HDOM_INFO_TEXT];
    }

    $markup = '<' . $this->tag;

    foreach ($this->attr as $name => $value) {
      // skip removed attribute
      if (null === $value || false === $value) {
        continue;
      }

      // Recorded whitespace: [0] before the name, [1] before and [2] after "="
      $has_spacing = isset($this->_[self::HDOM_INFO_SPACE][$name]);
      $markup .= $has_spacing ? $this->_[self::HDOM_INFO_SPACE][$name][0] : ' ';

      //no value attr: nowrap, checked selected...
      if (true === $value) {
        $markup .= $name;
        continue;
      }

      $quote_type = isset($this->_[self::HDOM_INFO_QUOTE][$name])
        ? $this->_[self::HDOM_INFO_QUOTE][$name]
        : self::HDOM_QUOTE_DOUBLE;

      // Loose comparisons on purpose: same matching rules as a switch
      if (self::HDOM_QUOTE_SINGLE == $quote_type) {
        $quote = '\'';
        $value = htmlentities($value, ENT_QUOTES, $this->dom->target_charset);
      } elseif (self::HDOM_QUOTE_NO == $quote_type) {
        $quote = '';
      } else {
        $quote = '"';
        $value = htmlentities($value, ENT_COMPAT, $this->dom->target_charset);
      }

      $before_equals = $has_spacing ? $this->_[self::HDOM_INFO_SPACE][$name][1] : '';
      $after_equals = $has_spacing ? $this->_[self::HDOM_INFO_SPACE][$name][2] : '';

      $markup .= $name . $before_equals . '=' . $after_equals . $quote . $value . $quote;
    }

    if (isset($this->_[self::HDOM_INFO_ENDSPACE])) {
      $markup .= $this->_[self::HDOM_INFO_ENDSPACE];
    }

    return $markup . '>';
  }
501
502
  /**
   * Element selector
   *
   * Finds the nodes matching a selector, searching from this node. Every
   * selector returned by parse_selector() is evaluated on its own; the
   * results are merged and ordered by node index.
   *
   * @param string $selector
   * @param int|null $idx Null to get all matches, otherwise the zero-based
   * index of the match to return. Negative values count from the end.
   * @param boolean $lowercase Passed on to seek().
   * @return HtmlNode[]|HtmlNode|null Array of matches when $idx is null
   * (empty if nothing matched or the selector is invalid), otherwise the
   * match at $idx or null if there is no such match.
   */
  public function find($selector, $idx = null, $lowercase = false)
  {
    $selectors = $this->parse_selector($selector);
    if (0 === ($count = count($selectors))) {
      return [];
    }
    $found_keys = [];

    // find each selector
    for ($c = 0; $c < $count; ++$c) {
      // The change on the below line was documented on the sourceforge
      // code tracker id 2788009
      // used to be: if (($levle=count($selectors[0]))===0) return array();
      if (0 === ($level = count($selectors[$c]))) {
        Debug::log_once('Empty selector (' . $selector . ') matches nothing.');

        return [];
      }

      if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
        Debug::log_once('Invalid operation. The current node has no start tag.');

        return [];
      }

      // Keys of $head are node indexes; the search starts at this node
      $head = [$this->_[self::HDOM_INFO_BEGIN] => 1];
      $cmd = ' '; // Combinator

      // handle descendant selectors, no recursive!
      for ($l = 0; $l < $level; ++$l) {
        $ret = [];

        foreach ($head as $k => $v) {
          $n = (-1 === $k) ? $this->dom->root : $this->dom->nodes[$k];
          //PaperG - Pass this optional parameter on to the seek function.
          $n->seek($selectors[$c][$l], $ret, $cmd, $lowercase);
        }

        // Matches of this level are the starting points of the next one
        $head = $ret;
        $cmd = $selectors[$c][$l][6]; // Next Combinator
      }

      foreach ($head as $k => $v) {
        if (!isset($found_keys[$k])) {
          $found_keys[$k] = 1;
        }
      }
    }

    // sort keys
    ksort($found_keys);

    $found = [];
    foreach ($found_keys as $k => $v) {
      $found[] = $this->dom->nodes[$k];
    }

    // return nth-element or array
    if (is_null($idx)) {
      return $found;
    } elseif ($idx < 0) {
      $idx = count($found) + $idx;
    }

    return (isset($found[$idx])) ? $found[$idx] : null;
  }
576
577
  /**
   * Same as find(), but returns null instead of any "empty" result (such as
   * an empty array).
   *
   * @param string $selector
   * @param int|null $idx
   * @param boolean $lowercase
   * @return mixed The non-empty result of find(), or null.
   */
  public function expect($selector, $idx = null, $lowercase = false)
  {
    $result = $this->find($selector, $idx, $lowercase);

    if (!$result) {
      return null;
    }

    return $result;
  }
581
582
  /**
   * Collects the nodes that match one parsed selector, relative to this
   * node.
   *
   * @param array $selector Parsed selector: pseudo selector, tag, pseudo
   * element, id, class list, attribute list (and the following combinator,
   * which is not used here).
   * @param array $ret Result set, passed by reference. The index of every
   * matching node is added as key with the value 1.
   * @param string $parent_cmd Combinator that selects the candidates:
   * ' ' (descendants), '>' (children), '+' (next sibling) or '~'
   * (subsequent siblings).
   * @param bool $lowercase True to lowercase class names and attribute
   * values of the node (and the attribute value of the selector) before
   * comparing.
   */
  protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
  {
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes) = $selector;
    $nodes = [];

    if (' ' === $parent_cmd) { // Descendant Combinator
      // Find parent closing tag if the current element doesn't have a closing
      // tag (i.e. void element)
      $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;
      if (0 == $end && $this->parent) {
        $parent = $this->parent;
        while (null !== $parent && !isset($parent->_[self::HDOM_INFO_END])) {
          --$end;
          $parent = $parent->parent;
        }

        // The loop also ends when there are no more ancestors; in that case
        // there is no end index to add.
        if (null !== $parent) {
          $end += $parent->_[self::HDOM_INFO_END];
        }
      }

      if (0 === $end) {
        $end = count($this->dom->nodes);
      }

      // Get list of target nodes
      $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

      // remove() makes $this->dom->nodes non-contiguous; use what is left.
      $nodes = array_intersect_key(
        $this->dom->nodes,
        array_flip(range($nodes_start, $end))
      );
    } elseif ('>' === $parent_cmd) { // Child Combinator
      $nodes = $this->children;
    } elseif (('+' === $parent_cmd || '~' === $parent_cmd) && $this->parent) {
      // Strict search: nodes reference each other (parent <-> children), a
      // loose comparison would compare whole subtrees property by property.
      $index = array_search($this, $this->parent->children, true);

      if (false !== $index) {
        if ('+' === $parent_cmd) { // Next-Sibling Combinator
          if ($index + 1 < count($this->parent->children)) {
            $nodes[] = $this->parent->children[$index + 1];
          }
        } else { // Subsequent Sibling Combinator
          // Start after the current node; a node is not its own sibling
          $nodes = array_slice($this->parent->children, $index + 1);
        }
      }
    }

    // Go through each element starting at this element until the end tag
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
      // Skip root nodes
      if (!$node->parent) {
        continue;
      }

      // Handle 'text' selector
      if ('text' === $tag) {
        if ('text' === $node->tag) {
          $ret[array_search($node, $this->dom->nodes, true)] = 1;
        }

        if (isset($node->_[self::HDOM_INFO_INNER])) {
          $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }

        continue;
      }

      // Handle 'cdata' selector
      if ('cdata' === $tag) {
        if ('cdata' === $node->tag) {
          $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }

        continue;
      }

      // Handle 'comment'
      if ('comment' === $tag && 'comment' === $node->tag) {
        $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        continue;
      }

      // Skip if node isn't a child node (i.e. text nodes)
      if (!in_array($node, $node->parent->children, true)) {
        continue;
      }

      $pass = true;

      // Skip if tag doesn't match
      if ('' !== $tag && $tag !== $node->tag && '*' !== $tag) {
        $pass = false;
      }

      // Skip if ID doesn't exist
      if ($pass && '' !== $id && !isset($node->attr['id'])) {
        $pass = false;
      }

      // Check if ID matches
      if ($pass && '' !== $id && isset($node->attr['id'])) {
        // Note: Only consider the first ID (as browsers do)
        $node_id = explode(' ', trim($node->attr['id']))[0];

        if ($id !== $node_id) {
          $pass = false;
        }
      }

      // Check if all class(es) exist
      if ($pass && '' !== $class && is_array($class) && !empty($class)) {
        if (isset($node->attr['class'])) {
          // Apply the same rules for the pattern and attribute value
          // Attribute values must not contain control characters other than space
          // https://www.w3.org/TR/html/dom.html#text-content
          // https://www.w3.org/TR/html/syntax.html#attribute-values
          // https://www.w3.org/TR/xml/#AVNormalize
          $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
          $node_classes = trim($node_classes);
          $node_classes = explode(' ', $node_classes);

          if ($lowercase) {
            $node_classes = array_map('strtolower', $node_classes);
          }

          foreach ($class as $c) {
            // Strict: class names are strings and must not be compared as
            // numbers ("1e1" is not "10")
            if (!in_array($c, $node_classes, true)) {
              $pass = false;
              break;
            }
          }
        } else {
          $pass = false;
        }
      }

      // Check attributes
      if (
        $pass
        && '' !== $attributes
        && is_array($attributes)
        && !empty($attributes)
      ) {
        foreach ($attributes as $a) {
          list(
            $att_name,
            $att_expr,
            $att_val,
            $att_inv,
            $att_case_sensitivity
          ) = $a;

          // Handle indexing attributes (i.e. "[2]")
          /*
           * Note: This is not supported by the CSS Standard but adds
           * the ability to select items compatible to XPath (i.e.
           * the 3rd element within it's parent).
           *
           * Note: This doesn't conflict with the CSS Standard which
           * doesn't work on numeric attributes anyway.
           */
          if (
            is_numeric($att_name)
            && '' === $att_expr
            && '' === $att_val
          ) {
            $count = 0;

            // Find index of current element in parent
            foreach ($node->parent->children as $c) {
              if ($c->tag === $node->tag) {
                ++$count;
              }
              if ($c === $node) {
                break;
              }
            }

            // If this is the correct node, continue with next
            // attribute
            if ($count === (int) $att_name) {
              continue;
            }
          }

          // Check attribute availability
          if ($att_inv) { // Attribute should NOT be set
            if (isset($node->attr[$att_name])) {
              $pass = false;
              break;
            }
          } else { // Attribute should be set
            // todo: "plaintext" is not a valid CSS selector!
            if (
              'plaintext' !== $att_name
              && !isset($node->attr[$att_name])
            ) {
              $pass = false;
              break;
            }
          }

          // Continue with next attribute if expression isn't defined
          if ('' === $att_expr) {
            continue;
          }

          // If they have told us that this is a "plaintext"
          // search then we want the plaintext of the node - right?
          // todo "plaintext" is not a valid CSS selector!
          if ('plaintext' === $att_name) {
            $nodeKeyValue = $node->text();
          } else {
            $nodeKeyValue = $node->attr[$att_name];
          }

          // If lowercase is set, do a case insensitive test of
          // the value of the selector.
          if ($lowercase) {
            $check = $this->match(
              $att_expr,
              strtolower($att_val),
              strtolower($nodeKeyValue),
              $att_case_sensitivity
            );
          } else {
            $check = $this->match(
              $att_expr,
              $att_val,
              $nodeKeyValue,
              $att_case_sensitivity
            );
          }

          $check = 'not' === $ps_element ? !$check : $check;

          if (!$check) {
            $pass = false;
            break;
          }
        }
      }

      // Found a match. Add to list
      $pass = 'not' === $ps_selector ? !$pass : $pass;
      if ($pass) {
        $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
      }
    }
  }
843
844
  /**
   * Tests an attribute value against the value of an attribute selector.
   *
   * Whitespace runs in both values are collapsed to a single space and both
   * values are trimmed before they are compared.
   *
   * @param string $exp Operator of the attribute selector: '=', '!=', '^=',
   * '$=', '*=', '|=' or '~='.
   * @param string $pattern Value given in the selector.
   * @param string $value Value of the node's attribute.
   * @param string $case_sensitivity 'i' to compare case-insensitively.
   * @return bool|int Truthy if the value matches. '^=', '$=' and '*='
   * return the result of preg_match(). False (plus a log entry) for an
   * unknown operator.
   */
  protected function match($exp, $pattern, $value, $case_sensitivity)
  {
    if ('i' === $case_sensitivity) {
      $pattern = strtolower($pattern);
      $value = strtolower($value);
    }

    // Apply the same rules for the pattern and attribute value
    // Attribute values must not contain control characters other than space
    // https://www.w3.org/TR/html/dom.html#text-content
    // https://www.w3.org/TR/html/syntax.html#attribute-values
    // https://www.w3.org/TR/xml/#AVNormalize
    $pattern = preg_replace("/[\r\n\t\s]+/u", ' ', $pattern);
    $pattern = trim($pattern);

    $value = preg_replace("/[\r\n\t\s]+/u", ' ', $value);
    $value = trim($value);

    switch ($exp) {
      case '=':
        return $value === $pattern;
      case '!=':
        return $value !== $pattern;
      case '^=':
        return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
      case '$=':
        return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
      case '*=':
        return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
      case '|=':
        /*
         * [att|=val]
         *
         * Represents an element with the att attribute, its value
         * either being exactly "val" or beginning with "val"
         * immediately followed by "-" (U+002D).
         */
        // A plain prefix test is not enough: "en" must match "en" and
        // "en-US", but not "english".
        return $value === $pattern || 0 === strpos($value, $pattern . '-');
      case '~=':
        /*
         * [att~=val]
         *
         * Represents an element with the att attribute whose value is a
         * whitespace-separated list of words, one of which is exactly
         * "val". If "val" contains whitespace, it will never represent
         * anything (since the words are separated by spaces). Also if
         * "val" is the empty string, it will never represent anything.
         */
        // explode() yields [''] for an empty value, so an empty "val" has to
        // be rejected explicitly.
        return '' !== $pattern && in_array($pattern, explode(' ', $value), true);
    }

    Debug::log('Unhandled attribute selector: ' . $exp . '!');

    return false;
  }
899
900
  /**
   * Split a CSS selector string into its parsed components.
   *
   * The selector pattern is modified from mootools (https://mootools.net/).
   * Attribute names may contain a colon (e.g. <tag attr:ibute="x">); such
   * attributes must be read with getAttribute() because $node->x:y is not
   * valid PHP. An attribute specifier may also start with an "@" sign that
   * is not captured.
   *
   * Capture groups of the selector pattern:
   *
   * [0] - full match
   * [1] - pseudo selector:  (?:\:(\w+)\()?
   * [2] - tag name:         ([\w:\*-]*)
   * [3] - pseudo selector:  (?:\:(\w+)\()?
   * [4] - id name:          (?:\#([\w-]+))?
   * [5] - class names, possibly chained (".foo.bar"): (?:|\.([\w\.-]+))?
   * [6] - attribute list:   one or more "[...]" specifiers
   * [7] - separator:        ([\/, >+~]+)
   *
   * @param string $selector_string Selector, e.g. "div.a > p, span[title]"
   * @return array List of selector chains (one per comma separated entry).
   * Each chain is a list of 7-element arrays: pseudo, tag, pseudo, id,
   * classes ('' or list), attributes ('' or list of
   * [name, expression, value, inverted, case-sensitivity]) and separator.
   */
  protected function parse_selector($selector_string)
  {
    // phpcs:ignore Generic.Files.LineLength
    $selector_regex = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is";

    // The trailing space acts as a pseudo separator so that the last
    // compound selector is matched as well.
    $subject = trim($selector_string) . ' ';

    preg_match_all($selector_regex, $subject, $tokens, PREG_SET_ORDER);

    $groups = [];
    $chain = [];

    foreach ($tokens as $token) {
      $full_match = trim($token[0]);

      // Skip NoOps
      if (in_array($full_match, ['', '/', '//'], true)) {
        continue;
      }

      // Drop the full match; the remaining groups are re-indexed from 0
      $parts = array_slice($token, 1);

      // Tag names are compared in lowercase when the document asks for it
      if ($this->dom->lowercase) {
        $parts[1] = strtolower($parts[1]);
      }

      // Chained classes ("foo.bar") become a list
      if ('' !== $parts[4]) {
        $parts[4] = explode('.', $parts[4]);
      }

      /*
       * Attributes (pattern mirrors the attribute part of the selector
       * pattern): [1] name, [2] expression, [3] value, [4] case
       * sensitivity. A "!" prefix on the name negates the attribute.
       */
      if ('' !== $parts[5]) {
        preg_match_all(
          "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
          trim($parts[5]),
          $attribute_matches,
          PREG_SET_ORDER
        );

        $parsed_attributes = [];

        foreach ($attribute_matches as $found) {
          // Skip empty matches
          if ('' === trim($found[0])) {
            continue;
          }

          $attribute_name = $found[1];
          $negated = (isset($attribute_name[0]) && '!' === $attribute_name[0]);

          if ($negated) {
            $attribute_name = substr($attribute_name, 1);
          }

          // Trailing groups are absent from the match when they did not participate
          $parsed_attributes[] = [
            $attribute_name, // Name
            isset($found[2]) ? $found[2] : '', // Expression
            isset($found[3]) ? $found[3] : '', // Value
            $negated, // Inverted Flag
            isset($found[4]) ? strtolower($found[4]) : '', // Case-Sensitivity
          ];
        }

        $parts[5] = $parsed_attributes;
      }

      // Whitespace-only separators denote the descendant combinator
      $separator = trim($parts[6]);

      if ('' !== $parts[6] && '' === $separator) {
        $separator = ' ';
      }

      // A comma terminates the current chain and carries no combinator
      $ends_chain = (',' === $separator);
      $parts[6] = $ends_chain ? '' : $separator;

      $chain[] = $parts;

      if ($ends_chain) {
        $groups[] = $chain;
        $chain = [];
      }
    }

    if ([] !== $chain) {
      $groups[] = $chain;
    }

    return $groups;
  }
1050
1051
  /**
   * Magic getter for attributes and the virtual text properties.
   *
   * An existing attribute always wins; otherwise "outertext", "innertext",
   * "plaintext" and "xmltext" are mapped to their respective methods.
   *
   * @param string $name Attribute or virtual property name
   * @return mixed Converted attribute value, the requested text, or false
   * if nothing matches
   */
  public function __get($name)
  {
    if (isset($this->attr[$name])) {
      $raw_value = $this->attr[$name];

      return $this->convert_text($raw_value);
    }

    // Loose comparison on purpose: same matching rules as a switch statement
    if ('outertext' == $name) {
      return $this->outertext();
    }

    if ('innertext' == $name) {
      return $this->innertext();
    }

    if ('plaintext' == $name) {
      return $this->text();
    }

    if ('xmltext' == $name) {
      return $this->xmltext();
    }

    return false;
  }
1070
1071
  /**
   * Magic setter for attributes and the virtual text properties.
   *
   * "outertext" and "innertext" are stored in the node info array; every
   * other name is stored as an attribute.
   *
   * @param string $name Attribute or virtual property name
   * @param mixed $value New value
   * @return void
   */
  public function __set($name, $value)
  {
    // Loose comparison on purpose: same matching rules as a switch statement
    if ('outertext' == $name) {
      $this->_[self::HDOM_INFO_OUTER] = $value;

      return;
    }

    if ('innertext' == $name) {
      // Blank an existing text entry before storing the new inner text
      if (isset($this->_[self::HDOM_INFO_TEXT])) {
        $this->_[self::HDOM_INFO_TEXT] = '';
      }

      $this->_[self::HDOM_INFO_INNER] = $value;

      return;
    }

    $this->attr[$name] = $value;
  }
1087
1088
  /**
   * Magic isset() handler.
   *
   * The virtual properties "outertext", "innertext" and "plaintext" are
   * always reported as set; any other name is looked up in the attributes.
   *
   * @param string $name Attribute or virtual property name
   * @return bool
   */
  public function __isset($name)
  {
    $always_available = ['outertext', 'innertext', 'plaintext'];

    // Non-strict search on purpose: same matching rules as a switch statement
    if (in_array($name, $always_available)) {
      return true;
    }

    return isset($this->attr[$name]);
  }
1101
1102
  /**
   * Magic unset() handler: removes an attribute from the node.
   *
   * Attributes that are missing or hold null are left untouched.
   *
   * @param string $name Attribute name
   * @return void
   */
  public function __unset($name)
  {
    if (!isset($this->attr[$name])) {
      return;
    }

    unset($this->attr[$name]);
  }
1108
1109
  /**
   * Convert text from the document charset to the target charset.
   *
   * No conversion takes place when the node has no document, when either
   * charset is unknown, when both charsets are equal, or when the target
   * is UTF-8 and the text already is valid UTF-8. If the conversion itself
   * fails the original text is kept instead of returning false. For UTF-8
   * targets a byte order mark at the start or end of the text is removed.
   *
   * @param string $text Text in the source charset
   * @return string Text in the target charset
   */
  public function convert_text($text)
  {
    $converted_text = $text;

    $sourceCharset = '';
    $targetCharset = '';

    if ($this->dom) {
      $sourceCharset = strtoupper($this->dom->_charset);
      $targetCharset = strtoupper($this->dom->_target_charset);
    }

    $needs_conversion = !empty($sourceCharset)
      && !empty($targetCharset)
      && $sourceCharset !== $targetCharset;

    if ($needs_conversion) {
      if ('UTF-8' === $targetCharset && self::is_utf8($text)) {
        Debug::log_once('The source charset was incorrectly detected as ' . $sourceCharset . ' but should have been UTF-8');
      } else {
        $iconv_result = iconv($sourceCharset, $targetCharset, $text);

        if (false === $iconv_result) {
          // iconv() returns false on failure; keep the unconverted text
          // so callers always receive a string.
          Debug::log_once('Failed to convert text from ' . $sourceCharset . ' to ' . $targetCharset);
        } else {
          $converted_text = $iconv_result;
        }
      }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ('UTF-8' === $targetCharset) {
      if ("\xef\xbb\xbf" === substr($converted_text, 0, 3)) {
        $converted_text = substr($converted_text, 3);
      }

      if ("\xef\xbb\xbf" === substr($converted_text, -3)) {
        $converted_text = substr($converted_text, 0, -3);
      }
    }

    return $converted_text;
  }
1145
1146
  /**
   * Check whether a byte string is structurally valid UTF-8.
   *
   * Every byte >= 0x80 must be a lead byte followed by the number of
   * continuation bytes (0x80-0xBF) it announces. Lead bytes for the legacy
   * 5 and 6 byte forms (0xF8-0xFD) are still accepted; overlong encodings
   * are not detected.
   *
   * @param string $str Bytes to check
   * @return bool True if all multi-byte sequences are well-formed
   */
  public static function is_utf8($str)
  {
    $len = strlen($str);

    for ($i = 0; $i < $len; ++$i) {
      $c = ord($str[$i]);

      // Plain ASCII
      if ($c < 128) {
        continue;
      }

      // Determine the total sequence length from the lead byte
      if ($c >= 254) {
        return false;
      } elseif ($c >= 252) {
        $bits = 6;
      } elseif ($c >= 248) {
        $bits = 5;
      } elseif ($c >= 240) {
        $bits = 4;
      } elseif ($c >= 224) {
        $bits = 3;
      } elseif ($c >= 192) {
        $bits = 2;
      } else {
        // 0x80-0xBF: continuation byte without a lead byte
        return false;
      }

      // The sequence must not run past the end of the string
      if (($i + $bits) > $len) {
        return false;
      }

      while ($bits > 1) {
        ++$i;
        $b = ord($str[$i]);

        if ($b < 128 || $b > 191) {
          return false;
        }

        --$bits;
      }
    }

    return true;
  }
1186
1187
  /**
   * Determine the display size of an image element.
   *
   * The "width" and "height" attributes are used when present. A dimension
   * that is still unknown (-1) afterwards is taken from the inline style
   * if it is declared there as an integer number of pixels (e.g. "120px").
   *
   * Style sheets, classes and ids are not taken into account.
   *
   * @return array|false Array with the keys "height" and "width" (-1 if
   * unknown), or false if this node is not an "img" element
   */
  public function get_display_size()
  {
    if ('img' !== $this->tag) {
      return false;
    }

    $size = [
      'height' => -1,
      'width' => -1,
    ];

    // See if there is a height or width attribute in the tag itself.
    foreach (array_keys($size) as $dimension) {
      if (isset($this->attr[$dimension])) {
        $size[$dimension] = $this->attr[$dimension];
      }
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
      // Thanks to user gnarf from stackoverflow for this regular expression.
      preg_match_all(
        '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
        $this->attr['style'],
        $matches,
        PREG_SET_ORDER
      );

      $declarations = [];

      foreach ($matches as $match) {
        // Property names are case-insensitive; the value group may carry
        // trailing whitespace (e.g. "100px ;").
        $declarations[strtolower($match[1])] = trim($match[2]);
      }

      foreach (array_keys($size) as $dimension) {
        // Attributes on the tag take precedence over the inline style
        if (-1 != $size[$dimension] || !isset($declarations[$dimension])) {
          continue;
        }

        $value = $declarations[$dimension];

        // Only pixel values are supported
        if ('px' !== strtolower(substr($value, -2))) {
          continue;
        }

        $pixels = substr($value, 0, -2);

        // filter_var() returns false for invalid input; 0 is a valid size
        if (false !== filter_var($pixels, FILTER_VALIDATE_INT)) {
          $size[$dimension] = $pixels;
        }
      }
    }

    return $size;
  }
1268
1269
  /**
   * Render the node and optionally write the result to a file.
   *
   * @param string $filepath Target file; nothing is written if empty
   * @return string The outer text of this node
   */
  public function save($filepath = '')
  {
    $markup = $this->outertext();

    if ('' === $filepath) {
      return $markup;
    }

    // Exclusive lock while writing
    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
  }
1279
1280
  /**
   * Add one or more classes to the "class" attribute.
   *
   * Classes that are already present are not added again. Empty names
   * (for example caused by repeated whitespace) are ignored.
   *
   * @param string|array $class Whitespace separated class names or a list
   * of class names. Other types are ignored.
   * @return void
   */
  public function addClass($class)
  {
    if (is_string($class)) {
      // Split on any whitespace run so no empty names are produced
      $class = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!is_array($class)) {
      return;
    }

    foreach ($class as $c) {
      if ('' === $c) {
        continue;
      }

      // "class" is a virtual property backed by the attribute list
      if (!isset($this->class)) {
        $this->class = $c;
      } elseif (!$this->hasClass($c)) {
        $this->class .= ' ' . $c;
      }
    }
  }
1300
1301
  /**
   * Check whether the "class" attribute contains a class name.
   *
   * The attribute value is treated as a whitespace separated list, so
   * names separated by tabs, line breaks or repeated spaces are found too.
   *
   * @param string $class Class name to look for
   * @return bool True if the class is present; false otherwise, also when
   * $class is not a string
   */
  public function hasClass($class)
  {
    if (!is_string($class) || !isset($this->class)) {
      return false;
    }

    $present = preg_split('/\s+/', $this->class, -1, PREG_SPLIT_NO_EMPTY);

    return in_array($class, $present, true);
  }
1311
1312
  /**
   * Remove classes from the "class" attribute.
   *
   * Without an argument the whole attribute is removed. The attribute is
   * also removed when no class remains. Class lists are split on any
   * whitespace, so the rewritten attribute never contains empty names.
   *
   * @param string|array|null $class Whitespace separated class names, a
   * list of class names, or null to remove all classes
   * @return void
   */
  public function removeClass($class = null)
  {
    if (!isset($this->class)) {
      return;
    }

    if (is_null($class)) {
      $this->removeAttribute('class');

      return;
    }

    if (is_string($class)) {
      $class = preg_split('/\s+/', $class, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!is_array($class)) {
      return;
    }

    $current = preg_split('/\s+/', $this->class, -1, PREG_SPLIT_NO_EMPTY);
    $remaining = array_diff($current, $class);

    if (empty($remaining)) {
      $this->removeAttribute('class');
    } else {
      $this->class = implode(' ', $remaining);
    }
  }
1337
1338
  /**
   * Get all attributes of this node.
   *
   * @return array Attribute values indexed by attribute name (unconverted)
   */
  public function getAllAttributes()
  {
    $attributes = $this->attr;

    return $attributes;
  }
1342
1343
  /**
   * Get the value of an attribute.
   *
   * The magic getter is called directly. Reading "$this->$name" from inside
   * the class would return a declared property (e.g. "parent", "children",
   * "tag" or "dom") instead of the attribute of the same name.
   *
   * @param string $name Attribute name (may contain characters that are
   * not valid in PHP property syntax, such as ":")
   * @return mixed Attribute value, virtual text property, or false if the
   * attribute does not exist
   */
  public function getAttribute($name)
  {
    return $this->__get((string) $name);
  }
1347
1348
  /**
   * Set the value of an attribute.
   *
   * The magic setter is called directly. Writing "$this->$name" from inside
   * the class would overwrite a declared property (e.g. "parent", "tag" or
   * "children") instead of storing the attribute of the same name.
   *
   * @param string $name Attribute name ("outertext" and "innertext" set
   * the respective virtual property)
   * @param mixed $value New value
   * @return void
   */
  public function setAttribute($name, $value)
  {
    $this->__set((string) $name, $value);
  }
1352
1353
  /**
   * Check whether an attribute exists.
   *
   * The magic isset handler is called directly. "isset($this->$name)" from
   * inside the class would report declared properties (e.g. "children" or
   * "tag") as attributes.
   *
   * @param string $name Attribute name
   * @return bool True if the attribute is set; also true for the virtual
   * properties "outertext", "innertext" and "plaintext"
   */
  public function hasAttribute($name)
  {
    return $this->__isset((string) $name);
  }
1357
1358
  /**
   * Remove an attribute from this node.
   *
   * The magic unset handler is called directly. "unset($this->$name)" from
   * inside the class would destroy a declared property (e.g. "children" or
   * "parent") when an attribute of the same name is removed.
   *
   * @param string $name Attribute name
   * @return void
   */
  public function removeAttribute($name)
  {
    $this->__unset((string) $name);
  }
1362
1363
  /**
   * Remove this node from its parent.
   *
   * Nothing happens if the node has no parent.
   *
   * @return void
   */
  public function remove()
  {
    if (!$this->parent) {
      return;
    }

    $this->parent->removeChild($this);
  }
1369
1370
  /**
   * Remove a child node, including all of its descendants.
   *
   * The node and everything below it is removed from this node and from
   * the node list of the document, if there is one. The document list is
   * not re-indexed, because nodes refer to positions in that list. Finally
   * the removed node is cleared.
   *
   * @param HtmlNode $node Node to remove
   * @return void
   */
  public function removeChild($node)
  {
    // Remove element children first. foreach works on a snapshot, so the
    // re-indexing done by the nested call does not disturb the iteration.
    foreach ($node->children as $child) {
      $node->removeChild($child);
    }

    // Remove whatever is left below the node (entries that are not
    // element children). No need to re-index node->nodes because the node
    // is about to be removed!
    foreach ($node->nodes as $enidx => $entity) {
      unset($node->nodes[$enidx]);

      // Nodes may exist without a document (see appendChild)
      if ($node->dom) {
        $edidx = array_search($entity, $node->dom->nodes, true);

        if (false !== $edidx) {
          unset($node->dom->nodes[$edidx]);
        }
      }
    }

    // Detach the node itself from this node
    $nidx = array_search($node, $this->nodes, true);

    if (false !== $nidx) {
      unset($this->nodes[$nidx]);
    }

    $this->nodes = array_values($this->nodes);

    $cidx = array_search($node, $this->children, true);

    if (false !== $cidx) {
      unset($this->children[$cidx]);
    }

    $this->children = array_values($this->children);

    // Do not re-index dom->nodes because nodes point to other nodes in the
    // array explicitly!
    if ($this->dom) {
      $didx = array_search($node, $this->dom->nodes, true);

      if (false !== $didx) {
        unset($this->dom->nodes[$didx]);
      }
    }

    $node->clear();
  }
1418
1419
  /**
   * Find the first element with the given id below this node.
   *
   * @param string $id Element id (without "#")
   * @return mixed Result of find() for the selector "#<id>" at index 0
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
1423
1424
  /**
   * Find elements with the given id below this node.
   *
   * @param string $id Element id (without "#")
   * @param int|null $idx Index passed through to find()
   * @return mixed Result of find() for the selector "#<id>"
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
1428
1429
  /**
   * Find the first element with the given tag name below this node.
   *
   * @param string $name Tag name, used as selector
   * @return mixed Result of find() at index 0
   */
  public function getElementByTagName($name)
  {
    $first_index = 0;

    return $this->find($name, $first_index);
  }
1433
1434
  /**
   * Find elements with the given tag name below this node.
   *
   * @param string $name Tag name, used as selector
   * @param int|null $idx Index passed through to find()
   * @return mixed Result of find()
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $found = $this->find($name, $idx);

    return $found;
  }
1438
1439
  /**
   * DOM-style alias for parent().
   *
   * @return mixed Whatever parent() returns when called without arguments
   */
  public function parentNode()
  {
    $parent_node = $this->parent();

    return $parent_node;
  }
1443
1444
  /**
   * Get all child elements or a single child element.
   *
   * @param int $idx Index of the child, or -1 (integer) for all children
   * @return array|HtmlNode|null List of children for -1, otherwise the
   * child at the given index or null if there is none
   */
  public function childNodes($idx = -1)
  {
    if (-1 !== $idx) {
      return isset($this->children[$idx]) ? $this->children[$idx] : null;
    }

    return $this->children;
  }
1456
1457
  /**
   * Get the first child element.
   *
   * @return HtmlNode|null First child or null if there are no children
   */
  public function firstChild()
  {
    $has_children = count($this->children) > 0;

    return $has_children ? $this->children[0] : null;
  }
1465
1466
  /**
   * Get the last child element.
   *
   * @return HtmlNode|null Last child or null if there are no children
   */
  public function lastChild()
  {
    if (0 === count($this->children)) {
      return null;
    }

    // end() also moves the internal array pointer to the last element
    return end($this->children);
  }
1474
1475
  /**
   * Get the element that follows this node in the children of its parent.
   *
   * @return HtmlNode|null Next sibling, or null if the node has no parent,
   * is not listed as a child of its parent, or is the last child
   */
  public function nextSibling()
  {
    if (null === $this->parent) {
      return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if (false === $position) {
      return null;
    }

    $next = $position + 1;

    return isset($siblings[$next]) ? $siblings[$next] : null;
  }
1489
1490
  /**
   * Get the element that precedes this node in the children of its parent.
   *
   * @return HtmlNode|null Previous sibling, or null if the node has no
   * parent, is not listed as a child of its parent, or is the first child
   */
  public function previousSibling()
  {
    if (null === $this->parent) {
      return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if (false === $position || $position <= 0) {
      return null;
    }

    return $siblings[$position - 1];
  }
1504
1505
  /**
   * Check whether this node has child elements.
   *
   * @return bool True if the list of children is not empty
   */
  public function hasChildNodes()
  {
    return (bool) $this->children;
  }
1509
1510
  /**
   * Get the name of this node.
   *
   * @return string The tag of the node ("text" by default)
   */
  public function nodeName()
  {
    $name = $this->tag;

    return $name;
  }
1514
1515
  /**
   * Append a node to the children of this node.
   *
   * If this node belongs to a document, the appended node and all of its
   * element descendants are registered in the node list of the document
   * and receive their position in that list as begin and end index.
   *
   * The position is read from the list itself rather than derived from
   * count(): removeChild() leaves gaps in the list, so the number of
   * entries does not necessarily match the key of the last entry.
   *
   * @param HtmlNode $node Node to append
   * @return HtmlNode This node
   */
  public function appendChild($node)
  {
    $node->parent = $this;
    $this->nodes[] = $node;
    $this->children[] = $node;

    if ($this->dom) { // Attach current node to DOM (recursively)
      $children = [$node];

      while ($children) {
        $child = array_pop($children);
        $children = array_merge($children, $child->children);

        $this->dom->nodes[] = $child;

        // Key that was actually assigned to the appended entry
        end($this->dom->nodes);
        $index = key($this->dom->nodes);

        $child->dom = $this->dom;
        $child->_[self::HDOM_INFO_BEGIN] = $index;
        $child->_[self::HDOM_INFO_END] = $index;
      }

      // The root ends at the last entry of the node list
      end($this->dom->nodes);
      $this->dom->root->_[self::HDOM_INFO_END] = key($this->dom->nodes);
    }

    return $this;
  }
1539
}
1540