Completed
Push — master ( de33f2...651825 )
by Lars
01:40
created

SimpleHtmlDom::getNode()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
use BadMethodCallException;
8
use DOMElement;
9
use DOMNode;
10
use RuntimeException;
11
12
/**
 * Class SimpleHtmlDom
 *
 * Thin wrapper around a single DOM node that exposes a "simple_html_dom"-like
 * API (magic properties, snake_case method aliases, CSS-selector lookups).
 *
 * @package voku\helper
 *
 * @property string          outerText <p>Get dom node's outer html (alias for "outerHtml").</p>
 * @property string          outerHtml <p>Get dom node's outer html.</p>
 * @property string          innerText <p>Get dom node's inner html (alias for "innerHtml").</p>
 * @property string          innerHtml <p>Get dom node's inner html.</p>
 * @property-read string     plaintext <p>Get dom node's plain text.</p>
 * @property-read string     tag       <p>Get dom node name.</p>
 * @property-read array|null attr      <p>Get dom node attributes.</p>
 *
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children(int $idx = -1) <p>Returns children of node.</p>
 * @method SimpleHtmlDom|null first_child() <p>Returns the first child of node.</p>
 * @method SimpleHtmlDom|null last_child() <p>Returns the last child of node.</p>
 * @method SimpleHtmlDom|null next_sibling() <p>Returns the next sibling of node.</p>
 * @method SimpleHtmlDom|null prev_sibling() <p>Returns the previous sibling of node.</p>
 * @method SimpleHtmlDom|null parent() <p>Returns the parent of node.</p>
 *
 * @method string outerText() <p>Get dom node's outer html (alias for "outerHtml()").</p>
 * @method string outerHtml() <p>Get dom node's outer html.</p>
 * @method string innerText() <p>Get dom node's inner html (alias for "innerHtml()").</p>
 */
class SimpleHtmlDom implements \IteratorAggregate
{
  /**
   * Maps the lower-cased alias names accepted by __call() to real method names.
   *
   * @var array
   */
  protected static $functionAliases = [
      'children'     => 'childNodes',
      'first_child'  => 'firstChild',
      'last_child'   => 'lastChild',
      'next_sibling' => 'nextSibling',
      'prev_sibling' => 'previousSibling',
      'parent'       => 'parentNode',
      'outertext'    => 'html',
      'outerhtml'    => 'html',
      'innertext'    => 'innerHtml',
      'innerhtml'    => 'innerHtml',
  ];

  /**
   * The wrapped node. This is not necessarily an element: child iteration
   * also wraps text nodes, and callers may pass any DOMNode.
   *
   * @var \DOMNode
   */
  protected $node;

  /**
   * SimpleHtmlDom constructor.
   *
   * @param \DOMNode $node <p>The node to wrap.</p>
   */
  public function __construct(\DOMNode $node)
  {
    // Any DOMNode is accepted here, so the element-only operations of this
    // class have to check for \DOMElement before touching attributes.
    $this->node = $node;
  }

  /**
   * Dispatch alias method names (see self::$functionAliases), case-insensitively.
   *
   * @param string $name
   * @param array  $arguments
   *
   * @return null|string|SimpleHtmlDom
   *
   * @throws \BadMethodCallException if $name is not a known alias
   */
  public function __call($name, $arguments)
  {
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      throw new \BadMethodCallException('Method does not exist');
    }

    // call_user_func_array() is kept on purpose: calls made by internal
    // functions are not subject to this file's strict_types declaration, so
    // forwarded scalar arguments keep being coerced as before.
    return \call_user_func_array([$this, self::$functionAliases[$alias]], $arguments);
  }

  /**
   * Magic property read: html/text accessors, tag name, attribute list, or,
   * for any other name, the attribute of that (lower-cased) name.
   *
   * @param string $name
   *
   * @return array|null|string
   */
  public function __get($name)
  {
    $key = \strtolower($name);

    switch ($key) {
      case 'outerhtml':
      case 'outertext':
        return $this->html();
      case 'innerhtml':
      case 'innertext':
        return $this->innerHtml();
      case 'text':
      case 'plaintext':
        return $this->text();
      case 'tag':
        return $this->node->nodeName;
      case 'attr':
        return $this->getAllAttributes();
      default:
        return $this->getAttribute($key);
    }
  }

  /**
   * Calling the object like a function is a shortcut for find().
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function __invoke($selector, $idx = null)
  {
    return $this->find($selector, $idx);
  }

  /**
   * Magic isset(): the built-in html/text/tag properties always exist, every
   * other name is checked as an attribute.
   *
   * @param string $name
   *
   * @return bool
   */
  public function __isset($name)
  {
    $key = \strtolower($name);

    switch ($key) {
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      default:
        return $this->hasAttribute($key);
    }
  }

  /**
   * Magic property write: replaces the node (outer*), replaces its content
   * (inner*), or sets the attribute of that (lower-cased) name.
   *
   * @param string $name
   * @param mixed  $value
   *
   * @return SimpleHtmlDom|null
   */
  public function __set($name, $value)
  {
    $key = \strtolower($name);

    switch ($key) {
      case 'outerhtml':
      case 'outertext':
        return $this->replaceNode($value);
      case 'innertext':
      case 'innerhtml':
        return $this->replaceChild($value);
      default:
        return $this->setAttribute($key, $value);
    }
  }

  /**
   * @return string the node's outer html
   */
  public function __toString()
  {
    return $this->html();
  }

  /**
   * Magic unset(): removes the attribute of that name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom
   */
  public function __unset($name)
  {
    // __get(), __set() and __isset() all lower-case the name; do the same here
    // so that "unset($el->Foo)" removes what "$el->Foo = ..." created.
    return $this->removeAttribute(\strtolower($name));
  }

  /**
   * Returns children of node.
   *
   * @param int $idx <p>-1 returns the whole list, any other value the child at that index.</p>
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null <p>NULL if there is no child at $idx.</p>
   */
  public function childNodes(int $idx = -1)
  {
    $children = $this->getIterator();

    if ($idx === -1) {
      return $children;
    }

    return isset($children[$idx]) ? $children[$idx] : null;
  }

  /**
   * Find list of nodes with a CSS selector.
   *
   * Delegates to HtmlDomParser::find() on a parser created from this node.
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function find(string $selector, $idx = null)
  {
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
  }

  /**
   * Find one node with a CSS selector (shortcut for find($selector, 0)).
   *
   * @param string $selector
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function findOne(string $selector)
  {
    return $this->find($selector, 0);
  }

  /**
   * Returns the first child of node.
   *
   * @return SimpleHtmlDom|null
   */
  public function firstChild()
  {
    return $this->wrapNodeOrNull($this->node->firstChild);
  }

  /**
   * Returns an array of attributes.
   *
   * Each value is passed through
   * HtmlDomParser::putReplacedBackToPreserveHtmlEntities().
   *
   * @return array|null <p>name => value map, or NULL if the node has no attributes.</p>
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $attributes = [];
    foreach ($this->node->attributes as $attr) {
      $attributes[$attr->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attr->value);
    }

    return $attributes;
  }

  /**
   * Return attribute value.
   *
   * @param string $name
   *
   * @return string <p>Empty string if the wrapped node is not an element.</p>
   */
  public function getAttribute(string $name): string
  {
    // Text nodes, comments, documents ... have no getAttribute() method.
    if (!($this->node instanceof \DOMElement)) {
      return '';
    }

    $raw = $this->node->getAttribute($name);

    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($raw);
  }

  /**
   * Return element by #id.
   *
   * @param string $id
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function getElementById(string $id)
  {
    return $this->find("#$id", 0);
  }

  /**
   * Return element by tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank <p>A blank node if nothing matches.</p>
   */
  public function getElementByTagName(string $name)
  {
    $first = $this->node->getElementsByTagName($name)->item(0);

    if ($first === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new self($first);
  }

  /**
   * Returns elements by #id.
   *
   * @param string   $id
   * @param null|int $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
   */
  public function getElementsById(string $id, $idx = null)
  {
    return $this->find("#$id", $idx);
  }

  /**
   * Returns elements by tag name.
   *
   * @param string   $name
   * @param null|int $idx <p>NULL returns all matches; a negative value counts from the end.</p>
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsByTagName(string $name, $idx = null)
  {
    $elements = new SimpleHtmlDomNode();
    foreach ($this->node->getElementsByTagName($name) as $match) {
      $elements[] = new self($match);
    }

    // return all elements
    if ($idx === null) {
      return $elements;
    }

    // handle negative values
    if ($idx < 0) {
      $idx = \count($elements) + $idx;
    }

    // return one element, or a blank-element if the index does not exist
    if (isset($elements[$idx])) {
      return $elements[$idx];
    }

    return new SimpleHtmlDomNodeBlank();
  }

  /**
   * Create a new "HtmlDomParser"-object from the current context.
   *
   * @return HtmlDomParser
   */
  public function getHtmlDomParser(): HtmlDomParser
  {
    return new HtmlDomParser($this);
  }

  /**
   * Retrieve an external iterator over the direct child nodes.
   *
   * Every child node (of any node type) is wrapped in its own instance.
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode An instance of an object implementing <b>Iterator</b> or
   * <b>Traversable</b>
   */
  public function getIterator(): SimpleHtmlDomNode
  {
    $elements = new SimpleHtmlDomNode();

    if ($this->node->hasChildNodes()) {
      foreach ($this->node->childNodes as $child) {
        $elements[] = new self($child);
      }
    }

    return $elements;
  }

  /**
   * @return \DOMNode the wrapped node
   */
  public function getNode(): \DOMNode
  {
    return $this->node;
  }

  /**
   * Determine if an attribute exists on the element.
   *
   * @param string $name
   *
   * @return bool <p>Always FALSE if the wrapped node is not an element.</p>
   */
  public function hasAttribute(string $name): bool
  {
    return $this->node instanceof \DOMElement && $this->node->hasAttribute($name);
  }

  /**
   * Get dom node's outer html.
   *
   * @param bool $multiDecodeNewHtmlEntity <p>Forwarded unchanged to HtmlDomParser::html().</p>
   *
   * @return string
   */
  public function html(bool $multiDecodeNewHtmlEntity = false): string
  {
    $parser = $this->getHtmlDomParser();

    return $parser->html($multiDecodeNewHtmlEntity);
  }

  /**
   * Get dom node's inner html.
   *
   * @param bool $multiDecodeNewHtmlEntity <p>Forwarded unchanged to HtmlDomParser::innerHtml().</p>
   *
   * @return string
   */
  public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
  {
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml($multiDecodeNewHtmlEntity);
  }

  /**
   * Returns the last child of node.
   *
   * @return SimpleHtmlDom|null
   */
  public function lastChild()
  {
    return $this->wrapNodeOrNull($this->node->lastChild);
  }

  /**
   * Returns the next sibling of node.
   *
   * @return SimpleHtmlDom|null
   */
  public function nextSibling()
  {
    return $this->wrapNodeOrNull($this->node->nextSibling);
  }

  /**
   * Returns the parent of node.
   *
   * @return SimpleHtmlDom|null <p>NULL if the node has no parent (e.g. a document or a detached node).</p>
   */
  public function parentNode()
  {
    // Wrapping a missing parent directly would hit the constructor's DOMNode
    // type check with NULL; behave like the child / sibling accessors instead.
    return $this->wrapNodeOrNull($this->node->parentNode);
  }

  /**
   * Returns the previous sibling of node.
   *
   * @return SimpleHtmlDom|null
   */
  public function previousSibling()
  {
    return $this->wrapNodeOrNull($this->node->previousSibling);
  }

  /**
   * Replace child node.
   *
   * Removes all current children and, unless $string is empty, appends the
   * parsed fragment in their place.
   *
   * @param string $string <p>New inner html; an empty() string (including "0") only clears the node.</p>
   *
   * @return $this
   *
   * @throws \RuntimeException if the parsed fragment does not match the given string
   */
  protected function replaceChild(string $string)
  {
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      // Strict comparison: both sides are strings and "!=" would compare
      // numeric-looking strings by value.
      if ($this->normalizeStringForComparision($newDocument) !== $this->normalizeStringForComparision($string)) {
        throw new \RuntimeException('Not valid HTML fragment');
      }
    }

    // "childNodes" is a live list: removing nodes while iterating it skips
    // every second child. Drain from the front until nothing is left.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);
      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
      $this->node->appendChild($newNode);
    }

    return $this;
  }

  /**
   * Replace this node.
   *
   * An empty() string removes the node from its parent; otherwise the node is
   * replaced by the parsed fragment and this object wraps the new node.
   *
   * @param string $string
   *
   * @return $this|null <p>NULL if the node was removed.</p>
   *
   * @throws \RuntimeException if the parsed fragment does not match the given string
   */
  protected function replaceNode(string $string)
  {
    if (empty($string)) {
      $this->node->parentNode->removeChild($this->node);

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    // Strict comparison, see replaceChild().
    if ($this->normalizeStringForComparision($newDocument->outerText()) !== $this->normalizeStringForComparision($string)) {
      throw new \RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);
    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    $this->node->parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
  }

  /**
   * Normalize the given input for comparision.
   *
   * Lower-cases the html, strips spaces and line breaks, turns "/>" into ">"
   * and runs the result through urldecode() + urlencode().
   *
   * @param HtmlDomParser|string $input
   *
   * @return string
   */
  private function normalizeStringForComparision($input): string
  {
    if ($input instanceof HtmlDomParser) {
      $string = $input->outerText();

      if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper() === true) {
        $string = \str_replace(['<head>', '</head>'], '', $string);
      }
    } else {
      $string = (string)$input;
    }

    $compact = \str_replace(
        [' ', "\n", "\r", '/>'],
        ['', '', '', '>'],
        \strtolower($string)
    );

    return \urlencode(\urldecode(\trim($compact)));
  }

  /**
   * Strip the doctype and the "html" / "body" wrappers from a parsed fragment,
   * if the parser reports that the input came without them.
   *
   * @param HtmlDomParser $newDocument
   *
   * @return HtmlDomParser the same instance, its document modified in place
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument): HtmlDomParser
  {
    $needsUnwrapping = $newDocument->getIsDOMDocumentCreatedWithoutHtml() === true
                       ||
                       $newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper() === true;

    if (!$needsUnwrapping) {
      return $newDocument;
    }

    $document = $newDocument->getDocument();

    // Remove doc-type node.
    if ($document->doctype !== null) {
      $document->doctype->parentNode->removeChild($document->doctype);
    }

    // Remove html element, preserving child nodes.
    $html = $document->getElementsByTagName('html')->item(0);
    if ($html !== null) {
      $this->replaceElementWithItsChildren($document, $html);
    }

    // Remove body element, preserving child nodes.
    $body = $document->getElementsByTagName('body')->item(0);
    if ($body instanceof \DOMElement) {
      $this->replaceElementWithItsChildren($document, $body);

      // At this point DOMDocument still added a "<p>"-wrapper around our string,
      // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
      $item = $document->getElementsByTagName('p')->item(0);
      if ($item !== null) {
        $this->changeElementName($item, 'simpleHtmlDomP');
      }
    }

    return $newDocument;
  }

  /**
   * Change the name of a tag in a "DOMNode".
   *
   * Creates a new element called $name, fills it with copies of the children
   * and the attributes of $node, and replaces $node with it through the owner
   * document (so $node is expected to be a direct child of its document).
   *
   * @param \DOMNode $node
   * @param string   $name
   *
   * @return \DOMElement the replacement element
   */
  protected function changeElementName(\DOMNode $node, string $name): \DOMElement
  {
    $document = $node->ownerDocument;
    $newnode = $document->createElement($name);

    foreach ($node->childNodes as $child) {
      $newnode->appendChild($document->importNode($child, true));
    }

    // "attributes" is NULL for non-element nodes, hence the guard.
    if ($node->hasAttributes()) {
      foreach ($node->attributes as $attrName => $attrNode) {
        // Copy the attribute's string value; handing over the DOMAttr object
        // itself is a TypeError under strict_types.
        $newnode->setAttribute($attrName, $attrNode->value);
      }
    }

    $newnode->ownerDocument->replaceChild($newnode, $node);

    return $newnode;
  }

  /**
   * Set attribute value.
   *
   * @param string      $name       <p>The name of the html-attribute.</p>
   * @param string|null $value      <p>Set to NULL or empty string, to remove the attribute.</p>
   * @param bool        $strict     <p>
   *                                $value must be NULL, to remove the attribute,
   *                                so that you can set an empty string as attribute-value e.g. autofocus=""
   *                                </p>
   *
   * @return $this
   */
  public function setAttribute(string $name, $value = null, bool $strict = false)
  {
    // Only elements carry attributes; nothing to do for text nodes etc.
    if (!($this->node instanceof \DOMElement)) {
      return $this;
    }

    if ($strict === true) {
      $remove = $value === null;
    } else {
      // empty() also matches the string "0", which is a perfectly valid
      // attribute value (tabindex="0", value="0"), so it is excluded here.
      $remove = empty($value) && $value !== '0';
    }

    if ($remove) {
      $this->node->removeAttribute($name);
    } else {
      $this->node->setAttribute($name, $value);
    }

    return $this;
  }

  /**
   * Remove attribute.
   *
   * @param string $name <p>The name of the html-attribute.</p>
   *
   * @return $this
   */
  public function removeAttribute(string $name)
  {
    // Only elements carry attributes; nothing to do for text nodes etc.
    if ($this->node instanceof \DOMElement) {
      $this->node->removeAttribute($name);
    }

    return $this;
  }

  /**
   * Get dom node's plain text.
   *
   * @return string
   */
  public function text(): string
  {
    return $this->node->textContent;
  }

  /**
   * Wrap a node taken from a DOM property that may be empty.
   *
   * @param \DOMNode|null $node
   *
   * @return SimpleHtmlDom|null <p>NULL if $node is NULL.</p>
   */
  private function wrapNodeOrNull($node)
  {
    if ($node === null) {
      return null;
    }

    return new self($node);
  }

  /**
   * Replace $element with its own child nodes, i.e. drop the tag but keep the content.
   *
   * @param \DOMDocument $document <p>The document that owns $element.</p>
   * @param \DOMNode     $element
   */
  private function replaceElementWithItsChildren($document, \DOMNode $element)
  {
    $fragment = $document->createDocumentFragment();

    // Appending a child to the fragment detaches it from $element, so the
    // list shrinks by one on every pass.
    while ($element->childNodes->length > 0) {
      $fragment->appendChild($element->childNodes->item(0));
    }

    $element->parentNode->replaceChild($fragment, $element);
  }
}
735