Completed
Push — master ( b8fd30...907d46 )
by Lars
01:39
created

SimpleHtmlDom::hasAttribute()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
use BadMethodCallException;
8
use DOMElement;
9
use DOMNode;
10
use RuntimeException;
11
12
/**
13
 * Class SimpleHtmlDom
14
 *
15
 * @package voku\helper
16
 *
17
 * @property string      outerText <p>Get dom node's outer html (alias for "outerHtml").</p>
18
 * @property string      outerHtml <p>Get dom node's outer html.</p>
19
 * @property string      innerText <p>Get dom node's inner html (alias for "innerHtml").</p>
20
 * @property string      innerHtml <p>Get dom node's inner html.</p>
21
 * @property-read string plaintext <p>Get dom node's plain text.</p>
22
 * @property-read string tag       <p>Get dom node name.</p>
23
 * @property-read string attr      <p>Get dom node attributes.</p>
24
 *
25
 * @method SimpleHtmlDomNode|SimpleHtmlDom[]|SimpleHtmlDom|null children() children($idx = -1) <p>Returns children of
26
 *         node.</p>
27
 * @method SimpleHtmlDom|null first_child() <p>Returns the first child of node.</p>
28
 * @method SimpleHtmlDom|null last_child() <p>Returns the last child of node.</p>
29
 * @method SimpleHtmlDom|null next_sibling() <p>Returns the next sibling of node.</p>
30
 * @method SimpleHtmlDom|null prev_sibling() <p>Returns the previous sibling of node.</p>
31
 * @method SimpleHtmlDom|null parent() <p>Returns the parent of node.</p>
32
 *
33
 * @method string outerText() <p>Get dom node's outer html (alias for "outerHtml()").</p>
34
 * @method string outerHtml() <p>Get dom node's outer html.</p>
35
 * @method string innerText() <p>Get dom node's inner html (alias for "innerHtml()").</p>
36
 *
37
 */
38
class SimpleHtmlDom implements \IteratorAggregate
39
{
40
  /**
41
   * @var array
42
   */
43
  protected static $functionAliases = [
44
      'children'     => 'childNodes',
45
      'first_child'  => 'firstChild',
46
      'last_child'   => 'lastChild',
47
      'next_sibling' => 'nextSibling',
48
      'prev_sibling' => 'previousSibling',
49
      'parent'       => 'parentNode',
50
      'outertext'    => 'html',
51
      'outerhtml'    => 'html',
52
      'innertext'    => 'innerHtml',
53
      'innerhtml'    => 'innerHtml',
54
  ];
55
56
  /**
57
   * @var DOMElement
58
   */
59
  protected $node;
60
61
  /**
62
   * SimpleHtmlDom constructor.
63
   *
64
   * @param DOMNode $node
65
   */
66 98
  public function __construct(DOMNode $node)
67
  {
68 98
    $this->node = $node;
0 ignored issues
show
Documentation Bug introduced by
$node is of type object<DOMNode>, but the property $node was declared to be of type object<DOMElement>. Are you sure that you always receive this specific sub-class here, or does it make sense to add an instanceof check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a given class or a super-class is assigned to a property that is type-hinted more strictly.

Either this assignment is in error or an instanceof check should be added for that assignment.

class Alien {}

class Dalek extends Alien {}

class Plot
{
    /** @var  Dalek */
    public $villain;
}

$alien = new Alien();
$plot = new Plot();
if ($alien instanceof Dalek) {
    $plot->villain = $alien;
}
Loading history...
69 98
  }
70
71
  /**
72
   * @param string $name
73
   * @param array  $arguments
74
   *
75
   * @return null|string|SimpleHtmlDom
76
   *
77
   * @throws \BadMethodCallException
78
   */
79 9 View Code Duplication
  public function __call($name, $arguments)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
80
  {
81 9
    $name = \strtolower($name);
82
83 9
    if (isset(self::$functionAliases[$name])) {
84 9
      return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments);
85
    }
86
87
    throw new BadMethodCallException('Method does not exist');
88
  }
89
90
  /**
91
   * @param string $name
92
   *
93
   * @return array|null|string
94
   */
95 43
  public function __get($name)
  {
    $property = \strtolower($name);

    // Outer-html and its alias.
    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->html();
    }

    // Inner-html and its alias.
    if ($property === 'innerhtml' || $property === 'innertext') {
      return $this->innerHtml();
    }

    // Plain text and its alias.
    if ($property === 'text' || $property === 'plaintext') {
      return $this->text();
    }

    if ($property === 'tag') {
      return $this->node->nodeName;
    }

    if ($property === 'attr') {
      return $this->getAllAttributes();
    }

    // Every other name is looked up as an html-attribute of the node.
    return $this->getAttribute($property);
  }
117
118
  /**
119
   * @param string $selector
120
   * @param int    $idx
121
   *
122
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
123
   */
124 12
  public function __invoke($selector, $idx = null)
  {
    // Calling the object like a function is shorthand for find().
    $result = $this->find($selector, $idx);

    return $result;
  }
128
129
  /**
130
   * @param $name
131
   *
132
   * @return bool
133
   */
134 1
  public function __isset($name)
  {
    $name = \strtolower($name);

    switch ($name) {
      // Virtual properties that __get() resolves through html(), innerHtml(),
      // text() or the node name.
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      // "attr" is also a virtual property of __get(): it returns
      // getAllAttributes(), which is null when the node has no attributes.
      // Report it as set exactly when that value would be non-null, so that
      // isset()/empty() agree with what reading the property yields.
      case 'attr':
        return $this->node->hasAttributes();
      // Anything else is treated as an html-attribute name.
      default:
        return $this->hasAttribute($name);
    }
  }
151
152
  /**
153
   * @param $name
154
   * @param $value
155
   *
156
   * @return SimpleHtmlDom
157
   */
158 14
  public function __set($name, $value)
  {
    $property = \strtolower($name);

    // Assigning outer html swaps out the node itself.
    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->replaceNode($value);
    }

    // Assigning inner html swaps out the node's children.
    if ($property === 'innertext' || $property === 'innerhtml') {
      return $this->replaceChild($value);
    }

    // Everything else is written as an html-attribute.
    return $this->setAttribute($property, $value);
  }
173
174
  /**
175
   * @return string
176
   */
177 2
  public function __toString()
  {
    // String conversion yields the node's outer html.
    $outerHtml = $this->html();

    return $outerHtml;
  }
181
182
  /**
183
   * @param $name
184
   *
185
   * @return SimpleHtmlDom
186
   */
187 1
  public function __unset($name)
  {
    // __get(), __isset() and __set() all lower-case the property name before
    // they touch the attribute; do the same here so that unset($node->Foo)
    // removes the attribute that $node->Foo = '...' created.
    return $this->removeAttribute(\strtolower($name));
  }
191
192
  /**
193
   * Returns children of node.
194
   *
195
   * @param int $idx
196
   *
197
   * @return SimpleHtmlDomNode|SimpleHtmlDom[]|SimpleHtmlDom|null
198
   */
199 2
  public function childNodes(int $idx = -1)
  {
    $children = $this->getIterator();

    // -1 is the sentinel for "return the whole list".
    if ($idx === -1) {
      return $children;
    }

    // A single child by position, or null when that position does not exist.
    return isset($children[$idx]) ? $children[$idx] : null;
  }
213
214
  /**
215
   * Find list of nodes with a CSS selector.
216
   *
217
   * @param string   $selector
218
   * @param int|null $idx
219
   *
220
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
221
   */
222 26
  public function find(string $selector, $idx = null)
  {
    // The lookup is delegated to a parser created from this node.
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
  }
226
227
  /**
228
   * Find one node with a CSS selector.
229
   *
230
   * @param string $selector
231
   *
232
   * @return SimpleHtmlDom|SimpleHtmlDomNodeInterface
233
   */
234
  public function findOne(string $selector)
  {
    // Index 0 asks find() for the first match only.
    $firstMatch = $this->find($selector, 0);

    return $firstMatch;
  }
238
239
  /**
240
   * Returns the first child of node.
241
   *
242
   * @return SimpleHtmlDom|null
243
   */
244 4
  public function firstChild()
  {
    $child = $this->node->firstChild;

    // Wrap the child when there is one; otherwise report null.
    return $child === null ? null : new self($child);
  }
254
255
  /**
256
   * Returns an array of attributes.
257
   *
258
   * @return array|null
259
   */
260 2
  public function getAllAttributes()
  {
    // A node without attributes is reported as null, not as an empty array.
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $result = [];
    foreach ($this->node->attributes as $attribute) {
      // Each value is passed through the parser's entity-restoring helper.
      $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
  }
273
274
  /**
275
   * Return attribute value.
276
   *
277
   * @param string $name
278
   *
279
   * @return string
280
   */
281 13
  public function getAttribute(string $name): string
  {
    // Read the raw attribute value and pass it through the parser's
    // entity-restoring helper before returning it.
    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities(
        $this->node->getAttribute($name)
    );
  }
287
288
  /**
289
   * Return element by #id.
290
   *
291
   * @param string $id
292
   *
293
   * @return SimpleHtmlDom|SimpleHtmlDomNodeInterface
294
   */
295 1
  public function getElementById(string $id)
  {
    // Build the "#id" selector and ask find() for the first match.
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
299
300
  /**
301
   * Return element by tag name.
302
   *
303
   * @param string $name
304
   *
305
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
306
   */
307 1
  public function getElementByTagName(string $name)
  {
    $firstMatch = $this->node->getElementsByTagName($name)->item(0);

    if ($firstMatch !== null) {
      return new self($firstMatch);
    }

    // No element with that tag name: hand back the blank placeholder object.
    return new SimpleHtmlDomNodeBlank();
  }
317
318
  /**
319
   * Returns elements by #id.
320
   *
321
   * @param string   $id
322
   * @param null|int $idx
323
   *
324
   * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface
325
   */
326
  public function getElementsById(string $id, $idx = null)
  {
    // Build the "#id" selector; $idx is forwarded to find() unchanged.
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
330
331
  /**
332
   * Returns elements by tag name.
333
   *
334
   * @param string   $name
335
   * @param null|int $idx
336
   *
337
   * @return SimpleHtmlDomNode|SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeBlank
338
   */
339 1 View Code Duplication
  public function getElementsByTagName(string $name, $idx = null)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
340
  {
341 1
    $nodesList = $this->node->getElementsByTagName($name);
342
343 1
    $elements = new SimpleHtmlDomNode();
344
345 1
    foreach ($nodesList as $node) {
346 1
      $elements[] = new self($node);
347
    }
348
349
    // return all elements
350 1
    if (null === $idx) {
351 1
      return $elements;
352
    }
353
354
    // handle negative values
355
    if ($idx < 0) {
356
      $idx = \count($elements) + $idx;
357
    }
358
359
    // return one element
360
    if (isset($elements[$idx])) {
361
      return $elements[$idx];
362
    }
363
364
    // return a blank-element
365
    return new SimpleHtmlDomNodeBlank();
366
  }
367
368
  /**
369
   * Create a new "HtmlDomParser"-object from the current context.
370
   *
371
   * @return HtmlDomParser
372
   */
373 62
  public function getHtmlDomParser(): HtmlDomParser
  {
    // A fresh parser is built from this wrapper on every call.
    $parser = new HtmlDomParser($this);

    return $parser;
  }
377
378
  /**
379
   * Retrieve an external iterator.
380
   *
381
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
382
   * @return SimpleHtmlDomNode An instance of an object implementing <b>Iterator</b> or
383
   * <b>Traversable</b>
384
   */
385 2
  public function getIterator(): SimpleHtmlDomNode
  {
    $children = new SimpleHtmlDomNode();

    // Childless node: return the empty collection right away.
    if (!$this->node->hasChildNodes()) {
      return $children;
    }

    // Wrap every direct child node in its own SimpleHtmlDom instance.
    foreach ($this->node->childNodes as $child) {
      $children[] = new self($child);
    }

    return $children;
  }
396
397
  /**
398
   * @return DOMNode
399
   */
400 63
  public function getNode(): \DOMNode
  {
    // Expose the wrapped DOM node as-is.
    $wrapped = $this->node;

    return $wrapped;
  }
404
405
  /**
406
   * Determine if an attribute exists on the element.
407
   *
408
   * @param string $name
409
   *
410
   * @return bool
411
   */
412 1
  public function hasAttribute(string $name): bool
  {
    // Straight delegation to the wrapped node.
    $exists = $this->node->hasAttribute($name);

    return $exists;
  }
416
417
  /**
418
   * Get dom node's outer html.
419
   *
420
   * @param bool $multiDecodeNewHtmlEntity
421
   *
422
   * @return string
423
   */
424 18
  public function html(bool $multiDecodeNewHtmlEntity = false): string
  {
    // Rendering is delegated to a parser created from this node.
    $parser = $this->getHtmlDomParser();

    return $parser->html($multiDecodeNewHtmlEntity);
  }
428
429
  /**
430
   * Get dom node's inner html.
431
   *
432
   * @param bool $multiDecodeNewHtmlEntity
433
   *
434
   * @return string
435
   */
436 11
  public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
  {
    // Rendering is delegated to a parser created from this node.
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml($multiDecodeNewHtmlEntity);
  }
440
441
  /**
442
   * Returns the last child of node.
443
   *
444
   * @return SimpleHtmlDom|null
445
   */
446 4
  public function lastChild()
  {
    $child = $this->node->lastChild;

    // Wrap the child when there is one; otherwise report null.
    return $child === null ? null : new self($child);
  }
456
457
  /**
458
   * Returns the next sibling of node.
459
   *
460
   * @return SimpleHtmlDom|null
461
   */
462 1
  public function nextSibling()
  {
    $sibling = $this->node->nextSibling;

    // Wrap the sibling when there is one; otherwise report null.
    return $sibling === null ? null : new self($sibling);
  }
472
473
  /**
474
   * Returns the parent of node.
475
   *
476
   * @return SimpleHtmlDom
477
   */
478 1
  public function parentNode(): self
  {
    // NOTE(review): unlike the sibling/child accessors there is no null check
    // here; a node without a parent would be passed to the constructor, which
    // is type-hinted as DOMNode - confirm callers never hit that case.
    $parent = $this->node->parentNode;

    return new self($parent);
  }
482
483
  /**
484
   * Returns the previous sibling of node.
485
   *
486
   * @return SimpleHtmlDom|null
487
   */
488 1
  public function previousSibling()
  {
    $sibling = $this->node->previousSibling;

    // Wrap the sibling when there is one; otherwise report null.
    return $sibling === null ? null : new self($sibling);
  }
498
499
  /**
500
   * Replace child node.
501
   *
502
   * @param string $string
503
   *
504
   * @return $this
505
   *
506
   * @throws \RuntimeException
507
   */
508 7
  protected function replaceChild(string $string)
  {
    // Stays null when the new content is empty, i.e. "just clear the node".
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      // Compare the parsed document with the input after normalisation; a
      // mismatch is reported as an invalid fragment.
      if ($this->normalizeStringForComparision($newDocument) !== $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // Drop every existing child. "childNodes" is a live list, so removing
    // nodes while iterating over it with foreach does not visit all of them;
    // repeatedly removing the current first child is safe.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      // Strip the wrapper elements first, then import the remaining document
      // element into this node's own document and append it.
      $newDocument = $this->cleanHtmlWrapper($newDocument);
      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
      $this->node->appendChild($newNode);
    }

    return $this;
  }
533
534
  /**
535
   * Replace this node.
536
   *
537
   * @param string $string
538
   *
539
   * @return $this|null
540
   *
541
   * @throws \RuntimeException
542
   */
543 3
  protected function replaceNode(string $string)
  {
    // Empty replacement: detach this node from its parent and report null.
    if (empty($string)) {
      $this->node->parentNode->removeChild($this->node);

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    // Hand the parser object itself to the normaliser (as replaceChild() does)
    // so that a "<head>" added by the parser is stripped before comparing;
    // comparing only its outerText() skipped that step.
    if ($this->normalizeStringForComparision($newDocument) !== $this->normalizeStringForComparision($string)) {
      throw new RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);

    // Import the new document element into this node's own document.
    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    // Swap the nodes in the tree and keep wrapping the replacement.
    $this->node->parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
  }
566
567
  /**
568
   * Normalize the given input for comparison.
569
   *
570
   * @param HtmlDomParser|string $input
571
   *
572
   * @return string
573
   */
574 8
  private function normalizeStringForComparision($input): string
  {
    if (!($input instanceof HtmlDomParser)) {
      $string = (string)$input;
    } else {
      $string = $input->outerText();

      // Drop the head tags when the parser reports the document was created
      // without a head wrapper.
      if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper() === true) {
        $string = \str_replace(['<head>', '</head>'], '', $string);
      }
    }

    // Lower-case, remove spaces and line breaks, and treat "/>" like ">".
    $lowered = \strtolower($string);
    $collapsed = \str_replace([' ', "\n", "\r", '/>'], ['', '', '', '>'], $lowered);
    $trimmed = \trim($collapsed);

    // Decode then re-encode so url-encoded and plain input end up in one form.
    return \urlencode(\urldecode($trimmed));
  }
609
610
  /**
611
   * @param HtmlDomParser $newDocument
612
   *
613
   * @return HtmlDomParser
614
   */
615 8
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument): HtmlDomParser
  {
    // Only unwrap when the parser reports the document was created without
    // html or without the html wrapper.
    $needsUnwrapping = $newDocument->getIsDOMDocumentCreatedWithoutHtml() === true
                       || $newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper() === true;

    if (!$needsUnwrapping) {
      return $newDocument;
    }

    // The tree edits below only work if getDocument() returns the same
    // DOMDocument each time, so fetch it once.
    $document = $newDocument->getDocument();

    // Remove doc-type node.
    $doctype = $document->doctype;
    if ($doctype !== null) {
      $doctype->parentNode->removeChild($doctype);
    }

    // Remove html element, preserving child nodes.
    $htmlElement = $document->getElementsByTagName('html')->item(0);
    if ($htmlElement !== null) {
      $htmlChildren = $document->createDocumentFragment();
      while ($htmlElement->childNodes->length > 0) {
        $htmlChildren->appendChild($htmlElement->childNodes->item(0));
      }
      $htmlElement->parentNode->replaceChild($htmlChildren, $htmlElement);
    }

    // Remove body element, preserving child nodes.
    $bodyElement = $document->getElementsByTagName('body')->item(0);
    if ($bodyElement instanceof \DOMElement) {
      $bodyChildren = $document->createDocumentFragment();
      while ($bodyElement->childNodes->length > 0) {
        $bodyChildren->appendChild($bodyElement->childNodes->item(0));
      }
      $bodyElement->parentNode->replaceChild($bodyChildren, $bodyElement);

      // At this point DOMDocument still added a "<p>"-wrapper around our string,
      // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
      $paragraph = $document->getElementsByTagName('p')->item(0);
      if ($paragraph !== null) {
        $this->changeElementName($paragraph, 'simpleHtmlDomP');
      }
    }

    return $newDocument;
  }
658
659
  /**
660
   * Change the name of a tag in a "DOMNode".
661
   *
662
   * @param DOMNode $node
663
   * @param string  $name
664
   *
665
   * @return DOMElement
666
   */
667 4
  protected function changeElementName(\DOMNode $node, string $name): \DOMElement
  {
    $document = $node->ownerDocument;
    $newnode = $document->createElement($name);

    // Copy every child (deep) into the replacement element.
    foreach ($node->childNodes as $child) {
      $newnode->appendChild($document->importNode($child, true));
    }

    // "attributes" is null for nodes that are not elements. Each entry is a
    // DOMAttr object, and setAttribute() needs its string value: passing the
    // object itself is a TypeError under strict_types.
    if ($node->attributes !== null) {
      foreach ($node->attributes as $attrName => $attrNode) {
        $newnode->setAttribute($attrName, $attrNode->value);
      }
    }

    // Swap the old node for the renamed one at document level.
    $newnode->ownerDocument->replaceChild($newnode, $node);

    return $newnode;
  }
684
685
  /**
686
   * Set attribute value.
687
   *
688
   * @param string      $name       <p>The name of the html-attribute.</p>
689
   * @param string|null $value      <p>Set to NULL or empty string, to remove the attribute.</p>
690
   * @param bool        $strict     <p>
691
   *                                $value must be NULL, to remove the attribute,
692
   *                                so that you can set an empty string as attribute-value e.g. autofocus=""
693
   *                                </p>
694
   *
695
   * @return $this
696
   */
697 9
  public function setAttribute(string $name, $value = null, bool $strict = false)
  {
    if ($strict === true) {
      // Strict mode: only NULL removes, so an empty string can be stored.
      $remove = $value === null;
    } else {
      // Documented contract: NULL or an empty string removes the attribute.
      // false and [] keep their previous "remove" meaning, but "0" / 0 are
      // real values and are no longer swallowed the way empty() swallowed them.
      $remove = $value === null || $value === '' || $value === false || $value === [];
    }

    if ($remove) {
      $this->node->removeAttribute($name);

      return $this;
    }

    // The DOM setter takes a string value; under strict_types an int, float
    // or bool would be rejected, so normalise scalars first.
    if (\is_scalar($value)) {
      $value = (string)$value;
    }

    $this->node->setAttribute($name, $value);

    return $this;
  }
711
712
  /**
713
   * Remove attribute.
714
   *
715
   * @param string $name <p>The name of the html-attribute.</p>
716
   *
717
   * @return $this
718
   */
719 1
  public function removeAttribute(string $name)
  {
    // Delegate to the wrapped node; its result is ignored.
    $element = $this->node;
    $element->removeAttribute($name);

    // Fluent interface: hand back this wrapper.
    return $this;
  }
725
726
  /**
727
   * Get dom node's plain text.
728
   *
729
   * @return string
730
   */
731 16
  public function text(): string
  {
    // The node's text content is post-processed by the parser's fixHtmlOutput().
    $parser = $this->getHtmlDomParser();

    return $parser->fixHtmlOutput($this->node->textContent);
  }
735
}
736