Completed
Push — master ( 01d92b...e4ee10 )
by Lars
05:00
created

SimpleHtmlDom::removeAttribute()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
use BadMethodCallException;
8
use DOMElement;
9
use DOMNode;
10
use RuntimeException;
11
12
/**
13
 * Class SimpleHtmlDom
14
 *
15
 * @package voku\helper
16
 *
17
 * @property string      outerText <p>Get dom node's outer html (alias for "outerHtml").</p>
18
 * @property string      outerHtml <p>Get dom node's outer html.</p>
19
 * @property string      innerText <p>Get dom node's inner html (alias for "innerHtml").</p>
20
 * @property string      innerHtml <p>Get dom node's inner html.</p>
21
 * @property-read string plaintext <p>Get dom node's plain text.</p>
22
 * @property-read string tag       <p>Get dom node name.</p>
23
 * @property-read array|null attr  <p>Get dom node attributes (NULL if the node has none).</p>
24
 *
25
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children(int $idx = -1) <p>Returns children of node.</p>
26
 * @method SimpleHtmlDom|null first_child() <p>Returns the first child of node.</p>
27
 * @method SimpleHtmlDom|null last_child() <p>Returns the last child of node.</p>
28
 * @method SimpleHtmlDom|null next_sibling() <p>Returns the next sibling of node.</p>
29
 * @method SimpleHtmlDom|null prev_sibling() <p>Returns the previous sibling of node.</p>
30
 * @method SimpleHtmlDom|null parent() <p>Returns the parent of node.</p>
31
 *
32
 * @method string outerText() <p>Get dom node's outer html (alias for "outerHtml()").</p>
33
 * @method string outerHtml() <p>Get dom node's outer html.</p>
34
 * @method string innerText() <p>Get dom node's inner html (alias for "innerHtml()").</p>
35
 *
36
 */
37
class SimpleHtmlDom implements \IteratorAggregate
38
{
39
  /**
   * Lower-cased alias names accepted by "__call()", mapped to the name of the
   * method of this class that is invoked for them.
   *
   * @var string[]
   */
  protected static $functionAliases = [
      'children'     => 'childNodes',
      'first_child'  => 'firstChild',
      'last_child'   => 'lastChild',
      'next_sibling' => 'nextSibling',
      'prev_sibling' => 'previousSibling',
      'parent'       => 'parentNode',
      'outertext'    => 'html',
      'outerhtml'    => 'html',
      'innertext'    => 'innerHtml',
      'innerhtml'    => 'innerHtml',
  ];
54
55
  /**
56
   * @var DOMNode|DOMElement
57
   */
58
  protected $node;
59
60
  /**
61
   * Wrap a DOM node so it can be read and modified through this class.
62
   *
63
   * @param DOMNode $node <p>The node to wrap; it is stored unchanged in "$this->node".</p>
64
   */
65 97
  public function __construct(DOMNode $node)
66
  {
67 97
    $this->node = $node;
0 ignored issues
show
Documentation Bug introduced by
$node is of type object<DOMNode>, but the property $node was declared to be of type object<DOMElement>. Are you sure that you always receive this specific sub-class here, or does it make sense to add an instanceof check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a given class or a super-class is assigned to a property that is type hinted more strictly.

Either this assignment is in error or an instanceof check should be added for that assignment.

class Alien {}

class Dalek extends Alien {}

class Plot
{
    /** @var  Dalek */
    public $villain;
}

$alien = new Alien();
$plot = new Plot();
if ($alien instanceof Dalek) {
    $plot->villain = $alien;
}
Loading history...
68 97
  }
69
70
  /**
   * Forward calls made under an alias name (e.g. "first_child()") to the real method.
   *
   * The alias is looked up case-insensitively in "self::$functionAliases".
   *
   * @param string $name      <p>The called method name.</p>
   * @param array  $arguments <p>The arguments of the call, passed on unchanged.</p>
   *
   * @return null|string|SimpleHtmlDom
   *
   * @throws \BadMethodCallException <p>If the name is not a known alias.</p>
   */
  public function __call($name, $arguments)
  {
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      throw new BadMethodCallException('Method does not exist');
    }

    $target = [$this, self::$functionAliases[$alias]];

    // Deliberately routed through "call_user_func_array()" and not a direct call.
    return \call_user_func_array($target, $arguments);
  }
88
89
  /**
   * Read a virtual property of the node.
   *
   * The name is compared case-insensitively. Reserved names map to html, inner html,
   * plain text, tag name and the attribute list; every other name is read as an
   * html-attribute of the node.
   *
   * @param string $name <p>The property name.</p>
   *
   * @return array|null|string
   */
  public function __get($name)
  {
    $key = \strtolower($name);

    if ($key === 'outerhtml' || $key === 'outertext') {
      return $this->html();
    }

    if ($key === 'innerhtml' || $key === 'innertext') {
      return $this->innerHtml();
    }

    if ($key === 'text' || $key === 'plaintext') {
      return $this->text();
    }

    if ($key === 'tag') {
      return $this->node->nodeName;
    }

    if ($key === 'attr') {
      return $this->getAllAttributes();
    }

    return $this->getAttribute($key);
  }
116
117
  /**
   * Allow the object to be called like a function; this is a shortcut for "find()".
   *
   * @param string   $selector <p>The CSS selector.</p>
   * @param int|null $idx      <p>Passed through unchanged to "find()".</p>
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function __invoke($selector, $idx = null)
  {
    $matches = $this->find($selector, $idx);

    return $matches;
  }
127
128
  /**
   * Check whether a virtual property is available on the node.
   *
   * The name is compared case-insensitively. The html / text / tag properties always
   * exist, "attr" exists when the node has at least one attribute (mirroring "__get()",
   * which yields NULL otherwise) and every other name is checked as an html-attribute.
   *
   * @param string $name <p>The property name.</p>
   *
   * @return bool
   */
  public function __isset($name)
  {
    $name = \strtolower($name);

    switch ($name) {
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      case 'attr':
        // "__get('attr')" returns NULL for a node without attributes, so "isset()" must be false then.
        return $this->node->hasAttributes();
      default:
        return $this->hasAttribute($name);
    }
  }
150
151
  /**
   * Write a virtual property of the node.
   *
   * The name is compared case-insensitively: the outer-html names replace the node
   * itself, the inner-html names replace its children and every other name is written
   * as an html-attribute.
   *
   * @param string $name  <p>The property name.</p>
   * @param mixed  $value <p>The new value.</p>
   *
   * @return SimpleHtmlDom|null <p>The result of the delegated call.</p>
   */
  public function __set($name, $value)
  {
    $key = strtolower($name);

    if ($key === 'outerhtml' || $key === 'outertext') {
      return $this->replaceNode($value);
    }

    if ($key === 'innertext' || $key === 'innerhtml') {
      return $this->replaceChild($value);
    }

    return $this->setAttribute($key, $value);
  }
172
173
  /**
   * Cast the node to a string, using the result of "html()".
   *
   * @return string
   */
  public function __toString()
  {
    $outerHtml = $this->html();

    return $outerHtml;
  }
180
181
  /**
   * Remove the html-attribute with the given name (the name is used as given).
   *
   * @param string $name <p>The name of the html-attribute.</p>
   *
   * @return SimpleHtmlDom
   */
  public function __unset($name)
  {
    $result = $this->removeAttribute($name);

    return $result;
  }
190
191
  /**
   * Return the child nodes, or a single child when an index is given.
   *
   * @param int $idx <p>Index of the wanted child; -1 (default) returns the whole list.</p>
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null <p>NULL if there is no child at the index.</p>
   */
  public function childNodes(int $idx = -1)
  {
    $children = $this->getIterator();

    if ($idx === -1) {
      return $children;
    }

    return isset($children[$idx]) ? $children[$idx] : null;
  }
212
213
  /**
   * Query the current node with a CSS selector.
   *
   * The lookup is delegated to a "HtmlDomParser" created from this node.
   *
   * @param string   $selector <p>The CSS selector.</p>
   * @param int|null $idx      <p>Passed through unchanged to "HtmlDomParser::find()".</p>
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function find(string $selector, $idx = null)
  {
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
  }
225
226
  /**
   * Return the first child of the node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null <p>NULL if the node has no children.</p>
   */
  public function firstChild()
  {
    $child = $this->node->firstChild;

    return $child === null ? null : new self($child);
  }
241
242
  /**
   * Collect all attributes of the node as a "name => value" array.
   *
   * Each value is passed through "HtmlDomParser::putReplacedBackToPreserveHtmlEntities()".
   *
   * @return array|null <p>NULL if the node has no attributes.</p>
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $result = [];
    foreach ($this->node->attributes as $attribute) {
      $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
  }
260
261
  /**
   * Return the value of an html-attribute.
   *
   * The raw value is passed through "HtmlDomParser::putReplacedBackToPreserveHtmlEntities()".
   *
   * @param string $name <p>The name of the html-attribute.</p>
   *
   * @return string <p>The attribute value, or an empty string if the wrapped node is not an
   *                element (e.g. a text node) and therefore cannot carry attributes.</p>
   */
  public function getAttribute(string $name): string
  {
    // The constructor accepts any DOMNode, but only DOMElement offers "getAttribute()".
    if (!($this->node instanceof DOMElement)) {
      return '';
    }

    $html = $this->node->getAttribute($name);

    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
  }
274
275
  /**
   * Return the first element matching the selector "#<id>".
   *
   * @param string $id <p>The id to search for (without the leading "#").</p>
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById(string $id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
286
287
  /**
   * Return the first descendant element with the given tag name.
   *
   * @param string $name <p>The tag name.</p>
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank <p>A blank element if nothing matches.</p>
   */
  public function getElementByTagName(string $name)
  {
    $first = $this->node->getElementsByTagName($name)->item(0);

    return $first === null ? new SimpleHtmlDomNodeBlank() : new self($first);
  }
304
305
  /**
   * Return the elements matching the selector "#<id>".
   *
   * @param string   $id  <p>The id to search for (without the leading "#").</p>
   * @param null|int $idx <p>Passed through unchanged to "find()".</p>
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementsById(string $id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
317
318
  /**
   * Return the descendant elements with the given tag name.
   *
   * @param string   $name <p>The tag name.</p>
   * @param null|int $idx  <p>
   *                       NULL returns all matches; otherwise the match at this position is
   *                       returned, where a negative value counts from the end of the list.
   *                       </p>
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|SimpleHtmlDomNodeBlank <p>
   *                       The list of matches, a single match, or a blank element if there is
   *                       no match at the requested position.
   *                       </p>
   */
  public function getElementsByTagName(string $name, $idx = null)
  {
    $matches = new SimpleHtmlDomNode();

    foreach ($this->node->getElementsByTagName($name) as $domNode) {
      $matches[] = new self($domNode);
    }

    if (null === $idx) {
      return $matches;
    }

    // A negative index is resolved relative to the end of the list.
    $position = $idx < 0 ? \count($matches) + $idx : $idx;

    return isset($matches[$position]) ? $matches[$position] : new SimpleHtmlDomNodeBlank();
  }
354
355
  /**
   * Build a new "HtmlDomParser" from this object; a fresh parser is created on every call.
   *
   * @return HtmlDomParser
   */
  public function getHtmlDomParser(): HtmlDomParser
  {
    $parser = new HtmlDomParser($this);

    return $parser;
  }
364
365
  /**
   * Return the direct child nodes, each wrapped in a new instance of this class.
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode <p>The wrapped children; empty if the node has no child nodes.</p>
   */
  public function getIterator(): SimpleHtmlDomNode
  {
    $children = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
      return $children;
    }

    foreach ($this->node->childNodes as $childNode) {
      $children[] = new self($childNode);
    }

    return $children;
  }
383
384
  /**
   * Return the wrapped DOM node.
   *
   * @return DOMNode
   */
  public function getNode(): \DOMNode
  {
    $wrapped = $this->node;

    return $wrapped;
  }
391
392
  /**
   * Determine if an attribute exists on the element.
   *
   * @param string $name <p>The name of the html-attribute.</p>
   *
   * @return bool <p>FALSE as well if the wrapped node is not an element (e.g. a text node).</p>
   */
  public function hasAttribute(string $name): bool
  {
    // The constructor accepts any DOMNode, but only DOMElement offers "hasAttribute()".
    if (!($this->node instanceof DOMElement)) {
      return false;
    }

    return $this->node->hasAttribute($name);
  }
403
404
  /**
   * Get the outer html of the node.
   *
   * The work is delegated to "HtmlDomParser::html()" on a parser created from this node.
   *
   * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged to the parser.</p>
   *
   * @return string
   */
  public function html(bool $multiDecodeNewHtmlEntity = false): string
  {
    $parser = $this->getHtmlDomParser();

    return $parser->html($multiDecodeNewHtmlEntity);
  }
415
416
  /**
   * Get the inner html of the node.
   *
   * The work is delegated to "HtmlDomParser::innerHtml()" on a parser created from this node.
   *
   * @param bool $multiDecodeNewHtmlEntity <p>Passed through unchanged to the parser.</p>
   *
   * @return string
   */
  public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
  {
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml($multiDecodeNewHtmlEntity);
  }
427
428
  /**
   * Return the last child of the node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null <p>NULL if the node has no children.</p>
   */
  public function lastChild()
  {
    $child = $this->node->lastChild;

    return $child === null ? null : new self($child);
  }
443
444
  /**
   * Return the node that directly follows this one, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null <p>NULL if there is no following sibling.</p>
   */
  public function nextSibling()
  {
    $sibling = $this->node->nextSibling;

    return $sibling === null ? null : new self($sibling);
  }
459
460
  /**
   * Returns the parent of node.
   *
   * A node without a parent yields NULL, consistent with "firstChild()", "lastChild()",
   * "nextSibling()" and "previousSibling()". Without this check the constructor would be
   * called with NULL and raise a TypeError.
   *
   * @return SimpleHtmlDom|null <p>NULL if the node has no parent.</p>
   */
  public function parentNode()
  {
    $parent = $this->node->parentNode;

    if ($parent === null) {
      return null;
    }

    return new self($parent);
  }
469
470
  /**
   * Return the node that directly precedes this one, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null <p>NULL if there is no preceding sibling.</p>
   */
  public function previousSibling()
  {
    $sibling = $this->node->previousSibling;

    return $sibling === null ? null : new self($sibling);
  }
485
486
  /**
   * Replace all children of the node with the given html.
   *
   * A non-empty string is parsed first and rejected if the normalized parser output
   * differs from the normalized input. An empty string only removes the children.
   *
   * @param string $string <p>The new inner html.</p>
   *
   * @return $this
   *
   * @throws \RuntimeException <p>If the string is not accepted as html fragment.</p>
   */
  protected function replaceChild(string $string)
  {
    $newDocument = null;

    // NOTE(review): "empty()" also treats the string "0" as empty - confirm that this is intended.
    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      if ($this->normalizeStringForComparision($newDocument) !== $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // "childNodes" is a live list: removing nodes while iterating over it skips entries,
    // so the first child is removed repeatedly until none is left.
    while (($child = $this->node->firstChild) !== null) {
      $this->node->removeChild($child);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);
      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
      $this->node->appendChild($newNode);
    }

    return $this;
  }
520
521
  /**
   * Replace this node with the given html.
   *
   * An empty string removes the node from its parent. Otherwise the string is parsed and
   * rejected if the normalized parser output differs from the normalized input; on success
   * the wrapped node is swapped for the newly imported one.
   *
   * @param string $string <p>The new outer html.</p>
   *
   * @return $this|null <p>NULL if the node was removed.</p>
   *
   * @throws \RuntimeException <p>If the string is not accepted as html fragment.</p>
   */
  protected function replaceNode(string $string)
  {
    $parent = $this->node->parentNode;

    if (empty($string)) {
      // A node without parent is not attached anywhere, so there is nothing to remove.
      if ($parent !== null) {
        $parent->removeChild($this->node);
      }

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    if ($this->normalizeStringForComparision($newDocument->outerText()) !== $this->normalizeStringForComparision($string)) {
      throw new RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);

    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    if ($parent !== null) {
      $parent->replaceChild($newNode, $this->node);
    }
    $this->node = $newNode;

    return $this;
  }
553
554
  /**
   * Reduce html to a canonical form, so that two fragments can be compared as strings.
   *
   * The input is lower-cased, spaces and line breaks are removed, "/>" becomes ">" and the
   * trimmed result is url-decoded and url-encoded again. For a parser object its
   * "outerText()" is used, without the "<head>" tags if the parser reports that the
   * document was created without a head wrapper.
   *
   * @param HtmlDomParser|string $input
   *
   * @return string
   */
  private function normalizeStringForComparision($input): string
  {
    if ($input instanceof HtmlDomParser) {
      $raw = $input->outerText();

      if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper() === true) {
        $raw = str_replace(['<head>', '</head>'], '', $raw);
      }
    } else {
      $raw = (string)$input;
    }

    $search = [' ', "\n", "\r", '/>'];
    $replace = ['', '', '', '>'];

    $compact = trim(str_replace($search, $replace, strtolower($raw)));

    return urlencode(urldecode($compact));
  }
596
597
  /**
   * Strip the doc-type, "<html>" and "<body>" wrappers from a parsed fragment.
   *
   * Nothing is changed unless the parser reports that the document was created without
   * html or without html wrapper. The children of the removed wrappers are kept in place;
   * afterwards the first "<p>" element is renamed to "simpleHtmlDomP".
   *
   * @param HtmlDomParser $newDocument <p>The parsed fragment; its document is modified in place.</p>
   *
   * @return HtmlDomParser <p>The same parser object that was passed in.</p>
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument): HtmlDomParser
  {
    $createdWithoutWrapper = $newDocument->getIsDOMDocumentCreatedWithoutHtml() === true
                             ||
                             $newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper() === true;

    if (!$createdWithoutWrapper) {
      return $newDocument;
    }

    // Remove doc-type node.
    if ($newDocument->getDocument()->doctype !== null) {
      $docType = $newDocument->getDocument()->doctype;
      $docType->parentNode->removeChild($docType);
    }

    // Remove html element, preserving child nodes.
    $htmlElement = $newDocument->getDocument()->getElementsByTagName('html')->item(0);
    $htmlContent = $newDocument->getDocument()->createDocumentFragment();
    if ($htmlElement !== null) {
      while ($htmlElement->childNodes->length > 0) {
        $htmlContent->appendChild($htmlElement->childNodes->item(0));
      }
      $htmlElement->parentNode->replaceChild($htmlContent, $htmlElement);
    }

    // Remove body element, preserving child nodes.
    $bodyElement = $newDocument->getDocument()->getElementsByTagName('body')->item(0);
    $bodyContent = $newDocument->getDocument()->createDocumentFragment();
    if (!($bodyElement instanceof \DOMElement)) {
      return $newDocument;
    }

    while ($bodyElement->childNodes->length > 0) {
      $bodyContent->appendChild($bodyElement->childNodes->item(0));
    }
    $bodyElement->parentNode->replaceChild($bodyContent, $bodyElement);

    // At this point DOMDocument still added a "<p>"-wrapper around our string,
    // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
    $paragraph = $newDocument->getDocument()->getElementsByTagName('p')->item(0);
    if ($paragraph !== null) {
      $this->changeElementName($paragraph, 'simpleHtmlDomP');
    }

    return $newDocument;
  }
645
646
  /**
   * Change the name of a tag in a "DOMNode".
   *
   * A new element with the given name is created in the same document, it receives copies
   * of the children and attributes of the old node and then takes the old node's place.
   *
   * @param DOMNode $node <p>The node to rename.</p>
   * @param string  $name <p>The new tag name.</p>
   *
   * @return DOMElement <p>The newly created element.</p>
   */
  protected function changeElementName(\DOMNode $node, string $name): \DOMElement
  {
    $newnode = $node->ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
      $child = $node->ownerDocument->importNode($child, true);
      $newnode->appendChild($child);
    }

    // "attributes" is NULL for nodes that are not elements.
    if ($node->attributes !== null) {
      foreach ($node->attributes as $attrNode) {
        // "setAttribute()" expects the value as string, not the attribute node itself.
        $newnode->setAttribute($attrNode->nodeName, $attrNode->value);
      }
    }

    // Replace via the direct parent, so that nodes nested deeper than the document root work too.
    if ($node->parentNode !== null) {
      $node->parentNode->replaceChild($newnode, $node);
    }

    return $newnode;
  }
671
672
  /**
   * Set attribute value.
   *
   * @param string      $name       <p>The name of the html-attribute.</p>
   * @param string|null $value      <p>Set to NULL or empty string, to remove the attribute.</p>
   * @param bool $strict            <p>
   *                                $value must be NULL, to remove the attribute,
   *                                so that you can set an empty string as attribute-value e.g. autofocus=""
   *                                </p>
   *
   * @return $this
   */
  public function setAttribute(string $name, $value = null, bool $strict = false)
  {
    // The constructor accepts any DOMNode, but only DOMElement can carry attributes.
    if (!($this->node instanceof DOMElement)) {
      return $this;
    }

    if ($strict === true) {
      $remove = ($value === null);
    } else {
      // Not "empty()": that would also drop valid values like "0" (e.g. tabindex="0").
      $remove = ($value === null || $value === '' || $value === false);
    }

    if ($remove) {
      $this->node->removeAttribute($name);
    } else {
      // Explicit cast, because this file uses strict types and the DOM method expects a string.
      $this->node->setAttribute($name, (string)$value);
    }

    return $this;
  }
698
699
  /**
   * Remove attribute.
   *
   * Nothing happens if the wrapped node is not an element (e.g. a text node), because
   * such a node cannot carry attributes.
   *
   * @param string $name <p>The name of the html-attribute.</p>
   *
   * @return $this
   */
  public function removeAttribute(string $name)
  {
    if ($this->node instanceof DOMElement) {
      $this->node->removeAttribute($name);
    }

    return $this;
  }
712
713
  /**
   * Get the plain text of the node, i.e. its "textContent".
   *
   * @return string
   */
  public function text(): string
  {
    $plainText = $this->node->textContent;

    return $plainText;
  }
722
}
723