Completed
Push — master ( 2a517c...a42c86 )
by Lars
04:47
created

SimpleHtmlDom::getElementsById()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 2
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMElement;
7
use DOMNode;
8
use RuntimeException;
9
10
/**
11
 * Class SimpleHtmlDom
12
 *
13
 * @package voku\helper
14
 *
15
 * @property string      outerText <p>Get dom node's outer html (alias for "outerHtml").</p>
16
 * @property string      outerHtml <p>Get dom node's outer html.</p>
17
 * @property string      innerText <p>Get dom node's inner html (alias for "innerHtml").</p>
18
 * @property string      innerHtml <p>Get dom node's inner html.</p>
19
 * @property-read string plaintext <p>Get dom node's plain text.</p>
20
 * @property-read string tag       <p>Get dom node name.</p>
21
 * @property-read array|null attr  <p>Get all attributes of the dom node as array (NULL if it has none).</p>
22
 *
23
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children(int $idx = -1) <p>Returns children of node.</p>
24
 * @method SimpleHtmlDom|null first_child() <p>Returns the first child of node.</p>
25
 * @method SimpleHtmlDom|null last_child() <p>Returns the last child of node.</p>
26
 * @method SimpleHtmlDom|null next_sibling() <p>Returns the next sibling of node.</p>
27
 * @method SimpleHtmlDom|null prev_sibling() <p>Returns the previous sibling of node.</p>
28
 * @method SimpleHtmlDom|null parent() <p>Returns the parent of node.</p>
29
 *
30
 * @method string outerText() <p>Get dom node's outer html (alias for "outerHtml()").</p>
31
 * @method string outerHtml() <p>Get dom node's outer html.</p>
32
 * @method string innerText() <p>Get dom node's inner html (alias for "innerHtml()").</p>
33
 *
34
 */
35
class SimpleHtmlDom implements \IteratorAggregate
36
{
37
  /**
38
   * @var array
39
   */
40
  protected static $functionAliases = array(
41
      'children'     => 'childNodes',
42
      'first_child'  => 'firstChild',
43
      'last_child'   => 'lastChild',
44
      'next_sibling' => 'nextSibling',
45
      'prev_sibling' => 'previousSibling',
46
      'parent'       => 'parentNode',
47
      'outertext'    => 'html',
48
      'outerhtml'    => 'html',
49
      'innertext'    => 'innerHtml',
50
      'innerhtml'    => 'innerHtml',
51
  );
52
53
  /**
54
   * @var DOMNode|DOMElement
55
   */
56
  protected $node;
57
58
  /**
59
   * SimpleHtmlDom constructor.
60
   *
61
   * @param DOMNode $node
62
   */
63 97
  public function __construct(DOMNode $node)
64
  {
65 97
    $this->node = $node;
0 ignored issues
show
Documentation Bug introduced by
$node is of type object<DOMNode>, but the property $node was declared to be of type object<DOMElement>. Are you sure that you always receive this specific sub-class here, or does it make sense to add an instanceof check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a given class or a super-class is assigned to a property that is type hinted more strictly.

Either this assignment is in error or an instanceof check should be added for that assignment.

class Alien {}

class Dalek extends Alien {}

class Plot
{
    /** @var  Dalek */
    public $villain;
}

$alien = new Alien();
$plot = new Plot();
if ($alien instanceof Dalek) {
    $plot->villain = $alien;
}
Loading history...
66 97
  }
67
68
  /**
69
   * @param $name
70
   * @param $arguments
71
   *
72
   * @return null|string|SimpleHtmlDom
73
   *
74
   * @throws \BadMethodCallException
75
   */
76 9 View Code Duplication
  public function __call($name, $arguments)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
77
  {
78 9
    $name = strtolower($name);
79
80 9
    if (isset(self::$functionAliases[$name])) {
81 9
      return call_user_func_array(array($this, self::$functionAliases[$name]), $arguments);
82
    }
83
84
    throw new BadMethodCallException('Method does not exist');
85
  }
86
87
  /**
88
   * @param string $name
89
   *
90
   * @return array|null|string
91
   */
92 43
  public function __get($name)
93
  {
94 43
    $name = strtolower($name);
95
96
    switch ($name) {
97 43
      case 'outerhtml':
98 43
      case 'outertext':
99 17
        return $this->html();
100 34
      case 'innerhtml':
101 34
      case 'innertext':
102 11
        return $this->innerHtml();
103 25
      case 'text':
104 25
      case 'plaintext':
105 16
        return $this->text();
106 11
      case 'tag':
107 4
        return $this->node->nodeName;
108 10
      case 'attr':
109
        return $this->getAllAttributes();
110 10
      default:
111 10
        return $this->getAttribute($name);
112 10
    }
113
  }
114
115
  /**
116
   * @param string $selector
117
   * @param int    $idx
118
   *
119
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
120
   */
121 12
  public function __invoke($selector, $idx = null)
122
  {
123 12
    return $this->find($selector, $idx);
124
  }
125
126
  /**
   * Check whether a magic property or an html-attribute is available on this node.
   *
   * Mirrors "__get()": the virtual html/text/tag properties are always available,
   * "attr" is available when the node has at least one attribute, and every other
   * name is treated as the name of an html-attribute.
   *
   * @param string $name <p>Name of the property (case-insensitive) or of the html-attribute.</p>
   *
   * @return bool
   */
  public function __isset($name)
  {
    $name = strtolower($name);

    switch ($name) {
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      case 'attr':
        // "__get('attr')" returns the list of all attributes (or NULL), never the value
        // of an attribute that is literally named "attr", so we must answer for that list.
        return $this->getAllAttributes() !== null;
      default:
        return $this->hasAttribute($name);
    }
  }
148
149
  /**
150
   * @param $name
151
   * @param $value
152
   *
153
   * @return SimpleHtmlDom
154
   */
155 14
  public function __set($name, $value)
156
  {
157 14
    $name = strtolower($name);
158
159
    switch ($name) {
160 14
      case 'outerhtml':
161 14
      case 'outertext':
162 3
        return $this->replaceNode($value);
163 11
      case 'innertext':
164 11
      case 'innerhtml':
165 7
        return $this->replaceChild($value);
166 8
      default:
167 8
        return $this->setAttribute($name, $value);
168 8
    }
169
  }
170
171
  /**
172
   * @return string
173
   */
174 2
  public function __toString()
175
  {
176 2
    return $this->html();
177
  }
178
179
  /**
180
   * @param $name
181
   *
182
   * @return SimpleHtmlDom
183
   */
184 1
  public function __unset($name)
185
  {
186 1
    return $this->removeAttribute($name);
187
  }
188
189
  /**
   * Get the child nodes of this node, or a single child by its position.
   *
   * @param int $idx <p>Position of the wanted child; -1 (default) returns the whole list.</p>
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null <p>All children, the child at $idx, or NULL if there is no
   *                                              child at that position.</p>
   */
  public function childNodes($idx = -1)
  {
    $children = $this->getIterator();

    if ($idx !== -1) {
      return isset($children[$idx]) ? $children[$idx] : null;
    }

    return $children;
  }
210
211
  /**
212
   * Find list of nodes with a CSS selector.
213
   *
214
   * @param string   $selector
215
   * @param int|null $idx
216
   *
217
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
218
   */
219 26
  public function find($selector, $idx = null)
220
  {
221 26
    return $this->getHtmlDomParser()->find($selector, $idx);
222
  }
223
224
  /**
   * Get the first child of this node, wrapped in a new "SimpleHtmlDom"-object.
   *
   * @return SimpleHtmlDom|null <p>NULL if the node has no children.</p>
   */
  public function firstChild()
  {
    $child = $this->node->firstChild;

    return $child === null ? null : new self($child);
  }
239
240
  /**
   * Collect all attributes of this node as "name => value" pairs.
   *
   * Every value is passed through "HtmlDomParser::putReplacedBackToPreserveHtmlEntities()".
   *
   * @return array|null <p>NULL if the node has no attributes at all.</p>
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $result = array();
    foreach ($this->node->attributes as $attribute) {
      $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
  }
258
259
  /**
260
   * Return attribute value.
261
   *
262
   * @param string $name
263
   *
264
   * @return string
265
   */
266 13
  public function getAttribute($name)
267
  {
268 13
    $html = $this->node->getAttribute($name);
269
270 13
    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
271
  }
272
273
  /**
274
   * Return element by #id.
275
   *
276
   * @param string $id
277
   *
278
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
279
   */
280 1
  public function getElementById($id)
281
  {
282 1
    return $this->find("#$id", 0);
283
  }
284
285
  /**
286
   * Return element by tag name.
287
   *
288
   * @param string $name
289
   *
290
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
291
   */
292 1
  public function getElementByTagName($name)
293
  {
294 1
    $node = $this->node->getElementsByTagName($name)->item(0);
295
296 1
    if ($node === null) {
297
      return new SimpleHtmlDomNodeBlank();
298
    }
299
300 1
    return new self($node);
301
  }
302
303
  /**
304
   * Returns elements by #id.
305
   *
306
   * @param string   $id
307
   * @param null|int $idx
308
   *
309
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeBlank
310
   */
311
  public function getElementsById($id, $idx = null)
312
  {
313
    return $this->find("#$id", $idx);
314
  }
315
316
  /**
317
   * Returns elements by tag name.
318
   *
319
   * @param string   $name
320
   * @param null|int $idx
321
   *
322
   * @return SimpleHtmlDomNode|SimpleHtmlDom|SimpleHtmlDomNodeBlank
323
   */
324 1 View Code Duplication
  public function getElementsByTagName($name, $idx = null)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
325
  {
326 1
    $nodesList = $this->node->getElementsByTagName($name);
327
328 1
    $elements = new SimpleHtmlDomNode();
329
330 1
    foreach ($nodesList as $node) {
331 1
      $elements[] = new self($node);
332 1
    }
333
334
    // return all elements
335 1
    if (null === $idx) {
336 1
      return $elements;
337
    }
338
339
    // handle negative values
340
    if ($idx < 0) {
341
      $idx = count($elements) + $idx;
342
    }
343
344
    // return one element
345
    if (isset($elements[$idx])) {
346
      return $elements[$idx];
347
    }
348
349
    // return a blank-element
350
    return new SimpleHtmlDomNodeBlank();
351
  }
352
353
  /**
354
   * Create a new "HtmlDomParser"-object from the current context.
355
   *
356
   * @return HtmlDomParser
357
   */
358 52
  public function getHtmlDomParser()
359
  {
360 52
    return new HtmlDomParser($this);
361
  }
362
363
  /**
   * Retrieve an external iterator over the direct children of this node.
   *
   * Each child node is wrapped in its own "SimpleHtmlDom"-object.
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode <p>The wrapped children; an empty list if the node has no child nodes.</p>
   */
  public function getIterator()
  {
    $children = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
      return $children;
    }

    foreach ($this->node->childNodes as $childNode) {
      $children[] = new self($childNode);
    }

    return $children;
  }
381
382
  /**
383
   * @return DOMNode
384
   */
385 53
  public function getNode()
386
  {
387 53
    return $this->node;
388
  }
389
390
  /**
391
   * Determine if an attribute exists on the element.
392
   *
393
   * @param $name
394
   *
395
   * @return bool
396
   */
397 1
  public function hasAttribute($name)
398
  {
399 1
    return $this->node->hasAttribute($name);
400
  }
401
402
  /**
403
   * Get dom node's outer html.
404
   *
405
   * @param bool $multiDecodeNewHtmlEntity
406
   *
407
   * @return string
408
   */
409 18
  public function html($multiDecodeNewHtmlEntity = false)
410
  {
411 18
    return $this->getHtmlDomParser()->html($multiDecodeNewHtmlEntity);
412
  }
413
414
  /**
415
   * Get dom node's inner html.
416
   *
417
   * @param bool $multiDecodeNewHtmlEntity
418
   *
419
   * @return string
420
   */
421 11
  public function innerHtml($multiDecodeNewHtmlEntity = false)
422
  {
423 11
    return $this->getHtmlDomParser()->innerHtml($multiDecodeNewHtmlEntity);
424
  }
425
426
  /**
   * Get the last child of this node, wrapped in a new "SimpleHtmlDom"-object.
   *
   * @return SimpleHtmlDom|null <p>NULL if the node has no children.</p>
   */
  public function lastChild()
  {
    $child = $this->node->lastChild;

    return $child === null ? null : new self($child);
  }
441
442
  /**
   * Get the node that directly follows this node, wrapped in a new "SimpleHtmlDom"-object.
   *
   * @return SimpleHtmlDom|null <p>NULL if there is no following sibling.</p>
   */
  public function nextSibling()
  {
    $sibling = $this->node->nextSibling;

    return $sibling === null ? null : new self($sibling);
  }
457
458
  /**
   * Returns the parent of node.
   *
   * Behaves like the other traversal helpers of this class ("firstChild()", "nextSibling()", ...):
   * a missing node is reported as NULL instead of being passed to the constructor,
   * which only accepts a "DOMNode".
   *
   * @return SimpleHtmlDom|null <p>NULL if this node has no parent.</p>
   */
  public function parentNode()
  {
    $node = $this->node->parentNode;

    if ($node === null) {
      return null;
    }

    return new self($node);
  }
467
468
  /**
   * Get the node that directly precedes this node, wrapped in a new "SimpleHtmlDom"-object.
   *
   * @return SimpleHtmlDom|null <p>NULL if there is no preceding sibling.</p>
   */
  public function previousSibling()
  {
    $sibling = $this->node->previousSibling;

    return $sibling === null ? null : new self($sibling);
  }
483
484
  /**
   * Replace all child nodes of this node with the given html-fragment.
   *
   * @param string $string <p>The new inner html; an empty value only removes the current children.</p>
   *
   * @return $this
   *
   * @throws \RuntimeException <p>If the normalized output of the parsed fragment differs from the
   *                           normalized input.</p>
   */
  protected function replaceChild($string)
  {
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      // both sides are strings, so compare them strictly (no numeric-string juggling)
      if ($this->normalizeStringForComparision($newDocument) !== $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // "childNodes" is a live list: removing nodes while iterating over it skips nodes,
    // so we always remove the current first child until nothing is left.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);
      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
      $this->node->appendChild($newNode);
    }

    return $this;
  }
518
519
  /**
   * Replace this node with the given html-fragment.
   *
   * An empty value removes the node from its parent instead of replacing it.
   *
   * @param string $string <p>The new outer html; an empty value removes the node.</p>
   *
   * @return $this|null <p>NULL if the node was removed (empty input), otherwise the current object,
   *                    which now wraps the newly inserted node.</p>
   *
   * @throws \RuntimeException <p>If the normalized output of the parsed fragment differs from the
   *                           normalized input.</p>
   */
  protected function replaceNode($string)
  {
    if (empty($string)) {
      // a node without a parent is not attached anywhere, so there is nothing to remove it from
      if ($this->node->parentNode !== null) {
        $this->node->parentNode->removeChild($this->node);
      }

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    // both sides are strings, so compare them strictly (no numeric-string juggling)
    if ($this->normalizeStringForComparision($newDocument->outerText()) !== $this->normalizeStringForComparision($string)) {
      throw new RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);

    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    $this->node->parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
  }
551
552
  /**
553
   * Normalize the given input for comparison.
554
   *
555
   * @param HtmlDomParser|string $input
556
   *
557
   * @return string
558
   */
559 8
  private function normalizeStringForComparision($input)
560
  {
561 8
    if ($input instanceof HtmlDomParser) {
562 6
      $string = $input->outerText();
563
564 6
      if ($input->getIsDOMDocumentCreatedWithoutHeadWrapper() === true) {
565 6
        $string = str_replace(array('<head>', '</head>'), '', $string);
566 6
      }
567 6
    } else {
568 8
      $string = (string)$input;
569
    }
570
571
    return
572 8
        urlencode(
573 8
            urldecode(
574 8
                trim(
575 8
                    str_replace(
576
                        array(
577 8
                            ' ',
578 8
                            "\n",
579 8
                            "\r",
580 8
                            '/>',
581 8
                        ),
582
                        array(
583 8
                            '',
584 8
                            '',
585 8
                            '',
586 8
                            '>',
587 8
                        ),
588 8
                        strtolower($string)
589 8
                    )
590 8
                )
591 8
            )
592 8
        );
593
  }
594
595
  /**
   * Remove the wrapper nodes (doc-type, "<html>", "<body>") from a parsed html-fragment.
   *
   * Nothing is changed unless the parser reports that the document was created
   * without html or without a html-wrapper. The child nodes of the removed
   * elements are preserved.
   *
   * @param HtmlDomParser $newDocument
   *
   * @return HtmlDomParser <p>The same object that was passed in.</p>
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument)
  {
    if (
        $newDocument->getIsDOMDocumentCreatedWithoutHtml() !== true
        &&
        $newDocument->getIsDOMDocumentCreatedWithoutHtmlWrapper() !== true
    ) {
      return $newDocument;
    }

    // NOTE(review): assumes "getDocument()" always hands out the same DOMDocument instance,
    // as the original code already relied on by calling it repeatedly - confirm.
    $document = $newDocument->getDocument();

    // Remove doc-type node.
    if ($document->doctype !== null) {
      $document->doctype->parentNode->removeChild($document->doctype);
    }

    // Remove html element, preserving child nodes.
    $html = $document->getElementsByTagName('html')->item(0);
    if ($html !== null) {
      $fragment = $document->createDocumentFragment();
      while ($html->childNodes->length > 0) {
        $fragment->appendChild($html->childNodes->item(0));
      }
      $html->parentNode->replaceChild($fragment, $html);
    }

    // Remove body element, preserving child nodes.
    $body = $document->getElementsByTagName('body')->item(0);
    if ($body instanceof \DOMElement) {
      $fragment = $document->createDocumentFragment();
      while ($body->childNodes->length > 0) {
        $fragment->appendChild($body->childNodes->item(0));
      }
      $body->parentNode->replaceChild($fragment, $body);

      // At this point DOMDocument may still have added a "<p>"-wrapper around our string,
      // so we rename it to "<simpleHtmlDomP>" and delete this at the end ...
      // There is no such wrapper for every input, and "changeElementName()" only accepts a real node.
      $paragraph = $document->getElementsByTagName('p')->item(0);
      if ($paragraph !== null) {
        $this->changeElementName($paragraph, 'simpleHtmlDomP');
      }
    }

    return $newDocument;
  }
640
641
  /**
   * Change the name of a tag in a "DOMNode".
   *
   * A new element with the given name is created in the same document, deep copies of
   * the child nodes and the attribute values are added to it, and the old node is
   * replaced by the new element.
   *
   * @param DOMNode $node <p>The node that should be renamed.</p>
   * @param string  $name <p>The new tag name.</p>
   *
   * @return DOMElement <p>The newly created element.</p>
   */
  protected function changeElementName(\DOMNode $node, $name)
  {
    $newnode = $node->ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
      $child = $node->ownerDocument->importNode($child, true);
      $newnode->appendChild($child);
    }

    // only element nodes carry attributes, for other node types "attributes" is NULL
    if ($node->attributes !== null) {
      foreach ($node->attributes as $attrName => $attrNode) {
        // "$attrNode" is a "DOMAttr"-object, "setAttribute()" needs its string value
        $newnode->setAttribute($attrName, $attrNode->value);
      }
    }

    // Replace via the direct parent, so that this also works for nodes that are not
    // top-level children of the document; fall back to the document for detached nodes.
    $parent = $node->parentNode !== null ? $node->parentNode : $newnode->ownerDocument;
    $parent->replaceChild($newnode, $node);

    return $newnode;
  }
666
667
  /**
   * Set attribute value.
   *
   * @param string      $name       <p>The name of the html-attribute.</p>
   * @param string|null $value      <p>Set to NULL or empty string, to remove the attribute.</p>
   * @param bool        $strict     <p>
   *                                $value must be NULL, to remove the attribute,
   *                                so that you can set an empty string as attribute-value e.g. autofocus=""
   *                                </p>
   *
   * @return $this
   */
  public function setAttribute($name, $value = null, $strict = false)
  {
    $remove = false;

    if ($strict === true) {
      $remove = ($value === null);
    } elseif ($strict === false) {
      // "empty()" alone would also match "0" / 0, but those are real attribute-values (e.g. value="0")
      $remove = (empty($value) && !is_numeric($value));
    }

    if ($remove) {
      $this->node->removeAttribute($name);
    } else {
      $this->node->setAttribute($name, $value);
    }

    return $this;
  }
693
694
  /**
695
   * Remove attribute.
696
   *
697
   * @param $name <p>The name of the html-attribute.</p>
698
   *
699
   * @return $this
700
   */
701 1
  public function removeAttribute($name)
702
  {
703 1
    $this->node->removeAttribute($name);
704
705 1
    return $this;
706
  }
707
708
  /**
709
   * Get dom node's plain text.
710
   *
711
   * @return string
712
   */
713 16
  public function text()
714
  {
715 16
    return $this->node->textContent;
716
  }
717
}
718