Completed
Push — master ( 4cdcad...57d66c )
by Lars
02:26
created

SimpleHtmlDom::getNode()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMElement;
7
use DOMNode;
8
use RuntimeException;
9
10
/**
 * Class SimpleHtmlDom
 *
 * Wraps a single DOMNode and exposes a "simple_html_dom"-style API on top of it:
 * magic properties (outerText, innerText, plaintext, tag, attr, plus any attribute name),
 * snake_case method aliases (first_child(), next_sibling(), ...) and CSS-selector lookups
 * that are delegated to HtmlDomParser.
 *
 * Magic property names and aliased method names are matched case-insensitively.
 *
 * @package voku\helper
 *
 * @property string outerText Get dom node's outer html (alias for "outerHtml")
 * @property string outerHtml Get dom node's outer html
 * @property string innerText Get dom node's inner html (alias for "innerHtml")
 * @property string innerHtml Get dom node's inner html
 * @property-read string plaintext Get dom node's plain text
 * @property-read string text      Get dom node's plain text (alias for "plaintext")
 * @property-read string tag       Get dom node name
 * @property-read array|null attr  Get dom node attributes (null when the node has none)
 *
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children() children($idx = -1) Returns children of node
 * @method SimpleHtmlDom|null first_child() Returns the first child of node
 * @method SimpleHtmlDom|null last_child() Returns the last child of node
 * @method SimpleHtmlDom|null next_sibling() Returns the next sibling of node
 * @method SimpleHtmlDom|null prev_sibling() Returns the previous sibling of node
 * @method SimpleHtmlDom|null parent() Returns the parent of node
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
 * @method string outerHtml() Get dom node's outer html
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
 *
 */
class SimpleHtmlDom implements \IteratorAggregate
{
  /**
   * Maps lower-cased alias method names (as handled by "__call()") to the real method names of this class.
   *
   * @var array
   */
  protected static $functionAliases = array(
      'children'     => 'childNodes',
      'first_child'  => 'firstChild',
      'last_child'   => 'lastChild',
      'next_sibling' => 'nextSibling',
      'prev_sibling' => 'previousSibling',
      'parent'       => 'parentNode',
      'outertext'    => 'html',
      'outerhtml'    => 'html',
      'innertext'    => 'innerHtml',
      'innerhtml'    => 'innerHtml',
  );

  /**
   * The wrapped node. Usually a DOMElement, but child/sibling traversal also wraps
   * text and comment nodes, so only the DOMNode API may be assumed.
   *
   * @var DOMNode
   */
  protected $node;

  /**
   * SimpleHtmlDom constructor.
   *
   * @param DOMNode $node The node to wrap.
   */
  public function __construct(DOMNode $node)
  {
    $this->node = $node;
  }

  /**
   * Dispatch alias method calls (e.g. "first_child()", "outerText()") to the real methods.
   *
   * @param string $name      The called method name; matched case-insensitively, like regular PHP methods.
   * @param array  $arguments The arguments of the call.
   *
   * @return null|string|SimpleHtmlDom|SimpleHtmlDomNode
   *
   * @throws BadMethodCallException If no alias with that name is registered.
   */
  public function __call($name, $arguments)
  {
    // The alias table is keyed in lower-case, but "__call()" receives the name exactly as written by the caller.
    $alias = strtolower($name);

    if (isset(self::$functionAliases[$alias])) {
      return call_user_func_array(array($this, self::$functionAliases[$alias]), $arguments);
    }

    throw new BadMethodCallException('Method does not exist');
  }

  /**
   * Read a magic property; every unknown name is treated as an attribute name.
   *
   * @param string $name Property name (case-insensitive).
   *
   * @return array|null|string
   */
  public function __get($name)
  {
    $name = strtolower($name);

    switch ($name) {
      case 'outerhtml':
      case 'outertext':
        return $this->html();
      case 'innerhtml':
      case 'innertext':
        return $this->innerHtml();
      case 'text':
      case 'plaintext':
        return $this->text();
      case 'tag':
        return $this->node->nodeName;
      case 'attr':
        return $this->getAllAttributes();
      default:
        return $this->getAttribute($name);
    }
  }

  /**
   * Shortcut for "find()": allows "$element('div.foo')".
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    return $this->find($selector, $idx);
  }

  /**
   * Check whether a magic property is available; unknown names are checked as attributes.
   *
   * @param string $name Property name (case-insensitive for the built-in properties).
   *
   * @return bool
   */
  public function __isset($name)
  {
    $key = strtolower($name);

    switch ($key) {
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      default:
        // "__get()" looks attributes up by their lower-cased name, so accept that spelling as well
        // as the exact one that was asked for.
        return $this->hasAttribute($name) || $this->hasAttribute($key);
    }
  }

  /**
   * Write a magic property; every unknown name is treated as an attribute name.
   *
   * @param string $name  Property name (case-insensitive for the built-in properties).
   * @param mixed  $value New html (for outer*/inner*) or new attribute value.
   *
   * @return SimpleHtmlDom|null
   */
  public function __set($name, $value)
  {
    switch (strtolower($name)) {
      case 'outerhtml':
      case 'outertext':
        return $this->replaceNode($value);
      case 'innerhtml':
      case 'innertext':
        return $this->replaceChild($value);
      default:
        // Attribute names are passed through exactly as given by the caller.
        return $this->setAttribute($name, $value);
    }
  }

  /**
   * @return string The outer html of the node.
   */
  public function __toString()
  {
    return $this->html();
  }

  /**
   * Remove the attribute with the given name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom
   */
  public function __unset($name)
  {
    return $this->setAttribute($name, null);
  }

  /**
   * Returns children of node
   *
   * @param int $idx Index of the wanted child, or -1 for the complete list.
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null The list, a single child, or null if $idx does not exist.
   */
  public function childNodes($idx = -1)
  {
    $nodeList = $this->getIterator();

    if ($idx === -1) {
      return $nodeList;
    }

    // NOTE(review): relies on SimpleHtmlDomNode supporting array access (not visible in this file).
    if (isset($nodeList[$idx])) {
      return $nodeList[$idx];
    }

    return null;
  }

  /**
   * Find list of nodes with a CSS selector
   *
   * Delegates to "HtmlDomParser::find()" on a parser created from this node.
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function find($selector, $idx = null)
  {
    return $this->getHtmlDomParser()->find($selector, $idx);
  }

  /**
   * Returns the first child of node
   *
   * @return SimpleHtmlDom|null
   */
  public function firstChild()
  {
    return $this->wrapNode($this->node->firstChild);
  }

  /**
   * Returns array of attributes
   *
   * Every value is passed through "HtmlDomParser::putReplacedBackToPreserveHtmlEntities()".
   *
   * @return array|null Map of attribute name => value, or null if the node has no attributes.
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $attributes = array();
    foreach ($this->node->attributes as $attr) {
      $attributes[$attr->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attr->value);
    }

    return $attributes;
  }

  /**
   * Return attribute value
   *
   * The raw value is passed through "HtmlDomParser::putReplacedBackToPreserveHtmlEntities()".
   *
   * @param string $name
   *
   * @return string
   */
  public function getAttribute($name)
  {
    // Only elements carry attributes; text/comment nodes are treated like an element without that
    // attribute (DOMElement::getAttribute() yields an empty string in that case).
    $html = $this->node instanceof DOMElement ? $this->node->getAttribute($name) : '';

    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
  }

  /**
   * Return SimpleHtmlDom by id.
   *
   * @param string $id
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    return $this->find("#$id", 0);
  }

  /**
   * Return SimpleHtmlDom by tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank The first matching descendant, or a blank node if there is none.
   */
  public function getElementByTagName($name)
  {
    $node = $this->node->getElementsByTagName($name)->item(0);

    if ($node !== null) {
      return new self($node);
    }

    return new SimpleHtmlDomNodeBlank();
  }

  /**
   * Returns Elements by id
   *
   * @param string   $id
   * @param null|int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    return $this->find("#$id", $idx);
  }

  /**
   * Returns Elements by tag name
   *
   * @param string   $name
   * @param null|int $idx Null for the complete list; a negative index counts from the end of the list.
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $elements = new SimpleHtmlDomNode();
    foreach ($this->node->getElementsByTagName($name) as $node) {
      $elements[] = new self($node);
    }

    if (null === $idx) {
      return $elements;
    }

    if ($idx < 0) {
      $idx = count($elements) + $idx;
    }

    if (isset($elements[$idx])) {
      return $elements[$idx];
    }

    return new SimpleHtmlDomNodeBlank();
  }

  /**
   * Create a new "HtmlDomParser"-object from the current context.
   *
   * @return HtmlDomParser
   */
  public function getHtmlDomParser()
  {
    return new HtmlDomParser($this);
  }

  /**
   * Retrieve an external iterator
   *
   * Every child node (elements as well as text/comment nodes) is wrapped in its own instance.
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode An instance of an object implementing <b>Iterator</b> or
   * <b>Traversable</b>
   */
  #[\ReturnTypeWillChange]
  public function getIterator()
  {
    $elements = new SimpleHtmlDomNode();

    if ($this->node->hasChildNodes()) {
      foreach ($this->node->childNodes as $node) {
        $elements[] = new self($node);
      }
    }

    return $elements;
  }

  /**
   * @return DOMNode The wrapped node.
   */
  public function getNode()
  {
    return $this->node;
  }

  /**
   * Determine if an attribute exists on the element.
   *
   * @param string $name
   *
   * @return bool Always false for non-element nodes (text, comment, ...).
   */
  public function hasAttribute($name)
  {
    return $this->node instanceof DOMElement && $this->node->hasAttribute($name);
  }

  /**
   * Get dom node's outer html
   *
   * @return string
   */
  public function html()
  {
    return $this->getHtmlDomParser()->html();
  }

  /**
   * Get dom node's inner html
   *
   * @return string
   */
  public function innerHtml()
  {
    return $this->getHtmlDomParser()->innerHtml();
  }

  /**
   * Returns the last child of node
   *
   * @return SimpleHtmlDom|null
   */
  public function lastChild()
  {
    return $this->wrapNode($this->node->lastChild);
  }

  /**
   * Returns the next sibling of node
   *
   * @return SimpleHtmlDom|null
   */
  public function nextSibling()
  {
    return $this->wrapNode($this->node->nextSibling);
  }

  /**
   * Returns the parent of node
   *
   * @return SimpleHtmlDom|null Null if the node has no parent (e.g. a document or a detached node).
   */
  public function parentNode()
  {
    return $this->wrapNode($this->node->parentNode);
  }

  /**
   * Returns the previous sibling of node
   *
   * @return SimpleHtmlDom|null
   */
  public function previousSibling()
  {
    return $this->wrapNode($this->node->previousSibling);
  }

  /**
   * Wrap a DOM node in a new instance; anything that is not a DOMNode (i.e. null) yields null.
   *
   * @param DOMNode|null $node
   *
   * @return SimpleHtmlDom|null
   */
  private function wrapNode($node)
  {
    return $node instanceof DOMNode ? new self($node) : null;
  }

  /**
   * Replace child node
   *
   * Removes all current children and, unless $string is empty, appends the parsed html instead.
   *
   * @param string $string New inner html; an "empty()" value only clears the node.
   *
   * @return $this
   *
   * @throws RuntimeException If the parser's output does not match the given html after normalization.
   */
  protected function replaceChild($string)
  {
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      // Strict comparison: both sides are strings, and "!=" would compare numeric-looking strings by value.
      if ($this->normalizeStringForComparision($newDocument->outertext) !== $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // "childNodes" is a live list, so removing nodes while iterating over it skips entries;
    // drain the node via "firstChild" instead.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);

      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

      $this->node->appendChild($newNode);
    }

    return $this;
  }

  /**
   * Replace this node
   *
   * An "empty()" $string removes the node from its parent; otherwise the node is replaced by the
   * parsed html and this wrapper points to the new node afterwards.
   *
   * @param string $string New outer html.
   *
   * @return $this|null Null when the node was removed because $string was empty.
   *
   * @throws RuntimeException If the parser's output does not match the given html after normalization.
   */
  protected function replaceNode($string)
  {
    if (empty($string)) {
      // A node without a parent is already detached, so there is nothing to remove.
      if ($this->node->parentNode !== null) {
        $this->node->parentNode->removeChild($this->node);
      }

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    // Strict comparison: both sides are strings, and "!=" would compare numeric-looking strings by value.
    if ($this->normalizeStringForComparision($newDocument->outertext) !== $this->normalizeStringForComparision($string)) {
      throw new RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);

    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    $this->node->parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
  }

  /**
   * Normalize the given string for comparision.
   *
   * Lower-cases the string, strips spaces and line breaks, trims it and normalizes url-encoding.
   *
   * @param string $string
   *
   * @return string
   */
  private function normalizeStringForComparision($string)
  {
    $normalized = str_replace(array(' ', "\n", "\r\n", "\r"), '', strtolower($string));

    return urlencode(urldecode(trim($normalized)));
  }

  /**
   * Strip the doctype / html / body wrapper from a document that was built from an html fragment.
   *
   * Only acts when the parser reports that the document was created without an html wrapper.
   *
   * @param HtmlDomParser $newDocument
   *
   * @return HtmlDomParser The same parser instance.
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument)
  {
    if ($newDocument->getIsDOMDocumentCreatedWithoutHtml() !== true) {
      return $newDocument;
    }

    // NOTE(review): assumes getDocument() hands out the same DOMDocument instance on every call;
    // the in-place modifications below would have no effect otherwise.
    $document = $newDocument->getDocument();

    // Remove doc-type node.
    $doctype = $document->doctype;
    if ($doctype !== null && $doctype->parentNode !== null) {
      $doctype->parentNode->removeChild($doctype);
    }

    // Remove html element, preserving child nodes.
    $html = $document->getElementsByTagName('html')->item(0);
    if ($html !== null) {
      $this->unwrapElement($html);
    }

    // Remove body element, preserving child nodes.
    $body = $document->getElementsByTagName('body')->item(0);
    if ($body !== null) {
      $this->unwrapElement($body);
    }

    // At this point DOMDocument still added a "<p>"-wrapper around our string,
    // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
    $paragraph = $document->getElementsByTagName('p')->item(0);
    if ($paragraph !== null) {
      $this->changeElementName($paragraph, 'simpleHtmlDomP');
    }

    return $newDocument;
  }

  /**
   * Replace an element by its own child nodes (the children are moved, not copied).
   *
   * @param DOMNode $element An element that is attached to a parent node.
   */
  private function unwrapElement(DOMNode $element)
  {
    $fragment = $element->ownerDocument->createDocumentFragment();

    // Appending a child to the fragment detaches it from $element, so the list shrinks on every pass.
    while ($element->childNodes->length > 0) {
      $fragment->appendChild($element->childNodes->item(0));
    }

    $element->parentNode->replaceChild($fragment, $element);
  }

  /**
   * change the name of a tag in a "DOMNode"
   *
   * Creates a new element with the given name, copies children and attributes to it and
   * puts it into the tree in place of the original node.
   *
   * @param DOMNode $node
   * @param string  $name
   *
   * @return DOMElement The new element.
   */
  protected function changeElementName(\DOMNode $node, $name)
  {
    $newnode = $node->ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
      // importNode() yields a deep copy, so the list being iterated is not modified.
      $child = $node->ownerDocument->importNode($child, true);
      $newnode->appendChild($child);
    }

    if ($node->hasAttributes()) {
      foreach ($node->attributes as $attrNode) {
        // setAttribute() expects the string value, not the DOMAttr object itself.
        $newnode->setAttribute($attrNode->nodeName, $attrNode->nodeValue);
      }
    }

    // Replace via the actual parent so that nodes which are not direct children of the document work too.
    if ($node->parentNode !== null) {
      $node->parentNode->replaceChild($newnode, $node);
    }

    return $newnode;
  }

  /**
   * Set attribute value
   *
   * Note that every "empty()" value (null, '', '0', 0, false, ...) removes the attribute.
   * Non-element nodes (text, comment, ...) cannot carry attributes and are left untouched.
   *
   * @param string $name
   * @param mixed  $value
   *
   * @return $this
   */
  public function setAttribute($name, $value)
  {
    if (!$this->node instanceof DOMElement) {
      return $this;
    }

    if (empty($value)) {
      $this->node->removeAttribute($name);
    } else {
      $this->node->setAttribute($name, $value);
    }

    return $this;
  }

  /**
   * Get dom node's plain text
   *
   * @return string
   */
  public function text()
  {
    return $this->node->textContent;
  }
}
632