Completed
Push — master ( 01893c...871ff9 )
by Lars
02:47
created

SimpleHtmlDom::__get()   C

Complexity

Conditions 9
Paths 9

Size

Total Lines 22
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 9.0197

Importance

Changes 5
Bugs 1 Features 1
Metric Value
c 5
b 1
f 1
dl 0
loc 22
ccs 15
cts 16
cp 0.9375
rs 6.412
cc 9
eloc 18
nc 9
nop 1
crap 9.0197
1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMElement;
7
use DOMNode;
8
use RuntimeException;
9
10
/**
11
 * Class SimpleHtmlDom
12
 *
13
 * @package voku\helper
14
 *
15
 * @property string outerText Get dom node's outer html (alias for "outerHtml")
16
 * @property string outerHtml Get dom node's outer html
17
 * @property string innerText Get dom node's inner html (alias for "innerHtml")
18
 * @property string innerHtml Get dom node's inner html
19
 * @property-read string plaintext Get dom node's plain text
20
 * @property-read string tag       Get dom node name
21
 * @property-read array|null attr  Get dom node attributes (name => value), null when the node has none
22
 *
23
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children() children($idx = -1) Returns children of node
24
 * @method SimpleHtmlDom|null first_child() Returns the first child of node
25
 * @method SimpleHtmlDom|null last_child() Returns the last child of node
26
 * @method SimpleHtmlDom|null next_sibling() Returns the next sibling of node
27
 * @method SimpleHtmlDom|null prev_sibling() Returns the previous sibling of node
28
 * @method SimpleHtmlDom|null parent() Returns the parent of node
29
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
30
 * @method string outerHtml() Get dom node's outer html
31
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
32
 *
33
 */
34
class SimpleHtmlDom implements \IteratorAggregate
35
{
36
  /**
   * Alias table used by __call(): maps an accepted alias method name (key)
   * to the name of the real method of this class that handles it (value).
   *
   * @var array
   */
  protected static $functionAliases = array(
      // tree navigation aliases
      'children'     => 'childNodes',
      'first_child'  => 'firstChild',
      'last_child'   => 'lastChild',
      'next_sibling' => 'nextSibling',
      'prev_sibling' => 'previousSibling',
      'parent'       => 'parentNode',
      // serialisation aliases
      'outertext'    => 'html',
      'outerhtml'    => 'html',
      'innertext'    => 'innerHtml',
      'innerhtml'    => 'innerHtml',
  );
51
52
  /**
   * The wrapped DOM node.
   *
   * The constructor accepts any DOMNode, so this is not guaranteed to be a
   * DOMElement. The attribute helpers of this class call element-only
   * methods (getAttribute(), setAttribute(), hasAttribute(), ...) on it and
   * therefore only work when the wrapped node really is an element.
   *
   * @var DOMElement|DOMNode
   */
  protected $node;
56
57
  /**
   * Wrap the given DOM node.
   *
   * The node is stored as-is; no check is made that it is a DOMElement.
   *
   * @param DOMNode $node the node to wrap
   */
  public function __construct(DOMNode $node)
  {
    $this->node = $node;
  }
66
67
  /**
   * Dispatch a call to an alias method name (see self::$functionAliases).
   *
   * The called name is lower-cased before the lookup: every alias key is
   * lower-case, while PHP passes the name to __call() exactly as the caller
   * spelled it. Without the normalisation, camelCase spellings such as
   * "outerText()" or "innerText()" never match an alias.
   *
   * @param string $name      the called method name
   * @param array  $arguments the arguments of the call, forwarded unchanged
   *
   * @return null|string|SimpleHtmlDom|SimpleHtmlDomNode whatever the aliased method returns
   *
   * @throws BadMethodCallException when the name is not a known alias
   */
  public function __call($name, $arguments)
  {
    $alias = strtolower($name);

    if (isset(self::$functionAliases[$alias])) {
      return call_user_func_array(array($this, self::$functionAliases[$alias]), $arguments);
    }

    throw new BadMethodCallException('Method does not exist');
  }
82
83
  /**
   * Magic property read.
   *
   * The property name is matched case-insensitively:
   *  - "outerhtml" / "outertext" => html()
   *  - "innerhtml" / "innertext" => innerHtml()
   *  - "text" / "plaintext"      => text()
   *  - "tag"                     => the node name
   *  - "attr"                    => getAllAttributes()
   *  - anything else             => getAttribute() with the lower-cased name
   *
   * @param string $name
   *
   * @return array|null|string
   */
  public function __get($name)
  {
    $key = strtolower($name);

    if ($key === 'outerhtml' || $key === 'outertext') {
      return $this->html();
    }

    if ($key === 'innerhtml' || $key === 'innertext') {
      return $this->innerHtml();
    }

    if ($key === 'text' || $key === 'plaintext') {
      return $this->text();
    }

    if ($key === 'tag') {
      return $this->node->nodeName;
    }

    if ($key === 'attr') {
      return $this->getAllAttributes();
    }

    return $this->getAttribute($key);
  }
110
111
  /**
   * Calling the object like a function is a shortcut for find().
   *
   * @param string   $selector passed through to find()
   * @param null|int $idx      passed through to find()
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null the result of find()
   */
  public function __invoke($selector, $idx = null)
  {
    $found = $this->find($selector, $idx);

    return $found;
  }
121
122
  /**
   * Magic isset() support.
   *
   * The name is lower-cased first, matching what __get() and __set() do, so
   * that camelCase spellings such as "outerText" or "innerHtml" are
   * recognised as the built-in virtual properties instead of being looked
   * up as attributes.
   *
   * @param string $name
   *
   * @return bool true for the built-in virtual properties, otherwise
   *              whether the node has an attribute with the lower-cased name
   */
  public function __isset($name)
  {
    $name = strtolower($name);

    switch ($name) {
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      default:
        return $this->hasAttribute($name);
    }
  }
142
143
  /**
   * Magic property write.
   *
   * The property name is matched case-insensitively:
   *  - "outerhtml" / "outertext" => replaceNode()
   *  - "innerhtml" / "innertext" => replaceChild()
   *  - anything else             => setAttribute() with the lower-cased name
   *
   * @param string $name
   * @param mixed  $value
   *
   * @return SimpleHtmlDom|null the result of the method the write was routed to
   */
  public function __set($name, $value)
  {
    $key = strtolower($name);

    if ($key === 'outerhtml' || $key === 'outertext') {
      return $this->replaceNode($value);
    }

    if ($key === 'innertext' || $key === 'innerhtml') {
      return $this->replaceChild($value);
    }

    return $this->setAttribute($key, $value);
  }
164
165
  /**
   * String conversion yields the same value as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
172
173
  /**
   * Magic unset() support: removes the attribute with the given name.
   *
   * The name is lower-cased first, matching what __get() and __set() do
   * with magic property names. Removal goes through setAttribute() with a
   * null value.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom
   */
  public function __unset($name)
  {
    return $this->setAttribute(strtolower($name), null);
  }
182
183
  /**
   * Return the wrapped child nodes, or a single one of them.
   *
   * @param int $idx -1 (default) for the whole list, otherwise the index
   *                 of the wanted child
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null the list, the child at
   *                                              $idx, or null when there is
   *                                              no child at that index
   */
  public function childNodes($idx = -1)
  {
    $children = $this->getIterator();

    if ($idx === -1) {
      return $children;
    }

    return isset($children[$idx]) ? $children[$idx] : null;
  }
204
205
  /**
   * Find nodes with a CSS selector.
   *
   * Delegates to the find() method of a new HtmlDomParser created from
   * this element (see getHtmlDomParser()).
   *
   * @param string   $selector
   * @param null|int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function find($selector, $idx = null)
  {
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
  }
217
218
  /**
   * Return the first child of the node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when the node has no children
   */
  public function firstChild()
  {
    $first = $this->node->firstChild;

    return $first === null ? null : new self($first);
  }
233
234
  /**
   * Return all attributes of the node as a name => value array.
   *
   * Each value is passed through
   * HtmlDomParser::putReplacedBackToPreserveHtmlEntities() before it is
   * returned.
   *
   * @return array|null null when the node has no attributes
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $result = array();
    foreach ($this->node->attributes as $attribute) {
      $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
  }
252
253
  /**
   * Return the value of one attribute.
   *
   * The raw value read from the node is passed through
   * HtmlDomParser::putReplacedBackToPreserveHtmlEntities() before it is
   * returned.
   *
   * @param string $name attribute name
   *
   * @return string
   */
  public function getAttribute($name)
  {
    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities(
        $this->node->getAttribute($name)
    );
  }
266
267
  /**
   * Return the first element matching the given id.
   *
   * Implemented as find() with the selector "#<id>" and index 0.
   *
   * @param string $id
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
278
279
  /**
   * Return the first descendant element with the given tag name.
   *
   * @param string $name tag name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank a blank node when nothing matches
   */
  public function getElementByTagName($name)
  {
    $first = $this->node->getElementsByTagName($name)->item(0);

    if ($first === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new self($first);
  }
296
297
  /**
   * Return the elements matching the given id.
   *
   * Implemented as find() with the selector "#<id>".
   *
   * @param string   $id
   * @param null|int $idx passed through to find()
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
309
310
  /**
   * Return the descendant elements with the given tag name.
   *
   * @param string   $name tag name
   * @param null|int $idx  null for the whole list; otherwise the index of
   *                       the wanted element, where a negative index counts
   *                       from the end of the list
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   *         the list, the element at $idx, or a blank node when there is no
   *         element at that index
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $matches = new SimpleHtmlDomNode();
    foreach ($this->node->getElementsByTagName($name) as $domNode) {
      $matches[] = new self($domNode);
    }

    if ($idx === null) {
      return $matches;
    }

    // A negative index is an offset from the end of the list.
    if ($idx < 0) {
      $idx = count($matches) + $idx;
    }

    return isset($matches[$idx]) ? $matches[$idx] : new SimpleHtmlDomNodeBlank();
  }
342
343
  /**
   * Create a new HtmlDomParser from this element.
   *
   * A new parser object is constructed on every call.
   *
   * @return HtmlDomParser
   */
  public function getHtmlDomParser()
  {
    $parser = new HtmlDomParser($this);

    return $parser;
  }
352
353
  /**
   * IteratorAggregate implementation: the child nodes of this node, each
   * wrapped in a new instance of this class.
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode the wrapped children; empty when the node has
   *                           no child nodes
   */
  public function getIterator()
  {
    $children = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
      return $children;
    }

    foreach ($this->node->childNodes as $domNode) {
      $children[] = new self($domNode);
    }

    return $children;
  }
371
372
  /**
   * Return the wrapped DOM node.
   *
   * @return DOMNode
   */
  public function getNode()
  {
    $wrapped = $this->node;

    return $wrapped;
  }
379
380
  /**
   * Tell whether the node has an attribute with the given name.
   *
   * @param string $name attribute name
   *
   * @return bool
   */
  public function hasAttribute($name)
  {
    $exists = $this->node->hasAttribute($name);

    return $exists;
  }
391
392
  /**
   * Get the outer html of the node.
   *
   * Delegates to the html() method of a new HtmlDomParser created from
   * this element.
   *
   * @return string
   */
  public function html()
  {
    $parser = $this->getHtmlDomParser();

    return $parser->html();
  }
401
402
  /**
   * Get the inner html of the node.
   *
   * Delegates to the innerHtml() method of a new HtmlDomParser created
   * from this element.
   *
   * @return string
   */
  public function innerHtml()
  {
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml();
  }
411
412
  /**
   * Return the last child of the node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when the node has no children
   */
  public function lastChild()
  {
    $last = $this->node->lastChild;

    return $last === null ? null : new self($last);
  }
427
428
  /**
   * Return the next sibling of the node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when there is no following sibling
   */
  public function nextSibling()
  {
    $next = $this->node->nextSibling;

    return $next === null ? null : new self($next);
  }
443
444
  /**
   * Return the parent of the node, wrapped in a new instance.
   *
   * A node without a parent (for example a detached node) yields null
   * instead of passing null to the constructor, which only accepts a
   * DOMNode. This matches the null handling of firstChild(), lastChild(),
   * nextSibling() and previousSibling().
   *
   * @return SimpleHtmlDom|null null when the node has no parent
   */
  public function parentNode()
  {
    $parent = $this->node->parentNode;

    if ($parent === null) {
      return null;
    }

    return new self($parent);
  }
453
454
  /**
   * Return the previous sibling of the node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when there is no preceding sibling
   */
  public function previousSibling()
  {
    $previous = $this->node->previousSibling;

    return $previous === null ? null : new self($previous);
  }
469
470
  /**
   * Replace all children of the node with the given html fragment.
   *
   * An empty value only removes the existing children. A non-empty value is
   * parsed with HtmlDomParser and accepted only when the parsed document's
   * outer text, after normalisation, equals the normalised input.
   *
   * @param string $string html fragment, or an empty value to clear the node
   *
   * @return $this
   *
   * @throws RuntimeException when the fragment does not survive the parse/compare round trip
   */
  protected function replaceChild($string)
  {
    // Defined up-front so the "nothing to insert" case below is explicit.
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      // Both sides are strings; strict comparison avoids PHP's numeric-string
      // juggling (e.g. "1e3" == "1000").
      if ($this->normalizeStringForComparision($newDocument->outertext) !== $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // "childNodes" is a live list: removing entries while iterating over it
    // skips nodes. Removing the first child until none is left empties the
    // node reliably.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);

      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

      $this->node->appendChild($newNode);
    }

    return $this;
  }
502
503
  /**
   * Replace this node with the given html fragment.
   *
   * An empty value removes the node from its parent and returns null.
   * A non-empty value is parsed with HtmlDomParser and accepted only when
   * the parsed document's outer text, after normalisation, equals the
   * normalised input; the wrapped node then becomes the imported new node.
   *
   * A node without a parent is handled without a null dereference: there is
   * nothing to detach it from, so only the wrapped node is swapped.
   *
   * @param string $string html fragment, or an empty value to remove the node
   *
   * @return $this|null null when the node was removed, otherwise $this
   *
   * @throws RuntimeException when the fragment does not survive the parse/compare round trip
   */
  protected function replaceNode($string)
  {
    $parent = $this->node->parentNode;

    if (empty($string)) {
      if ($parent !== null) {
        $parent->removeChild($this->node);
      }

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    // Both sides are strings; strict comparison avoids PHP's numeric-string
    // juggling (e.g. "1e3" == "1000").
    if ($this->normalizeStringForComparision($newDocument->outertext) !== $this->normalizeStringForComparision($string)) {
      throw new RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);

    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    if ($parent !== null) {
      $parent->replaceChild($newNode, $this->node);
    }
    $this->node = $newNode;

    return $this;
  }
537
538
  /**
   * Normalize the given string for comparison.
   *
   * Steps, in order: lower-case the string, strip every space and line
   * break, trim what is left, then url-decode and url-encode the result.
   *
   * @param string $string
   *
   * @return string
   */
  private function normalizeStringForComparision($string)
  {
    $lowered = strtolower($string);
    $compact = str_replace(array(' ', "\n", "\r\n", "\r"), '', $lowered);
    $trimmed = trim($compact);

    return urlencode(urldecode($trimmed));
  }
549
550
  /**
   * Strip the wrapper that the parser put around a bare html fragment.
   *
   * Nothing is changed unless getIsDOMDocumentCreatedWithoutHtml() returns
   * true. In that case the doctype node is removed, the "html" and "body"
   * elements are replaced by their own children, and the first "p" element
   * is renamed to "simpleHtmlDomP".
   *
   * @param HtmlDomParser $newDocument
   *
   * @return HtmlDomParser the same parser object that was passed in
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument)
  {
    if ($newDocument->getIsDOMDocumentCreatedWithoutHtml() !== true) {
      return $newDocument;
    }

    // Remove doc-type node.
    $newDocument->getDocument()->doctype->parentNode->removeChild($newDocument->getDocument()->doctype);

    // Remove html element, preserving child nodes.
    $htmlElement = $newDocument->getDocument()->getElementsByTagName('html')->item(0);
    $htmlContent = $newDocument->getDocument()->createDocumentFragment();
    while ($htmlElement->childNodes->length > 0) {
      $htmlContent->appendChild($htmlElement->childNodes->item(0));
    }
    $htmlElement->parentNode->replaceChild($htmlContent, $htmlElement);

    // Remove body element, preserving child nodes.
    $bodyElement = $newDocument->getDocument()->getElementsByTagName('body')->item(0);
    $bodyContent = $newDocument->getDocument()->createDocumentFragment();
    while ($bodyElement->childNodes->length > 0) {
      $bodyContent->appendChild($bodyElement->childNodes->item(0));
    }
    $bodyElement->parentNode->replaceChild($bodyContent, $bodyElement);

    // At this point DOMDocument still added a "<p>"-wrapper around our string,
    // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
    $wrapper = $newDocument->getDocument()->getElementsByTagName('p')->item(0);
    $this->changeElementName($wrapper, 'simpleHtmlDomP');

    return $newDocument;
  }
585
586
  /**
   * Change the tag name of a node.
   *
   * A DOM node cannot be renamed in place, so a new element with the wanted
   * name is created in the same document, receives copies of the children
   * and the attributes of the old node, and then replaces the old node as a
   * child of the owner document.
   *
   * @param DOMNode $node the node to rename
   * @param string  $name the new tag name
   *
   * @return DOMElement the newly created element
   */
  protected function changeElementName(\DOMNode $node, $name)
  {
    $newnode = $node->ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
      $child = $node->ownerDocument->importNode($child, true);
      $newnode->appendChild($child);
    }

    // "attributes" is null for nodes that are not elements.
    if ($node->attributes !== null) {
      foreach ($node->attributes as $attrNode) {
        // setAttribute() needs the attribute's string value, not the
        // DOMAttr object itself.
        $newnode->setAttribute($attrNode->name, $attrNode->value);
      }
    }

    $newnode->ownerDocument->replaceChild($newnode, $node);

    return $newnode;
  }
608
609
  /**
   * Set or remove an attribute.
   *
   * The attribute is removed when the value is null, false, an empty string
   * or an empty array. Every other value is written to the node. That
   * includes 0 and "0", which are legitimate attribute values
   * (e.g. tabindex="0") and must not be treated as "no value".
   *
   * @param string $name  attribute name
   * @param mixed  $value attribute value, or an empty value to remove the attribute
   *
   * @return $this
   */
  public function setAttribute($name, $value)
  {
    $remove = $value === null
        || $value === false
        || $value === ''
        || $value === array();

    if ($remove) {
      $this->node->removeAttribute($name);
    } else {
      $this->node->setAttribute($name, $value);
    }

    return $this;
  }
627
628
  /**
   * Get the plain text of the node (its "textContent").
   *
   * @return string
   */
  public function text()
  {
    $plain = $this->node->textContent;

    return $plain;
  }
637
}
638