Completed
Push — master ( 60197c...2a5e10 )
by Lars
02:03
created

SimpleHtmlDom::text()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMElement;
7
use DOMNode;
8
use RuntimeException;
9
10
/**
11
 * Class SimpleHtmlDom
12
 *
13
 * @package voku\helper
14
 *
15
 * @property string      outerText Get dom node's outer html (alias for "outerHtml")
16
 * @property string      outerHtml Get dom node's outer html
17
 * @property string      innerText Get dom node's inner html (alias for "innerHtml")
18
 * @property string      innerHtml Get dom node's inner html
19
 * @property-read string plaintext Get dom node's plain text
20
 * @property-read string tag       Get dom node name
21
 * @property-read array|null attr  Get dom node attributes (name => value), or null if the node has none
22
 *
23
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children() children($idx = -1) Returns children of node
24
 * @method SimpleHtmlDom|null first_child() Returns the first child of node
25
 * @method SimpleHtmlDom|null last_child() Returns the last child of node
26
 * @method SimpleHtmlDom|null next_sibling() Returns the next sibling of node
27
 * @method SimpleHtmlDom|null prev_sibling() Returns the previous sibling of node
28
 * @method SimpleHtmlDom|null parent() Returns the parent of node
29
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
30
 * @method string outerHtml() Get dom node's outer html
31
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
32
 *
33
 */
34
class SimpleHtmlDom implements \IteratorAggregate
35
{
36
  /**
   * Legacy method names (lower-cased) mapped to the methods of this class that implement them.
   *
   * __call() lower-cases the requested method name and looks it up here.
   *
   * @var array
   */
  protected static $functionAliases = array(
      // tree navigation
      'children'     => 'childNodes',
      'first_child'  => 'firstChild',
      'last_child'   => 'lastChild',
      'next_sibling' => 'nextSibling',
      'prev_sibling' => 'previousSibling',
      'parent'       => 'parentNode',
      // markup access
      'outertext'    => 'html',
      'outerhtml'    => 'html',
      'innertext'    => 'innerHtml',
      'innerhtml'    => 'innerHtml',
  );
51
52
  /**
   * The wrapped DOM node.
   *
   * The constructor accepts any DOMNode, not only elements, so the property is
   * documented with the base type as well.
   *
   * @var DOMNode|DOMElement
   */
  protected $node;
56
57
  /**
   * Wrap a DOM node.
   *
   * @param DOMNode $node the node all methods of this object operate on
   */
  public function __construct(DOMNode $node)
  {
    $this->node = $node;
  }
66
67
  /**
   * Dispatch calls to the legacy method names listed in self::$functionAliases.
   *
   * The lookup is case-insensitive: the requested name is lower-cased first.
   *
   * @param string $name      called method name
   * @param array  $arguments arguments forwarded unchanged to the aliased method
   *
   * @return null|string|SimpleHtmlDom whatever the aliased method returns
   *
   * @throws BadMethodCallException if no alias exists for the name
   */
  public function __call($name, $arguments)
  {
    $alias = strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      throw new BadMethodCallException('Method does not exist');
    }

    $callback = array($this, self::$functionAliases[$alias]);

    return call_user_func_array($callback, $arguments);
  }
84
85
  /**
   * Read a virtual property (case-insensitive).
   *
   * "outerhtml"/"outertext" map to html(), "innerhtml"/"innertext" to innerHtml(),
   * "text"/"plaintext" to text(), "tag" to the node name and "attr" to getAllAttributes().
   * Any other name is read as an attribute via getAttribute().
   *
   * @param string $name
   *
   * @return array|null|string
   */
  public function __get($name)
  {
    $property = strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
      return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
      return $this->text();
    }

    if ($property === 'tag') {
      return $this->node->nodeName;
    }

    if ($property === 'attr') {
      return $this->getAllAttributes();
    }

    return $this->getAttribute($property);
  }
112
113
  /**
   * Calling the object like a function is a shortcut for find().
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
123
124
  /**
   * Tell whether a virtual property or an attribute is available.
   *
   * The markup/text properties and "tag" always exist; any other name is
   * answered by hasAttribute().
   *
   * @param string $name
   *
   * @return bool
   */
  public function __isset($name)
  {
    // Normalise the name the same way __get() and __set() do; otherwise
    // isset($element->outerHtml) is false although $element->outerHtml works.
    $name = strtolower($name);

    switch ($name) {
      case 'outertext':
      case 'outerhtml':
      case 'innertext':
      case 'innerhtml':
      case 'plaintext':
      case 'text':
      case 'tag':
        return true;
      default:
        return $this->hasAttribute($name);
    }
  }
144
145
  /**
   * Write a virtual property (case-insensitive).
   *
   * "outerhtml"/"outertext" go to replaceNode(), "innerhtml"/"innertext" to
   * replaceChild(); any other name is written as an attribute via setAttribute().
   *
   * @param string $name
   * @param mixed  $value
   *
   * @return SimpleHtmlDom
   */
  public function __set($name, $value)
  {
    $property = strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->replaceNode($value);
    }

    if ($property === 'innertext' || $property === 'innerhtml') {
      return $this->replaceChild($value);
    }

    return $this->setAttribute($property, $value);
  }
166
167
  /**
   * String conversion yields the same value as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
174
175
  /**
   * unset($element->name) removes the attribute of that name via removeAttribute().
   *
   * @param string $name
   *
   * @return SimpleHtmlDom
   */
  public function __unset($name)
  {
    $self = $this->removeAttribute($name);

    return $self;
  }
184
185
  /**
   * Return the children of this node.
   *
   * @param int $idx -1 (default) returns the whole list from getIterator();
   *                 any other value returns the child stored at that index
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null null if no child exists at $idx
   */
  public function childNodes($idx = -1)
  {
    $children = $this->getIterator();

    if ($idx === -1) {
      return $children;
    }

    return isset($children[$idx]) ? $children[$idx] : null;
  }
206
207
  /**
   * Find nodes with a CSS selector.
   *
   * The search is delegated to a HtmlDomParser created from this object.
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function find($selector, $idx = null)
  {
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
  }
219
220
  /**
   * Return the first child of this node.
   *
   * @return SimpleHtmlDom|null null if the node has no children
   */
  public function firstChild()
  {
    $first = $this->node->firstChild;

    return $first === null ? null : new self($first);
  }
235
236
  /**
   * Return all attributes of this node as a name => value array.
   *
   * Every value is passed through HtmlDomParser::putReplacedBackToPreserveHtmlEntities().
   *
   * @return array|null null if the node has no attributes
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $result = array();
    foreach ($this->node->attributes as $attribute) {
      $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
  }
254
255
  /**
   * Return the value of an attribute.
   *
   * The raw value is passed through HtmlDomParser::putReplacedBackToPreserveHtmlEntities().
   *
   * @param string $name
   *
   * @return string
   */
  public function getAttribute($name)
  {
    // The wrapped node may be any DOMNode (e.g. a text node returned by firstChild());
    // only DOMElement has getAttribute(), so other node types are treated like an
    // element without that attribute instead of triggering a fatal error.
    $html = $this->node instanceof DOMElement ? $this->node->getAttribute($name) : '';

    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
  }
268
269
  /**
   * Return the first match of the selector "#<id>".
   *
   * @param string $id
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
280
281
  /**
   * Return the first descendant with the given tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank a blank node object if nothing matches
   */
  public function getElementByTagName($name)
  {
    $match = $this->node->getElementsByTagName($name)->item(0);

    if ($match === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new self($match);
  }
298
299
  /**
   * Return the matches of the selector "#<id>".
   *
   * @param string   $id
   * @param null|int $idx passed through to find()
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
311
312
  /**
   * Return the descendants with the given tag name.
   *
   * @param string   $name
   * @param null|int $idx null returns the whole list; otherwise the element at that
   *                      position, where a negative value counts from the end of the list
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank a blank node object
   *                                                                      if $idx does not exist
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $elements = new SimpleHtmlDomNode();

    foreach ($this->node->getElementsByTagName($name) as $match) {
      $elements[] = new self($match);
    }

    if ($idx === null) {
      return $elements;
    }

    // Negative positions are resolved relative to the number of matches.
    $position = $idx < 0 ? count($elements) + $idx : $idx;

    if (!isset($elements[$position])) {
      return new SimpleHtmlDomNodeBlank();
    }

    return $elements[$position];
  }
344
345
  /**
   * Create a new HtmlDomParser from this object.
   *
   * A fresh parser instance is built on every call.
   *
   * @return HtmlDomParser
   */
  public function getHtmlDomParser()
  {
    $parser = new HtmlDomParser($this);

    return $parser;
  }
354
355
  /**
   * Retrieve an external iterator over the child nodes (IteratorAggregate).
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode list holding one SimpleHtmlDom per child node; empty if there are none
   */
  public function getIterator()
  {
    $elements = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
      return $elements;
    }

    foreach ($this->node->childNodes as $child) {
      $elements[] = new self($child);
    }

    return $elements;
  }
373
374
  /**
   * Return the wrapped DOM node.
   *
   * @return DOMNode
   */
  public function getNode()
  {
    $wrapped = $this->node;

    return $wrapped;
  }
381
382
  /**
   * Determine if an attribute exists on the element.
   *
   * @param string $name
   *
   * @return bool false if the attribute is missing or the wrapped node is not an element
   */
  public function hasAttribute($name)
  {
    // Only DOMElement provides hasAttribute(); other node types (text, comment, ...)
    // cannot carry attributes, so answer false instead of triggering a fatal error.
    return $this->node instanceof DOMElement && $this->node->hasAttribute($name);
  }
393
394
  /**
   * Get the dom node's outer html, as produced by HtmlDomParser::html().
   *
   * @return string
   */
  public function html()
  {
    $parser = $this->getHtmlDomParser();

    return $parser->html();
  }
403
404
  /**
   * Get the dom node's inner html, as produced by HtmlDomParser::innerHtml().
   *
   * @return string
   */
  public function innerHtml()
  {
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml();
  }
413
414
  /**
   * Return the last child of this node.
   *
   * @return SimpleHtmlDom|null null if the node has no children
   */
  public function lastChild()
  {
    $last = $this->node->lastChild;

    return $last === null ? null : new self($last);
  }
429
430
  /**
   * Return the sibling that follows this node.
   *
   * @return SimpleHtmlDom|null null if there is no following sibling
   */
  public function nextSibling()
  {
    $next = $this->node->nextSibling;

    return $next === null ? null : new self($next);
  }
445
446
  /**
   * Return the parent of this node.
   *
   * @return SimpleHtmlDom|null null if the node has no parent
   */
  public function parentNode()
  {
    $parent = $this->node->parentNode;

    // A node without a parent would otherwise pass null to the constructor,
    // which only accepts a DOMNode.
    if ($parent === null) {
      return null;
    }

    return new self($parent);
  }
455
456
  /**
   * Return the sibling that precedes this node.
   *
   * @return SimpleHtmlDom|null null if there is no preceding sibling
   */
  public function previousSibling()
  {
    $previous = $this->node->previousSibling;

    return $previous === null ? null : new self($previous);
  }
471
472
  /**
   * Replace the content (all child nodes) of this node.
   *
   * A non-empty string is parsed with HtmlDomParser and must survive the round trip
   * (compared after normalizeStringForComparision()); an empty value just clears the node.
   *
   * @param string $string new inner html
   *
   * @return $this
   *
   * @throws RuntimeException if the parsed markup does not match the given string
   */
  protected function replaceChild($string)
  {
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      // Strict comparison: both sides are strings, and a loose "!=" would compare
      // numeric-looking strings by value instead of character by character.
      if ($this->normalizeStringForComparision($newDocument->outertext) !== $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // childNodes is a live list: removing nodes while iterating over it with foreach
    // skips entries, so always drop the current first child until none is left.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);
      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
      $this->node->appendChild($newNode);
    }

    return $this;
  }
504
505
  /**
   * Replace this node with the given markup.
   *
   * An empty value removes the node from its parent. Otherwise the string is parsed
   * with HtmlDomParser, must survive the round trip (compared after
   * normalizeStringForComparision()) and then takes the place of the current node.
   *
   * @param string $string new outer html
   *
   * @return $this|null null if the node was removed because $string was empty
   *
   * @throws RuntimeException if the parsed markup does not match the given string
   */
  protected function replaceNode($string)
  {
    if (empty($string)) {
      // A node that is not attached to a parent has nothing to be removed from.
      $parent = $this->node->parentNode;
      if ($parent !== null) {
        $parent->removeChild($this->node);
      }

      return null;
    }

    $newDocument = new HtmlDomParser($string);

    // Strict comparison: both sides are strings, and a loose "!=" would compare
    // numeric-looking strings by value instead of character by character.
    if ($this->normalizeStringForComparision($newDocument->outertext) !== $this->normalizeStringForComparision($string)) {
      throw new RuntimeException('Not valid HTML fragment');
    }

    $newDocument = $this->cleanHtmlWrapper($newDocument);

    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

    $this->node->parentNode->replaceChild($newNode, $this->node);
    $this->node = $newNode;

    return $this;
  }
535
536
  /**
   * Normalize the given string for comparison.
   *
   * Steps, in order: lower-case, strip spaces and line breaks, trim,
   * url-decode and url-encode again.
   *
   * @param string $string
   *
   * @return string
   */
  private function normalizeStringForComparision($string)
  {
    $lowered = strtolower($string);
    $compact = str_replace(array(' ', "\n", "\r\n", "\r"), '', $lowered);
    $trimmed = trim($compact);

    return urlencode(urldecode($trimmed));
  }
547
548
  /**
   * Strip the wrapper nodes from a document that was created from an html fragment.
   *
   * Only acts when getIsDOMDocumentCreatedWithoutHtml() returns true: the doctype is
   * removed, the "html" and "body" elements are replaced by their children and the
   * first "p" element is renamed to "simpleHtmlDomP".
   *
   * @param HtmlDomParser $newDocument
   *
   * @return HtmlDomParser the same parser object that was passed in
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument)
  {
    if ($newDocument->getIsDOMDocumentCreatedWithoutHtml() === true) {
      $document = $newDocument->getDocument();

      // Remove doc-type node (a document is not guaranteed to have one).
      $doctype = $document->doctype;
      if ($doctype !== null && $doctype->parentNode !== null) {
        $doctype->parentNode->removeChild($doctype);
      }

      // Remove the html element, then the body element, preserving their child nodes.
      foreach (array('html', 'body') as $wrapperTag) {
        $wrapper = $document->getElementsByTagName($wrapperTag)->item(0);
        if ($wrapper === null) {
          continue;
        }

        $fragment = $document->createDocumentFragment();
        // Appending a child to the fragment detaches it from the wrapper,
        // so the first remaining child is moved until the wrapper is empty.
        while ($wrapper->childNodes->length > 0) {
          $fragment->appendChild($wrapper->childNodes->item(0));
        }
        $wrapper->parentNode->replaceChild($fragment, $wrapper);
      }

      // At this point the document still has a "<p>"-wrapper around our string,
      // so we replace it with "<simpleHtmlDomP>"; per the original note it is deleted later on.
      $paragraph = $document->getElementsByTagName('p')->item(0);
      if ($paragraph !== null) {
        $this->changeElementName($paragraph, 'simpleHtmlDomP');
      }
    }

    return $newDocument;
  }
583
584
  /**
   * Change the tag name of a node.
   *
   * DOM nodes cannot be renamed in place, so a new element with the requested name is
   * created, receives copies of the children and the attributes of the old node, and
   * then replaces the old node.
   *
   * @param DOMNode $node
   * @param string  $name
   *
   * @return DOMElement the newly created element
   */
  protected function changeElementName(\DOMNode $node, $name)
  {
    $newnode = $node->ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
      $child = $node->ownerDocument->importNode($child, true);
      $newnode->appendChild($child);
    }

    // "attributes" is null for nodes that are not elements.
    if ($node->attributes !== null) {
      foreach ($node->attributes as $attrNode) {
        // The map yields DOMAttr objects; setAttribute() needs the plain string value.
        $newnode->setAttribute($attrNode->nodeName, $attrNode->nodeValue);
      }
    }

    // Replace the node where it actually lives. The previous code always asked the
    // document itself, which only works for direct children of the document.
    $parent = $node->parentNode !== null ? $node->parentNode : $node->ownerDocument;
    $parent->replaceChild($newnode, $node);

    return $newnode;
  }
606
607
  /**
   * Set attribute value
   *
   * @param string      $name
   * @param string|null $value      Set to NULL or empty string, to remove the attribute.
   *                                Zero values ("0", 0) are real values and are stored.
   * @param bool        $strict     $value must be NULL, to remove the attribute,
   *                                so that you can set an empty string as attribute-value e.g. autofocus=""
   *
   * @return $this
   */
  public function setAttribute($name, $value = null, $strict = false)
  {
    $remove = false;

    if ($strict === true) {
      $remove = (null === $value);
    } elseif ($strict === false) {
      // empty() alone would also drop legitimate values such as value="0" or tabindex="0",
      // so numeric zero values are excluded from the "remove" case.
      $remove = empty($value) && !is_numeric($value);
    }

    if ($remove) {
      $this->node->removeAttribute($name);
    } else {
      $this->node->setAttribute($name, $value);
    }

    return $this;
  }
631
632
  /**
   * Remove an attribute from the wrapped node.
   *
   * @param string $name
   *
   * @return $this
   */
  public function removeAttribute($name)
  {
    $element = $this->node;
    $element->removeAttribute($name);

    return $this;
  }
645
646
  /**
   * Get the dom node's plain text (the "textContent" of the wrapped node).
   *
   * @return string
   */
  public function text()
  {
    $content = $this->node->textContent;

    return $content;
  }
655
}
656