Completed
Push — master ( e8747e...b70373 )
by Lars
02:57
created

SimpleHtmlDom::find()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 5
Bugs 0 Features 0
Metric Value
c 5
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 2
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMElement;
7
use DOMNode;
8
use RuntimeException;
9
10
/**
11
 * Class SimpleHtmlDom
12
 *
13
 * @package voku\helper
14
 *
15
 * @property string outertext Get dom node's outer html
16
 * @property string innertext Get dom node's inner html
17
 * @property string plaintext (read-only) Get dom node's plain text
18
 * @property string tag       (read-only) Get dom node name
19
 * @property string attr      (read-only) Get dom node attributes
20
 *
21
 * @method SimpleHtmlDomNode|SimpleHtmlDom|null children() children($idx = -1) Returns children of node
22
 * @method SimpleHtmlDom|null first_child() Returns the first child of node
23
 * @method SimpleHtmlDom|null last_child() Returns the last child of node
24
 * @method SimpleHtmlDom|null next_sibling() Returns the next sibling of node
25
 * @method SimpleHtmlDom|null prev_sibling() Returns the previous sibling of node
26
 * @method SimpleHtmlDom|null parent() Returns the parent of node
27
 * @method string outertext() Get dom node's outer html
28
 * @method string innertext() Get dom node's inner html
29
 */
30
class SimpleHtmlDom implements \IteratorAggregate
31
{
32
  /**
33
   * @var array
34
   */
35
  protected static $functionAliases = array(
36
      'children'     => 'childNodes',
37
      'first_child'  => 'firstChild',
38
      'last_child'   => 'lastChild',
39
      'next_sibling' => 'nextSibling',
40
      'prev_sibling' => 'previousSibling',
41
      'parent'       => 'parentNode',
42
      'outertext'    => 'html',
43
      'innertext'    => 'innerHtml',
44
  );
45
  /**
46
   * @var DOMElement
47
   */
48
  protected $node;
49
50
  /**
51
   * SimpleHtmlDom constructor.
52
   *
53
   * @param DOMNode $node
54
   */
55 69
  public function __construct(DOMNode $node)
56
  {
57 69
    $this->node = $node;
0 ignored issues
show
Documentation Bug introduced by
$node is of type object<DOMNode>, but the property $node was declared to be of type object<DOMElement>. Are you sure that you always receive this specific sub-class here, or does it make sense to add an instanceof check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a given class or a super-class is assigned to a property that is type hinted more strictly.

Either this assignment is in error or an instanceof check should be added for that assignment.

class Alien {}

class Dalek extends Alien {}

class Plot
{
    /** @var  Dalek */
    public $villain;
}

$alien = new Alien();
$plot = new Plot();
if ($alien instanceof Dalek) {
    $plot->villain = $alien;
}
Loading history...
58 69
  }
59
60
  /**
61
   * @param $name
62
   * @param $arguments
63
   *
64
   * @return null|string|SimpleHtmlDom
65
   *
66
   */
67 8 View Code Duplication
  public function __call($name, $arguments)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
68
  {
69 8
    if (isset(self::$functionAliases[$name])) {
70 8
      return call_user_func_array(array($this, self::$functionAliases[$name]), $arguments);
71
    }
72
73
    throw new BadMethodCallException('Method does not exist');
74
  }
75
76
  /**
   * Magic property reader.
   *
   * The virtual properties "outertext", "innertext", "plaintext", "tag" and
   * "attr" are served by the matching accessor; any other name is looked up
   * as an attribute of the wrapped node.
   *
   * @param string $name
   *
   * @return array|null|string
   */
  public function __get($name)
  {
    if ($name === 'outertext') {
      return $this->html();
    }

    if ($name === 'innertext') {
      return $this->innerHtml();
    }

    if ($name === 'plaintext') {
      return $this->text();
    }

    if ($name === 'tag') {
      return $this->node->nodeName;
    }

    if ($name === 'attr') {
      return $this->getAllAttributes();
    }

    // Not a virtual property: fall back to a plain attribute read.
    return $this->getAttribute($name);
  }
98
99
  /**
   * Calling the object like a function is a shortcut for find().
   *
   * @param string $selector
   * @param int    $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    $matches = $this->find($selector, $idx);

    return $matches;
  }
109
110
  /**
111
   * @param $name
112
   *
113
   * @return bool
114
   */
115 1 View Code Duplication
  public function __isset($name)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
116
  {
117
    switch ($name) {
118 1
      case 'outertext':
119 1
      case 'innertext':
120 1
      case 'plaintext':
121 1
      case 'tag':
122
        return true;
123
      default:
124 1
        return $this->hasAttribute($name);
125
    }
126
  }
127
128
  /**
   * Magic property writer.
   *
   * "outertext" replaces the wrapped node, "innertext" replaces its children,
   * and any other name is written as an attribute.
   *
   * @param string $name
   * @param mixed  $value
   *
   * @return SimpleHtmlDom
   */
  public function __set($name, $value)
  {
    if ($name === 'outertext') {
      return $this->replaceNode($value);
    }

    if ($name === 'innertext') {
      return $this->replaceChild($value);
    }

    return $this->setAttribute($name, $value);
  }
145
146
  /**
   * String conversion yields the same value as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
153
154
  /**
   * Magic unset: forwards to setAttribute() with a null value.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom
   */
  public function __unset($name)
  {
    $self = $this->setAttribute($name, null);

    return $self;
  }
163
164
  /**
   * Returns the children of this node.
   *
   * With the default index (-1) the whole child list is returned; otherwise
   * the child stored at that index, or null when there is none.
   *
   * @param int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|null
   */
  public function childNodes($idx = -1)
  {
    $children = $this->getIterator();

    if ($idx === -1) {
      return $children;
    }

    return isset($children[$idx]) ? $children[$idx] : null;
  }
185
186
  /**
   * Find nodes with a CSS selector.
   *
   * The lookup is delegated to a HtmlDomParser created from this element.
   *
   * @param string $selector
   * @param int    $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function find($selector, $idx = null)
  {
    $parser = $this->getHtmlDomParser();

    return $parser->find($selector, $idx);
  }
198
199
  /**
   * Returns the first child of this node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when the node has no children
   */
  public function firstChild()
  {
    $child = $this->node->firstChild;

    return $child === null ? null : new self($child);
  }
214
215
  /**
   * Returns all attributes as a name => value map.
   *
   * Each value is passed through
   * HtmlDomParser::putReplacedBackToPreserveHtmlEntities() before it is returned.
   *
   * @return array|null null when the node carries no attributes
   */
  public function getAllAttributes()
  {
    if (!$this->node->hasAttributes()) {
      return null;
    }

    $result = array();
    foreach ($this->node->attributes as $attribute) {
      $result[$attribute->name] = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($attribute->value);
    }

    return $result;
  }
233
234
  /**
   * Returns the value of one attribute.
   *
   * The raw value is passed through
   * HtmlDomParser::putReplacedBackToPreserveHtmlEntities() before it is returned.
   *
   * @param string $name
   *
   * @return string
   */
  public function getAttribute($name)
  {
    return HtmlDomParser::putReplacedBackToPreserveHtmlEntities(
        $this->node->getAttribute($name)
    );
  }
247
248
  /**
   * Returns the first element matching the given id.
   *
   * Implemented as find() with the selector "#<id>" and index 0.
   *
   * @param string $id
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
259
260
  /**
   * Returns the first descendant element with the given tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank a blank node object when nothing matches
   */
  public function getElementByTagName($name)
  {
    $first = $this->node->getElementsByTagName($name)->item(0);

    if ($first === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new self($first);
  }
277
278
  /**
   * Returns the elements matching the given id.
   *
   * Implemented as find() with the selector "#<id>"; $idx is forwarded as is.
   *
   * @param string   $id
   * @param null|int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
290
291
  /**
292
   * Returns Elements by tag name
293
   *
294
   * @param string   $name
295
   * @param null|int $idx
296
   *
297
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
298
   */
299 1 View Code Duplication
  public function getElementsByTagName($name, $idx = null)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
300
  {
301 1
    $nodesList = $this->node->getElementsByTagName($name);
302
303 1
    $elements = new SimpleHtmlDomNode();
304
305 1
    foreach ($nodesList as $node) {
306 1
      $elements[] = new self($node);
307
    }
308
309 1
    if (null === $idx) {
310 1
      return $elements;
311
    } else {
312
      if ($idx < 0) {
313
        $idx = count($elements) + $idx;
314
      }
315
    }
316
317
    if (isset($elements[$idx])) {
318
      return $elements[$idx];
319
    } else {
320
      return new SimpleHtmlDomNodeBlank();
321
    }
322
  }
323
324
  /**
   * Builds a fresh HtmlDomParser around this element.
   *
   * A new parser object is created on every call.
   *
   * @return HtmlDomParser
   */
  public function getHtmlDomParser()
  {
    $parser = new HtmlDomParser($this);

    return $parser;
  }
333
334
  /**
   * Retrieve an external iterator over the direct children of this node.
   *
   * Every child node is wrapped in its own SimpleHtmlDom instance.
   *
   * @link  http://php.net/manual/en/iteratoraggregate.getiterator.php
   * @return SimpleHtmlDomNode list of wrapped children (empty when there are none)
   */
  public function getIterator()
  {
    $wrapped = new SimpleHtmlDomNode();

    if (!$this->node->hasChildNodes()) {
      return $wrapped;
    }

    foreach ($this->node->childNodes as $child) {
      $wrapped[] = new self($child);
    }

    return $wrapped;
  }
352
353
  /**
   * Gives access to the wrapped DOM node.
   *
   * @return DOMNode
   */
  public function getNode()
  {
    $wrappedNode = $this->node;

    return $wrappedNode;
  }
360
361
  /**
   * Tells whether the wrapped element has the given attribute.
   *
   * @param string $name
   *
   * @return bool
   */
  public function hasAttribute($name)
  {
    $exists = $this->node->hasAttribute($name);

    return $exists;
  }
372
373
  /**
   * Get the outer html of this node.
   *
   * Rendering is delegated to a HtmlDomParser created from this element.
   *
   * @return string
   */
  public function html()
  {
    $parser = $this->getHtmlDomParser();

    return $parser->html();
  }
382
383
  /**
   * Get the inner html of this node.
   *
   * Rendering is delegated to a HtmlDomParser created from this element.
   *
   * @return string
   */
  public function innerHtml()
  {
    $parser = $this->getHtmlDomParser();

    return $parser->innerHtml();
  }
392
393
  /**
   * Returns the last child of this node, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when the node has no children
   */
  public function lastChild()
  {
    $child = $this->node->lastChild;

    return $child === null ? null : new self($child);
  }
408
409
  /**
   * Returns the node that follows this one, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when there is no following sibling
   */
  public function nextSibling()
  {
    $sibling = $this->node->nextSibling;

    return $sibling === null ? null : new self($sibling);
  }
424
425
  /**
   * Returns the parent of this node, wrapped in a new instance.
   *
   * A node without a parent (e.g. a detached node or the document itself)
   * yields null, like the other navigation accessors of this class, instead
   * of failing on the constructor's DOMNode type hint.
   *
   * @return SimpleHtmlDom|null null when the node has no parent
   */
  public function parentNode()
  {
    $node = $this->node->parentNode;

    if ($node === null) {
      return null;
    }

    return new self($node);
  }
434
435
  /**
   * Returns the node that precedes this one, wrapped in a new instance.
   *
   * @return SimpleHtmlDom|null null when there is no preceding sibling
   */
  public function previousSibling()
  {
    $sibling = $this->node->previousSibling;

    return $sibling === null ? null : new self($sibling);
  }
450
451
  /**
   * Replace the children of this node with the given HTML fragment.
   *
   * An empty $string only removes the current children. Otherwise the string
   * is parsed first and rejected when the parsed markup, after normalization,
   * differs from the normalized input.
   *
   * @param string $string
   *
   * @return $this
   *
   * @throws RuntimeException when $string does not survive the parse round-trip
   */
  protected function replaceChild($string)
  {
    $newDocument = null;

    if (!empty($string)) {
      $newDocument = new HtmlDomParser($string);

      if ($this->normalizeStringForComparision($newDocument->outertext) != $this->normalizeStringForComparision($string)) {
        throw new RuntimeException('Not valid HTML fragment');
      }
    }

    // "childNodes" is a live list: removing entries while foreach-ing over it
    // skips nodes and leaves old children behind. Draining from the front
    // removes every child.
    while ($this->node->firstChild !== null) {
      $this->node->removeChild($this->node->firstChild);
    }

    if ($newDocument !== null) {
      $newDocument = $this->cleanHtmlWrapper($newDocument);

      $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);

      $this->node->appendChild($newNode);
    }

    return $this;
  }
483
484
  /**
485
   * Replace this node
486
   *
487
   * @param $string
488
   *
489
   * @return $this
490
   */
491 3
  protected function replaceNode($string)
492
  {
493 3
    if (empty($string)) {
494 1
      $this->node->parentNode->removeChild($this->node);
495
496 1
      return null;
497
    }
498
499 3
    $newDocument = new HtmlDomParser($string);
500
501
    // DEBUG
502
    //echo $this->normalizeStringForComparision($newDocument->outertext) . "\n";
0 ignored issues
show
Unused Code Comprehensibility introduced by
60% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
503
    //echo $this->normalizeStringForComparision($string) . "\n\n";
0 ignored issues
show
Unused Code Comprehensibility introduced by
62% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
504
505 3
    if ($this->normalizeStringForComparision($newDocument->outertext) != $this->normalizeStringForComparision($string)) {
506
      throw new RuntimeException('Not valid HTML fragment');
507
    }
508
509 3
    $newDocument = $this->cleanHtmlWrapper($newDocument);
510
511 3
    $newNode = $this->node->ownerDocument->importNode($newDocument->getDocument()->documentElement, true);
512
513 3
    $this->node->parentNode->replaceChild($newNode, $this->node);
514 3
    $this->node = $newNode;
515
516 3
    return $this;
517
  }
518
519
  /**
   * Normalize the given string for comparison.
   *
   * Steps, in order: lower-case, strip spaces and line breaks, trim, then
   * url-decode and url-encode again.
   *
   * @param string $string
   *
   * @return string
   */
  private function normalizeStringForComparision($string)
  {
    $lowered = strtolower($string);
    $compact = str_replace(array(' ', "\n", "\r\n", "\r"), '', $lowered);
    $trimmed = trim($compact);

    return urlencode(urldecode($trimmed));
  }
530
531
  /**
   * Strip the wrapper nodes from a document that was built from a fragment.
   *
   * Only acts when the parser reports that the document was created without
   * an html element in the input. In that case the doctype is removed, the
   * "html" and "body" elements are replaced by their own children, and the
   * first "p" element is renamed to "simpleHtmlDomP". Any of these nodes that
   * is missing is skipped instead of causing a fatal error.
   *
   * @param HtmlDomParser $newDocument
   *
   * @return HtmlDomParser the same parser object that was passed in
   */
  protected function cleanHtmlWrapper(HtmlDomParser $newDocument)
  {
    if ($newDocument->getIsDOMDocumentCreatedWithoutHtml() !== true) {
      return $newDocument;
    }

    // NOTE(review): assumes getDocument() returns the same DOMDocument on
    // every call, as the original repeated calls already relied on.
    $document = $newDocument->getDocument();

    // Remove doc-type node.
    $doctype = $document->doctype;
    if ($doctype !== null && $doctype->parentNode !== null) {
      $doctype->parentNode->removeChild($doctype);
    }

    // Remove html element, then body element, preserving their child nodes.
    foreach (array('html', 'body') as $wrapperTag) {
      $wrapper = $document->getElementsByTagName($wrapperTag)->item(0);
      if ($wrapper === null || $wrapper->parentNode === null) {
        continue;
      }

      $fragment = $document->createDocumentFragment();
      // Appending a child to the fragment detaches it from the wrapper,
      // so the live list shrinks until it is empty.
      while ($wrapper->childNodes->length > 0) {
        $fragment->appendChild($wrapper->childNodes->item(0));
      }
      $wrapper->parentNode->replaceChild($fragment, $wrapper);
    }

    // At this point DOMDocument still added a "<p>"-wrapper around our string,
    // so we replace it with "<simpleHtmlDomP>" and delete this at the ending ...
    $paragraph = $document->getElementsByTagName('p')->item(0);
    if ($paragraph !== null) {
      $this->changeElementName($paragraph, 'simpleHtmlDomP');
    }

    return $newDocument;
  }
566
567
  /**
   * Change the tag name of a node.
   *
   * The DOM cannot rename an element in place, so a new element with the
   * requested name is created, receives deep copies of the children and the
   * attribute values of $node, and then takes the place of $node.
   *
   * @param DOMNode $node
   * @param string  $name
   *
   * @return DOMElement the newly created element
   */
  protected function changeElementName(\DOMNode $node, $name)
  {
    $newnode = $node->ownerDocument->createElement($name);

    foreach ($node->childNodes as $child) {
      // importNode(..., true) yields a deep copy, so the list being iterated
      // is not modified while looping.
      $child = $node->ownerDocument->importNode($child, true);
      $newnode->appendChild($child);
    }

    // "attributes" is null for node types that cannot carry attributes.
    if ($node->attributes !== null) {
      foreach ($node->attributes as $attrName => $attrNode) {
        // setAttribute() expects the string value, not the DOMAttr object.
        $newnode->setAttribute($attrName, $attrNode->nodeValue);
      }
    }

    // Replace through the actual parent so the node does not have to be a
    // direct child of the document; fall back to the document as before.
    $parent = $node->parentNode !== null ? $node->parentNode : $newnode->ownerDocument;
    $parent->replaceChild($newnode, $node);

    return $newnode;
  }
589
590
  /**
   * Set or remove an attribute.
   *
   * null, false, the empty string and an empty array remove the attribute.
   * Every other value is written, including "0" and 0, which are legitimate
   * attribute values (e.g. tabindex="0") and must not be treated as "empty".
   *
   * @param string $name
   * @param mixed  $value
   *
   * @return $this
   */
  public function setAttribute($name, $value)
  {
    $shouldRemove = $value === null
                    || $value === false
                    || $value === ''
                    || $value === array();

    if ($shouldRemove) {
      $this->node->removeAttribute($name);
    } else {
      $this->node->setAttribute($name, $value);
    }

    return $this;
  }
608
609
  /**
   * Get the plain text of this node.
   *
   * Returns the "textContent" property of the wrapped node.
   *
   * @return string
   */
  public function text()
  {
    $plain = $this->node->textContent;

    return $plain;
  }
618
}
619