Completed
Push — master ( 01d92b...e4ee10 )
by Lars
05:00
created

HtmlDomParser::getElementsById()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 2
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
use BadMethodCallException;
8
use DOMDocument;
9
use DOMXPath;
10
use InvalidArgumentException;
11
use RuntimeException;
12
13
/**
14
 * Class HtmlDomParser
15
 *
16
 * @package voku\helper
17
 *
18
 * @property-read string outerText <p>Get dom node's outer html (alias for "outerHtml").</p>
19
 * @property-read string outerHtml <p>Get dom node's outer html.</p>
20
 * @property-read string innerText <p>Get dom node's inner html (alias for "innerHtml").</p>
21
 * @property-read string innerHtml <p>Get dom node's inner html.</p>
22
 * @property-read string plaintext <p>Get dom node's plain text.</p>
23
 *
24
 * @method string outerText() <p>Get dom node's outer html (alias for "outerHtml()").</p>
25
 * @method string outerHtml() <p>Get dom node's outer html.</p>
26
 * @method string innerText() <p>Get dom node's inner html (alias for "innerHtml()").</p>
27
 *
28
 * @method HtmlDomParser load() load($html) <p>Load HTML from string.</p>
29
 * @method HtmlDomParser load_file() load_file($html) <p>Load HTML from file.</p>
30
 *
31
 * @method static HtmlDomParser file_get_html() file_get_html($html, $libXMLExtraOptions = null) <p>Load HTML from
32
 *         file.</p>
33
 * @method static HtmlDomParser str_get_html() str_get_html($html, $libXMLExtraOptions = null) <p>Load HTML from
34
 *         string.</p>
35
 */
36
class HtmlDomParser
37
{
38
  /**
   * Lower-cased legacy method names mapped to the method that __call() dispatches to.
   *
   * @var array
   */
  protected static $functionAliases = [
      'outertext' => 'html',
      'outerhtml' => 'html',
      'innertext' => 'innerHtml',
      'innerhtml' => 'innerHtml',
      'load'      => 'loadHtml',
      'load_file' => 'loadHtmlFile',
  ];

  /**
   * Characters that are swapped for placeholders inside detected URLs
   * ("orig" and "tmp" are index-aligned).
   *
   * @var string[][]
   */
  protected static $domLinkReplaceHelper = [
      'orig' => ['[', ']', '{', '}'],
      'tmp'  => [
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT!!!!',
      ],
  ];

  /**
   * Characters that are swapped for placeholders in the whole input
   * ("orig" and "tmp" are index-aligned).
   *
   * @var array
   */
  protected static $domReplaceHelper = [
      'orig' => ['&', '|', '+', '%'],
      'tmp'  => [
          '!!!!SIMPLE_HTML_DOM__VOKU__AMP!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PIPE!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PLUS!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PERCENT!!!!',
      ],
  ];

  /**
   * Optional callback stored by set_callback() and invoked by html(); shared by all instances.
   *
   * @var callable|null
   */
  protected static $callback;

  /**
   * The wrapped DOM document.
   *
   * @var DOMDocument
   */
  protected $document;

  /**
   * Encoding used when creating and loading the document.
   *
   * @var string
   */
  protected $encoding = 'UTF-8';

  /**
   * Set when the loaded input contained no "<" at all.
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtml = false;

  /**
   * Set when the loaded input contained "<" but did not start with it (after left-trimming).
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutWrapper = false;

  /**
   * Set when the loaded input contained no "<head>".
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHeadWrapper = false;

  /**
   * Set when the loaded input contained no "<html".
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
110
111
  /**
   * Build a parser around a fresh DOMDocument and optionally fill it.
   *
   * A SimpleHtmlDom is unwrapped to its node first. A \DOMNode is imported
   * (deep copy) and appended; any other non-null value is passed to loadHtml().
   *
   * @param string|SimpleHtmlDom|\DOMNode|null $element HTML code or SimpleHtmlDom, \DOMNode
   */
  public function __construct($element = null)
  {
    $this->document = new \DOMDocument('1.0', $this->getEncoding());

    // DOMDocument settings
    $this->document->preserveWhiteSpace = true;
    $this->document->formatOutput = true;

    $source = $element instanceof SimpleHtmlDom ? $element->getNode() : $element;

    if ($source === null) {
      return;
    }

    if (!($source instanceof \DOMNode)) {
      $this->loadHtml($source);

      return;
    }

    $imported = $this->document->importNode($source, true);
    if ($imported instanceof \DOMNode) {
      $this->document->appendChild($imported);
    }
  }
144
145
  /**
   * Forward calls to legacy method names (see $functionAliases) to the real method.
   *
   * The name is matched case-insensitively.
   *
   * @param string $name
   * @param array  $arguments
   *
   * @return bool|mixed
   *
   * @throws BadMethodCallException if the lower-cased name is not a known alias
   */
  public function __call($name, $arguments)
  {
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
      throw new BadMethodCallException('Method does not exist: ' . $alias);
    }

    return \call_user_func_array([$this, self::$functionAliases[$alias]], $arguments);
  }
162
163
  /**
   * Static entry points "str_get_html" and "file_get_html".
   *
   * Creates a new parser and loads the first argument (default: empty string)
   * as HTML string or file path; the second argument (default: null) is
   * forwarded as extra libxml options.
   *
   * @param string $name
   * @param array  $arguments
   *
   * @return HtmlDomParser
   *
   * @throws \BadMethodCallException if $name is neither of the two supported names
   * @throws \RuntimeException
   * @throws \InvalidArgumentException
   */
  public static function __callStatic($name, $arguments)
  {
    $source = $arguments[0] ?? '';
    $libXMLExtraOptions = $arguments[1] ?? null;

    if ($name === 'str_get_html') {
      /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
      return (new self())->loadHtml($source, $libXMLExtraOptions);
    }

    if ($name === 'file_get_html') {
      /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
      return (new self())->loadHtmlFile($source, $libXMLExtraOptions);
    }

    throw new BadMethodCallException('Method does not exist');
  }
201
202
  /** @noinspection MagicMethodsValidityInspection */
  /**
   * Read-only virtual properties, matched case-insensitively:
   * "outerhtml"/"outertext", "innerhtml"/"innertext" and "text"/"plaintext".
   *
   * @param string $name
   *
   * @return string|null null for any other property name
   */
  public function __get($name)
  {
    $property = \strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
      return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
      return $this->text();
    }

    return null;
  }
226
227
  /**
   * Calling the parser like a function is a shortcut for find().
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
237
238
  /**
   * String conversion yields the same value as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
245
246
  /**
   * No-op kept only for API compatibility.
   *
   * @deprecated
   *
   * @return bool always true
   */
  public function clear(): bool
  {
    $alwaysTrue = true;

    return $alwaysTrue;
  }
257
258
  /**
   * Replace "&", "|", "+" and "%" with placeholders and, inside detected
   * http(s) URLs, additionally "[", "]", "{" and "}".
   *
   * putReplacedBackToPreserveHtmlEntities() performs the reverse mapping.
   *
   * @param string $html
   *
   * @return string
   */
  public static function replaceToPreserveHtmlEntities(string $html): string
  {
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    // cheap pre-check so the URL regex only runs when a link can exist at all
    if (\strpos($html, 'http') !== false) {
      $matches = [];

      // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo]
      \preg_match_all(
          '/(\[?\bhttps?:\/\/[^\s<>]+(?:\([\w]+\)|[^[:punct:]\s]|\/|\}|\]))/i',
          $html,
          $matches
      );

      if (!empty($matches[1])) {
        $urls = (array)$matches[1];

        // same URLs with their brackets/braces turned into placeholders
        $protectedUrls = \str_replace(
            self::$domLinkReplaceHelper['orig'],
            self::$domLinkReplaceHelper['tmp'],
            $urls
        );

        // URLs are replaced first, then the global characters
        $search = \array_merge($urls, $search);
        $replace = \array_merge($protectedUrls, $replace);
      }
    }

    return \str_replace($search, $replace, $html);
  }
298
299
  /**
   * Turn every placeholder from $domLinkReplaceHelper and $domReplaceHelper
   * back into its original character.
   *
   * @param string $html
   *
   * @return string
   */
  public static function putReplacedBackToPreserveHtmlEntities(string $html): string
  {
    // merged lookup lists are built once per request and then reused
    static $placeholderMap = null;

    if ($placeholderMap === null) {
      $placeholderMap = [
          'tmp'  => \array_merge(self::$domLinkReplaceHelper['tmp'], self::$domReplaceHelper['tmp']),
          'orig' => \array_merge(self::$domLinkReplaceHelper['orig'], self::$domReplaceHelper['orig']),
      ];
    }

    return \str_replace($placeholderMap['tmp'], $placeholderMap['orig'], $html);
  }
321
322
  /**
   * Create DOMDocument from HTML.
   *
   * Records what the input was missing (any markup, a leading tag, "<html",
   * "<head>") in the isDOMDocumentCreatedWithout* flags. The input is first
   * parsed as XML via SimpleXML; if that fails or reports libxml errors it is
   * loaded with DOMDocument::loadHTML() instead, with an XML encoding
   * declaration prepended so the configured encoding is used.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions extra libxml option flags, OR-ed into the defaults
   *
   * @return \DOMDocument
   */
  private function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument
  {
    if (\strpos($html, '<') === false) {
      $this->isDOMDocumentCreatedWithoutHtml = true;
    } elseif (\strpos(\ltrim($html), '<') !== 0) {
      $this->isDOMDocumentCreatedWithoutWrapper = true;
    }

    if (\strpos($html, '<html') === false) {
      $this->isDOMDocumentCreatedWithoutHtmlWrapper = true;
    }

    if (\strpos($html, '<head>') === false) {
      $this->isDOMDocumentCreatedWithoutHeadWrapper = true;
    }

    // set error level
    $internalErrors = \libxml_use_internal_errors(true);
    $disableEntityLoader = \libxml_disable_entity_loader(true);
    \libxml_clear_errors();

    $optionsSimpleXml = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;
    $optionsXml = 0;

    if (\defined('LIBXML_BIGLINES')) {
      $optionsSimpleXml |= LIBXML_BIGLINES;
    }

    if (\defined('LIBXML_COMPACT')) {
      $optionsSimpleXml |= LIBXML_COMPACT;
    }

    if (\defined('LIBXML_HTML_NOIMPLIED')) {
      $optionsSimpleXml |= LIBXML_HTML_NOIMPLIED;
    }

    if (\defined('LIBXML_HTML_NODEFDTD')) {
      $optionsSimpleXml |= LIBXML_HTML_NODEFDTD;
    }

    if ($libXMLExtraOptions !== null) {
      $optionsSimpleXml |= $libXMLExtraOptions;
      $optionsXml |= $libXMLExtraOptions;
    }

    $sxe = \simplexml_load_string($html, 'SimpleXMLElement', $optionsSimpleXml);
    if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
      $this->document = \dom_import_simplexml($sxe)->ownerDocument;
    } else {

      // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
      $html = \trim($html);
      $xmlHackUsed = false;
      // stripos() takes ($haystack, $needle): only prepend the declaration
      // when the input does not already start with one.
      if (\stripos($html, '<?xml') !== 0) {
        $xmlHackUsed = true;
        $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
      }

      $html = self::replaceToPreserveHtmlEntities($html);

      $this->document->loadHTML($html, $optionsXml);

      // remove the "xml-encoding" hack
      if ($xmlHackUsed === true) {
        foreach ($this->document->childNodes as $child) {
          if ($child->nodeType === XML_PI_NODE) {
            $this->document->removeChild($child);

            // childNodes is a live list: stop right after the removal so we
            // never keep iterating a list that was just modified, and so only
            // the injected declaration (the first PI) is dropped.
            break;
          }
        }
      }

      \libxml_clear_errors();
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    \libxml_use_internal_errors($internalErrors);
    \libxml_disable_entity_loader($disableEntityLoader);

    return $this->document;
  }
413
414
  /**
   * Return the first element matching the selector "#<id>".
   *
   * @param string $id
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById(string $id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
425
426
  /**
   * Return the first element with the given tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank blank node when no element matches
   */
  public function getElementByTagName(string $name)
  {
    $firstMatch = $this->document->getElementsByTagName($name)->item(0);

    return $firstMatch === null
        ? new SimpleHtmlDomNodeBlank()
        : new SimpleHtmlDom($firstMatch);
  }
443
444
  /**
   * Return the elements matching the selector "#<id>", or only the one at $idx.
   *
   * @param string   $id
   * @param null|int $idx forwarded unchanged to find()
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function getElementsById(string $id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
456
457
  /**
   * Return the elements with the given tag name, or only the one at $idx.
   *
   * @param string   $name
   * @param null|int $idx null returns the whole list; a negative value counts from the end
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank blank node when $idx does not exist
   */
  public function getElementsByTagName(string $name, $idx = null)
  {
    $matches = $this->document->getElementsByTagName($name);

    $elements = new SimpleHtmlDomNode();
    foreach ($matches as $domNode) {
      $elements[] = new SimpleHtmlDom($domNode);
    }

    // no index requested: hand back the complete list
    if ($idx === null) {
      return $elements;
    }

    // negative index: translate to an offset from the end
    $position = $idx < 0 ? \count($elements) + $idx : $idx;

    return isset($elements[$position])
        ? $elements[$position]
        : new SimpleHtmlDomNodeBlank();
  }
493
494
  /**
   * Find list of nodes with a CSS selector.
   *
   * The selector is converted to XPath by SelectorConverter and run against
   * the document.
   *
   * @param string   $selector
   * @param int|null $idx null returns the whole list; a negative value counts from the end
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface blank node when $idx does not exist
   */
  public function find(string $selector, $idx = null)
  {
    /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
    $query = SelectorConverter::toXPath($selector);

    $matches = (new DOMXPath($this->document))->query($query);

    $elements = new SimpleHtmlDomNode();
    foreach ($matches as $domNode) {
      $elements[] = new SimpleHtmlDom($domNode);
    }

    // no index requested: hand back the complete list
    if ($idx === null) {
      return $elements;
    }

    // negative index: translate to an offset from the end
    $position = $idx < 0 ? \count($elements) + $idx : $idx;

    return isset($elements[$position])
        ? $elements[$position]
        : new SimpleHtmlDomNodeBlank();
  }
533
534
  /**
   * Post-process serialized output: strip wrapper markup depending on the
   * isDOMDocumentCreatedWithout* flags, decode HTML entities and URL-encoding,
   * and restore the placeholders inserted by replaceToPreserveHtmlEntities().
   *
   * @param string $content
   * @param bool   $multiDecodeNewHtmlEntity decode repeatedly until the string no longer changes
   *                                         (delegated to \voku\helper\UTF8 when that class exists)
   *
   * @return string
   */
  protected function fixHtmlOutput(string $content, bool $multiDecodeNewHtmlEntity = false): string
  {
    // INFO: DOMDocument will encapsulate plaintext into a paragraph tag (<p>),
    //          so we try to remove it here again ...

    if ($this->isDOMDocumentCreatedWithoutHtmlWrapper === true) {
      $wrapperNoise = ["\n", "\r\n", "\r", '<body>', '</body>', '<html>', '</html>'];
      $content = \str_replace($wrapperNoise, '', $content);
    }

    if ($this->isDOMDocumentCreatedWithoutWrapper === true) {
      $content = (string)\preg_replace('/^<p>/', '', $content);
      $content = (string)\preg_replace('/<\/p>/', '', $content);
    }

    if ($this->isDOMDocumentCreatedWithoutHtml === true) {
      $plainTextNoise = [
          '<p>',
          '</p>',
          '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
      ];
      $content = \str_replace($plainTextNoise, '', $content);
    }

    // drop the helper tag and collapse doubled <head> wrappers
    $content = \str_replace(
        ['<simpleHtmlDomP>', '</simpleHtmlDomP>', '<head><head>', '</head></head>'],
        ['', '', '<head>', '</head>'],
        $content
    );

    $content = \trim($content);

    if ($multiDecodeNewHtmlEntity !== true) {
      // single decoding pass
      $content = \rawurldecode(\html_entity_decode($content, ENT_QUOTES | ENT_HTML5));
    } elseif (\class_exists('\voku\helper\UTF8')) {
      /** @noinspection PhpUndefinedClassInspection */
      $content = \voku\helper\UTF8::rawurldecode($content);
    } else {
      // keep decoding until a pass leaves the string unchanged
      do {
        $before = $content;
        $content = \rawurldecode(\html_entity_decode($content, ENT_QUOTES | ENT_HTML5));
      } while ($before !== $content);
    }

    return self::putReplacedBackToPreserveHtmlEntities($content);
  }
631
632
  /**
   * Expose the wrapped DOM document.
   *
   * @return DOMDocument
   */
  public function getDocument(): \DOMDocument
  {
    return $this->document;
  }
639
640
  /**
   * Encoding used for the document (value of the $encoding property).
   *
   * @return string
   */
  private function getEncoding(): string
  {
    return $this->encoding;
  }
649
650
  /**
   * Current value of the $isDOMDocumentCreatedWithoutHtml flag.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtml(): bool
  {
    return $this->isDOMDocumentCreatedWithoutHtml;
  }
657
658
  /**
   * Current value of the $isDOMDocumentCreatedWithoutHtmlWrapper flag.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool
  {
    return $this->isDOMDocumentCreatedWithoutHtmlWrapper;
  }
665
666
  /**
   * Current value of the $isDOMDocumentCreatedWithoutHeadWrapper flag.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool
  {
    return $this->isDOMDocumentCreatedWithoutHeadWrapper;
  }
673
674
  /**
   * Current value of the $isDOMDocumentCreatedWithoutWrapper flag.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutWrapper(): bool
  {
    return $this->isDOMDocumentCreatedWithoutWrapper;
  }
681
682
  /**
   * Get dom node's outer html.
   *
   * Invokes the callback registered via set_callback() (if any) with an array
   * containing this parser, then serializes either the document element or
   * the whole document and passes the result through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity
   *
   * @return string
   */
  public function html(bool $multiDecodeNewHtmlEntity = false): string
  {
    if ($this::$callback !== null) {
      \call_user_func($this::$callback, [$this]);
    }

    // input without an <html> wrapper: serialize only the root element
    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    return $this->fixHtmlOutput($content, $multiDecodeNewHtmlEntity);
  }
703
704
  /**
   * Get the HTML as XML.
   *
   * Serializes with LIBXML_NOEMPTYTAG, strips the XML declaration and passes
   * the result through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity
   *
   * @return string
   */
  public function xml(bool $multiDecodeNewHtmlEntity = false): string
  {
    $serialized = $this->document->saveXML(null, LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = (string)\preg_replace('/<\?xml.*\?>/', '', $serialized);

    return $this->fixHtmlOutput(\ltrim($withoutHeader), $multiDecodeNewHtmlEntity);
  }
720
721
  /**
   * Get dom node's inner html.
   *
   * Concatenates the serialized children of the document element and passes
   * the result through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity
   *
   * @return string
   */
  public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
  {
    $text = '';

    // An empty document (e.g. a parser created without input) has no document
    // element; skip the loop instead of dereferencing null.
    $root = $this->document->documentElement;
    if ($root !== null) {
      foreach ($root->childNodes as $node) {
        $text .= $this->document->saveHTML($node);
      }
    }

    return $this->fixHtmlOutput($text, $multiDecodeNewHtmlEntity);
  }
738
739
  /**
   * Load HTML from string.
   *
   * Replaces the current document with the one built by createDOMDocument().
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this instance, for chaining
   */
  public function loadHtml(string $html, $libXMLExtraOptions = null): self
  {
    $document = $this->createDOMDocument($html, $libXMLExtraOptions);
    $this->document = $document;

    return $this;
  }
755
756
  /**
   * Load HTML from file.
   *
   * Paths starting with "http://" or "https://" are read without an existence
   * check; any other path must exist. The content is read via
   * \voku\helper\UTF8::file_get_contents() when that class exists, otherwise
   * via file_get_contents(), and then handed to loadHtml().
   *
   * @param string   $filePath
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this instance, for chaining
   *
   * @throws \RuntimeException if the file does not exist or cannot be read
   * @throws \InvalidArgumentException
   */
  public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): self
  {
    if (
        !\preg_match("/^https?:\/\//i", $filePath)
        &&
        !\file_exists($filePath)
    ) {
      throw new RuntimeException("File $filePath not found");
    }

    try {
      if (\class_exists('\voku\helper\UTF8')) {
        /** @noinspection PhpUndefinedClassInspection */
        $html = \voku\helper\UTF8::file_get_contents($filePath);
      } else {
        $html = \file_get_contents($filePath);
      }
    } catch (\Exception $e) {
      // chain the original exception so the root cause is not lost
      throw new RuntimeException("Could not load file $filePath", 0, $e);
    }

    if ($html === false) {
      throw new RuntimeException("Could not load file $filePath");
    }

    $this->loadHtml($html, $libXMLExtraOptions);

    return $this;
  }
796
797
  /**
   * Return the inner html and, when a path is given, also write it to that
   * file (with an exclusive lock).
   *
   * @param string $filepath empty string skips the file write
   *
   * @return string
   */
  public function save(string $filepath = ''): string
  {
    $markup = $this->innerHtml();

    if ($filepath === '') {
      return $markup;
    }

    \file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
  }
813
814
  /**
   * Store the callback that html() invokes before serializing.
   *
   * The callback is kept in a static property, so it is not per-instance.
   *
   * @param callable|null $functionName
   */
  public function set_callback($functionName)
  {
    $this::$callback = $functionName;
  }
821
822
  /**
   * Get dom node's plain text.
   *
   * Uses the document's textContent, passed through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity
   *
   * @return string
   */
  public function text(bool $multiDecodeNewHtmlEntity = false): string
  {
    $plain = $this->document->textContent;

    return $this->fixHtmlOutput($plain, $multiDecodeNewHtmlEntity);
  }
833
}
834