Completed
Push — master ( 8f1b8a...1a2d77 )
by Lars
02:56 queued 01:16
created

HtmlDomParser::__call()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 5

Duplication

Lines 10
Ratio 90.91 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 10
loc 11
ccs 5
cts 5
cp 1
rs 9.4285
cc 2
eloc 5
nc 2
nop 2
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
use BadMethodCallException;
8
use DOMDocument;
9
use DOMXPath;
10
use InvalidArgumentException;
11
use RuntimeException;
12
13
/**
14
 * Class HtmlDomParser
15
 *
16
 * @package voku\helper
17
 *
18
 * @property-read string outerText <p>Get dom node's outer html (alias for "outerHtml").</p>
19
 * @property-read string outerHtml <p>Get dom node's outer html.</p>
20
 * @property-read string innerText <p>Get dom node's inner html (alias for "innerHtml").</p>
21
 * @property-read string innerHtml <p>Get dom node's inner html.</p>
22
 * @property-read string plaintext <p>Get dom node's plain text.</p>
23
 *
24
 * @method string outerText() <p>Get dom node's outer html (alias for "outerHtml()").</p>
25
 * @method string outerHtml() <p>Get dom node's outer html.</p>
26
 * @method string innerText() <p>Get dom node's inner html (alias for "innerHtml()").</p>
27
 *
28
 * @method HtmlDomParser load() load($html) <p>Load HTML from string.</p>
29
 * @method HtmlDomParser load_file() load_file($html) <p>Load HTML from file.</p>
30
 *
31
 * @method static HtmlDomParser file_get_html() file_get_html($html, $libXMLExtraOptions = null) <p>Load HTML from
32
 *         file.</p>
33
 * @method static HtmlDomParser str_get_html() str_get_html($html, $libXMLExtraOptions = null) <p>Load HTML from
34
 *         string.</p>
35
 */
36
class HtmlDomParser
37
{
38
  /**
   * Legacy method names (lower-cased, as looked up by __call()) mapped to the
   * method of this class that implements them.
   *
   * @var string[]
   */
  protected static $functionAliases = [
      'outertext' => 'html',
      'outerhtml' => 'html',
      'innertext' => 'innerHtml',
      'innerhtml' => 'innerHtml',
      'load'      => 'loadHtml',
      'load_file' => 'loadHtmlFile',
  ];

  /**
   * Bracket characters ('orig') and the placeholders ('tmp') that stand in for
   * them inside matched links; applied by replaceToPreserveHtmlEntities() and
   * reverted by putReplacedBackToPreserveHtmlEntities().
   *
   * @var string[][]
   */
  protected static $domLinkReplaceHelper = [
      'orig' => ['[', ']', '{', '}',],
      'tmp'  => [
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT!!!!',
      ],
  ];

  /**
   * Characters ('orig') and the placeholders ('tmp') that stand in for them in
   * the whole input; applied by replaceToPreserveHtmlEntities() and reverted by
   * putReplacedBackToPreserveHtmlEntities().
   *
   * @var string[][]
   */
  protected static $domReplaceHelper = [
      'orig' => ['&', '|', '+', '%'],
      'tmp'  => [
          '!!!!SIMPLE_HTML_DOM__VOKU__AMP!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PIPE!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PLUS!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PERCENT!!!!',
      ],
  ];

  /**
   * Callback stored by set_callback() and invoked by html(); null until set.
   * The property is static, so the value is shared between instances.
   *
   * @var callable|null
   */
  protected static $callback;

  /**
   * The DOM document this parser works on.
   *
   * @var DOMDocument
   */
  protected $document;

  /**
   * Encoding used when creating the document and for the encoding hack in
   * createDOMDocument().
   *
   * @var string
   */
  protected $encoding = 'UTF-8';

  /**
   * Set by createDOMDocument() when the input contained no "<" at all.
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtml = false;

  /**
   * Set by createDOMDocument() when the input contained "<" but did not start
   * with it (ignoring leading whitespace).
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutWrapper = false;

  /**
   * Set by createDOMDocument() when the input contained no "<head>".
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHeadWrapper = false;

  /**
   * Set by createDOMDocument() when the input contained no "<html".
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
110
111
  /**
   * Create a parser, optionally seeded with content.
   *
   * A SimpleHtmlDom is unwrapped to its node first. A \DOMNode is deep-imported
   * into a fresh document; any other non-null value is handed to loadHtml().
   * Because loadHtml() is declared with a string parameter in a strict-types
   * file, a non-string value raises a \TypeError there.
   *
   * @param string|SimpleHtmlDom|\DOMNode|null $element HTML code, SimpleHtmlDom or \DOMNode;
   *                                                    null leaves the document empty
   */
  public function __construct($element = null)
  {
    $document = new \DOMDocument('1.0', $this->getEncoding());

    // DOMDocument settings
    $document->preserveWhiteSpace = true;
    $document->formatOutput = true;
    $this->document = $document;

    $source = $element instanceof SimpleHtmlDom ? $element->getNode() : $element;

    if ($source instanceof \DOMNode) {
      $imported = $this->document->importNode($source, true);

      // importNode() does not always hand back a node, so check before appending
      if ($imported instanceof \DOMNode) {
        $this->document->appendChild($imported);
      }

      return;
    }

    if ($source === null) {
      return;
    }

    $this->loadHtml($source);
  }
144
145
  /**
   * Forward calls to legacy method names to the method that implements them.
   *
   * The requested name is lower-cased and looked up in self::$functionAliases.
   *
   * @param string $name      requested method name (matched case-insensitively)
   * @param array  $arguments arguments passed on unchanged
   *
   * @return mixed the return value of the aliased method
   *
   * @throws BadMethodCallException if the name is not a known alias
   */
  public function __call($name, $arguments)
  {
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
      throw new BadMethodCallException('Method does not exist: ' . $alias);
    }

    $target = [$this, self::$functionAliases[$alias]];

    return \call_user_func_array($target, $arguments);
  }
162
163
  /**
   * Static entry points "str_get_html" and "file_get_html".
   *
   * Both create a new parser; the first argument (default '') is the HTML
   * string or file path, the second (default null) is passed on as extra
   * libxml options.
   *
   * @param string $name      "str_get_html" or "file_get_html"
   * @param array  $arguments [0 => html or file path, 1 => extra libxml options]
   *
   * @return HtmlDomParser
   *
   * @throws \BadMethodCallException for any other method name
   * @throws \RuntimeException
   * @throws \InvalidArgumentException
   */
  public static function __callStatic($name, $arguments)
  {
    $loaders = [
        'str_get_html'  => 'loadHtml',
        'file_get_html' => 'loadHtmlFile',
    ];

    if (!\is_string($name) || !isset($loaders[$name])) {
      throw new BadMethodCallException('Method does not exist');
    }

    $source = $arguments[0] ?? '';
    $libXMLExtraOptions = $arguments[1] ?? null;

    /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
    $parser = new self();

    return $parser->{$loaders[$name]}($source, $libXMLExtraOptions);
  }
201
202
  /** @noinspection MagicMethodsValidityInspection */
  /**
   * Read-only virtual properties, matched case-insensitively:
   * "outerhtml"/"outertext" => html(), "innerhtml"/"innertext" => innerHtml(),
   * "text"/"plaintext" => text().
   *
   * @param string $name
   *
   * @return string|null null for any other property name
   */
  public function __get($name)
  {
    $property = \strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
      return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
      return $this->text();
    }

    return null;
  }
226
227
  /**
   * Calling the parser like a function is a shortcut for find().
   *
   * @param string   $selector CSS selector
   * @param int|null $idx      index forwarded to find(); null returns all matches
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
237
238
  /**
   * String conversion yields the same output as html().
   *
   * @return string
   */
  public function __toString()
  {
    $outerHtml = $this->html();

    return $outerHtml;
  }
245
246
  /**
   * No-op kept only for API compatibility; it always reports success.
   *
   * @deprecated
   *
   * @return bool always true
   */
  public function clear(): bool
  {
    // nothing to release
    return true;
  }
257
258
  /**
   * Mask characters with placeholders before the HTML is parsed.
   *
   * '&', '|', '+' and '%' are replaced everywhere. In addition, when the input
   * contains "http", every matched link gets its '[', ']', '{' and '}' replaced.
   * putReplacedBackToPreserveHtmlEntities() reverts the placeholders.
   *
   * @param string $html
   *
   * @return string the input with placeholders inserted
   */
  public static function replaceToPreserveHtmlEntities(string $html): string
  {
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    if (\strpos($html, 'http') === false) {
      return \str_replace($search, $replace, $html);
    }

    // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo]
    $urlPattern = '/(\[?\bhttps?:\/\/[^\s<>]+(?:\([\w]+\)|[^[:punct:]\s]|\/|\}|\]))/i';
    \preg_match_all($urlPattern, $html, $matches);

    if (!empty($matches[1])) {
      $links = (array)$matches[1];

      $maskedLinks = [];
      foreach ($links as $key => $link) {
        $maskedLinks[$key] = \str_replace(
            self::$domLinkReplaceHelper['orig'],
            self::$domLinkReplaceHelper['tmp'],
            $link
        );
      }

      // str_replace() works through the pairs in order: the links have to come
      // first, because once '&', '%' ... are masked the original link text
      // would no longer be found
      $search = \array_merge($links, $search);
      $replace = \array_merge($maskedLinks, $replace);
    }

    return \str_replace($search, $replace, $html);
  }
298
299
  /**
   * Revert the placeholders inserted by replaceToPreserveHtmlEntities().
   *
   * The combined placeholder/original lists (link helper first, then the
   * general helper) are built on the first call and kept in static locals.
   *
   * @param string $html
   *
   * @return string the input with all placeholders replaced by the original characters
   */
  public static function putReplacedBackToPreserveHtmlEntities(string $html): string
  {
    static $placeholders = null;
    static $originals = null;

    if ($placeholders === null || $originals === null) {
      $placeholders = \array_merge(
          self::$domLinkReplaceHelper['tmp'],
          self::$domReplaceHelper['tmp']
      );
      $originals = \array_merge(
          self::$domLinkReplaceHelper['orig'],
          self::$domReplaceHelper['orig']
      );
    }

    return \str_replace($placeholders, $originals, $html);
  }
321
322
  /**
   * Create DOMDocument from HTML.
   *
   * The input is first tried as XML via simplexml_load_string(); if that fails
   * or reports libxml errors, it is parsed with DOMDocument::loadHTML() after an
   * encoding declaration was prepended and replaceToPreserveHtmlEntities() was
   * applied. The "created without ..." flags are recomputed on every call, and
   * the global libxml settings changed here are restored even if parsing throws.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions extra LIBXML_* flags, OR-ed into the defaults
   *
   * @return \DOMDocument
   */
  private function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument
  {
    // recompute all flags, so that a parser which is used for a second
    // document does not keep the flags of the first one
    $this->isDOMDocumentCreatedWithoutHtml = \strpos($html, '<') === false;
    $this->isDOMDocumentCreatedWithoutWrapper = !$this->isDOMDocumentCreatedWithoutHtml
                                                && \strpos(\ltrim($html), '<') !== 0;
    $this->isDOMDocumentCreatedWithoutHtmlWrapper = \strpos($html, '<html') === false;
    $this->isDOMDocumentCreatedWithoutHeadWrapper = \strpos($html, '<head>') === false;

    // set error level
    $internalErrors = \libxml_use_internal_errors(true);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
    // only touched on older versions
    $disableEntityLoader = null;
    if (\PHP_VERSION_ID < 80000) {
      $disableEntityLoader = \libxml_disable_entity_loader(true);
    }

    \libxml_clear_errors();

    try {
      $optionsXml = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;

      if (\defined('LIBXML_BIGLINES')) {
        $optionsXml |= LIBXML_BIGLINES;
      }

      if (\defined('LIBXML_COMPACT')) {
        $optionsXml |= LIBXML_COMPACT;
      }

      if (\defined('LIBXML_HTML_NODEFDTD')) {
        $optionsXml |= LIBXML_HTML_NODEFDTD;
      }

      if ($libXMLExtraOptions !== null) {
        $optionsXml |= $libXMLExtraOptions;
      }

      $sxe = \simplexml_load_string($html, 'SimpleXMLElement', $optionsXml);
      if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
        $this->document = \dom_import_simplexml($sxe)->ownerDocument;
      } else {

        // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
        $html = \trim($html);
        $xmlHackUsed = false;
        // NOTE(review): haystack and needle look swapped in this call, which
        // makes the condition true for practically every input. It is left
        // unchanged on purpose, because "always prepend the declaration" is the
        // behavior callers get today - confirm before changing it.
        if (\stripos('<?xml', $html) !== 0) {
          $xmlHackUsed = true;
          $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
        }

        $html = self::replaceToPreserveHtmlEntities($html);

        $this->document->loadHTML($html, $optionsXml);

        // remove the "xml-encoding" hack
        if ($xmlHackUsed === true) {
          foreach ($this->document->childNodes as $child) {
            if ($child->nodeType === XML_PI_NODE) {
              $this->document->removeChild($child);

              // the declaration was prepended, so it is the first processing
              // instruction; stopping here also avoids iterating further over a
              // live node list that was just modified
              break;
            }
          }
        }

        \libxml_clear_errors();
      }

      // set encoding
      $this->document->encoding = $this->getEncoding();
    } finally {
      // restore lib-xml settings
      \libxml_use_internal_errors($internalErrors);
      if ($disableEntityLoader !== null) {
        \libxml_disable_entity_loader($disableEntityLoader);
      }
    }

    return $this->document;
  }
407
408
  /**
   * Return the first element matching the selector "#<id>".
   *
   * @param string $id id value, without the leading "#"
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById(string $id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
419
420
  /**
   * Return the first element with the given tag name.
   *
   * @param string $name tag name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank a blank node when the document has no such element
   */
  public function getElementByTagName(string $name)
  {
    $first = $this->document->getElementsByTagName($name)->item(0);

    return $first === null
        ? new SimpleHtmlDomNodeBlank()
        : new SimpleHtmlDom($first);
  }
437
438
  /**
   * Return the elements matching the selector "#<id>".
   *
   * @param string   $id  id value, without the leading "#"
   * @param null|int $idx index forwarded to find(); null returns all matches
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function getElementsById(string $id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
450
451
  /**
   * Return the elements with the given tag name.
   *
   * Without an index the whole list is returned. With an index a single
   * element is returned; a negative index counts from the end of the list. An
   * index that does not exist yields a blank node.
   *
   * @param string   $name tag name
   * @param null|int $idx  position of the wanted element, or null for all elements
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsByTagName(string $name, $idx = null)
  {
    $elements = new SimpleHtmlDomNode();

    foreach ($this->document->getElementsByTagName($name) as $domNode) {
      $elements[] = new SimpleHtmlDom($domNode);
    }

    if ($idx === null) {
      return $elements;
    }

    // a negative index is an offset from the end of the list
    $position = $idx < 0 ? \count($elements) + $idx : $idx;

    return isset($elements[$position])
        ? $elements[$position]
        : new SimpleHtmlDomNodeBlank();
  }
487
488
  /**
   * Find the first node matching a CSS selector (find() with index 0).
   *
   * @param string $selector CSS selector
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function findOne(string $selector)
  {
    $firstMatch = $this->find($selector, 0);

    return $firstMatch;
  }
499
500
  /**
   * Find nodes with a CSS selector.
   *
   * The selector is converted to XPath by SelectorConverter and evaluated
   * against the document. Without an index the whole list is returned. With an
   * index a single element is returned; a negative index counts from the end of
   * the list. An index that does not exist yields a blank node.
   *
   * @param string   $selector CSS selector
   * @param int|null $idx      position of the wanted element, or null for all elements
   *
   * @return SimpleHtmlDom[]|SimpleHtmlDom|SimpleHtmlDomNodeInterface
   */
  public function find(string $selector, $idx = null)
  {
    /** @noinspection ExceptionsAnnotatingAndHandlingInspection */
    $xPathQuery = SelectorConverter::toXPath($selector);

    $xPath = new DOMXPath($this->document);
    $matches = $xPath->query($xPathQuery);

    $elements = new SimpleHtmlDomNode();
    foreach ($matches as $domNode) {
      $elements[] = new SimpleHtmlDom($domNode);
    }

    if ($idx === null) {
      return $elements;
    }

    // a negative index is an offset from the end of the list
    $position = $idx < 0 ? \count($elements) + $idx : $idx;

    return isset($elements[$position])
        ? $elements[$position]
        : new SimpleHtmlDomNodeBlank();
  }
539
540
  /**
   * Clean up serialized markup before it is returned to the caller.
   *
   * Depending on the flags recorded while the document was created, wrappers
   * that were not part of the input (html/body tags, line breaks, the <p>
   * around plain text) are removed again. Afterwards helper tags are stripped,
   * HTML entities and URL-encoding are decoded and the placeholders from
   * replaceToPreserveHtmlEntities() are reverted.
   *
   * @param string $content                  serialized markup or text
   * @param bool   $multiDecodeNewHtmlEntity decode repeatedly until the content no longer changes
   *                                         (or delegate to \voku\helper\UTF8 when that class exists)
   *
   * @return string
   */
  public function fixHtmlOutput(string $content, bool $multiDecodeNewHtmlEntity = false): string
  {
    // INFO: DOMDocument will encapsulate plaintext into a paragraph tag (<p>),
    //          so we try to remove it here again ...

    if ($this->isDOMDocumentCreatedWithoutHtmlWrapper === true) {
      $content = \str_replace(
          [
              "\n",
              "\r\n",
              "\r",
              '<body>',
              '</body>',
              '<html>',
              '</html>',
          ],
          '',
          $content
      );
    }

    if ($this->isDOMDocumentCreatedWithoutWrapper === true) {
      $content = (string)\preg_replace('/^<p>/', '', $content, 1, $wrapperRemoved);

      // Only the closing tag that belongs to the removed wrapper may go: <p>
      // cannot be nested, so that is the first "</p>". Stripping every "</p>"
      // would leave the remaining paragraphs of the content unbalanced.
      if ($wrapperRemoved > 0) {
        $content = (string)\preg_replace('/<\/p>/', '', $content, 1);
      }
    }

    if ($this->isDOMDocumentCreatedWithoutHtml === true) {
      $content = \str_replace(
          [
              '<p>',
              '</p>',
              '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
          ],
          '',
          $content
      );
    }

    $content = \str_replace(
        [
            '<simpleHtmlDomP>',
            '</simpleHtmlDomP>',
            '<head><head>',
            '</head></head>',
            '<br></br>',
        ],
        [
            '',
            '',
            '<head>',
            '</head>',
            '<br>',
        ],
        $content
    );

    $content = \trim($content);
    if ($multiDecodeNewHtmlEntity === true) {
      if (\class_exists('\voku\helper\UTF8')) {

        /** @noinspection PhpUndefinedClassInspection */
        $content = \voku\helper\UTF8::rawurldecode($content);

      } else {

        // decode until a pass no longer changes anything
        do {
          $content_compare = $content;

          $content = \rawurldecode(
              \html_entity_decode(
                  $content,
                  ENT_QUOTES | ENT_HTML5
              )
          );

        } while ($content_compare !== $content);

      }

    } else {

      $content = \rawurldecode(
          \html_entity_decode(
              $content,
              ENT_QUOTES | ENT_HTML5
          )
      );
    }

    return self::putReplacedBackToPreserveHtmlEntities($content);
  }
639
640
  /**
   * Access the underlying DOM document (the instance itself, not a copy).
   *
   * @return DOMDocument
   */
  public function getDocument(): \DOMDocument
  {
    $document = $this->document;

    return $document;
  }
647
648
  /**
   * Encoding configured for this parser.
   *
   * @return string
   */
  private function getEncoding(): string
  {
    $encoding = $this->encoding;

    return $encoding;
  }
657
658
  /**
   * Whether the loaded input contained no "<" at all, as recorded by
   * createDOMDocument().
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtml(): bool
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtml;

    return $flag;
  }
665
666
  /**
   * Whether the loaded input contained no "<html", as recorded by
   * createDOMDocument().
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtmlWrapper;

    return $flag;
  }
673
674
  /**
   * Whether the loaded input contained no "<head>", as recorded by
   * createDOMDocument().
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool
  {
    $flag = $this->isDOMDocumentCreatedWithoutHeadWrapper;

    return $flag;
  }
681
682
  /**
   * Whether the loaded input contained "<" but did not start with it (ignoring
   * leading whitespace), as recorded by createDOMDocument().
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutWrapper(): bool
  {
    $flag = $this->isDOMDocumentCreatedWithoutWrapper;

    return $flag;
  }
689
690
  /**
   * Get dom node's outer html.
   *
   * If a callback was registered via set_callback(), it is invoked first with
   * one argument: an array containing this parser. When the input had no
   * "<html" wrapper only the document element is serialized, otherwise the
   * whole document. The result is passed through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity forwarded to fixHtmlOutput()
   *
   * @return string
   */
  public function html(bool $multiDecodeNewHtmlEntity = false): string
  {
    if ($this::$callback !== null) {
      \call_user_func($this::$callback, [$this]);
    }

    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    return $this->fixHtmlOutput($content, $multiDecodeNewHtmlEntity);
  }
711
712
  /**
   * Get the HTML as XML.
   *
   * The document is serialized with LIBXML_NOEMPTYTAG, the XML declaration is
   * removed and the result is passed through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity forwarded to fixHtmlOutput()
   *
   * @return string
   */
  public function xml(bool $multiDecodeNewHtmlEntity = false): string
  {
    $serialized = $this->document->saveXML(null, LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = (string)\preg_replace('/<\?xml.*\?>/', '', $serialized);

    return $this->fixHtmlOutput(\ltrim($withoutHeader), $multiDecodeNewHtmlEntity);
  }
728
729
  /**
   * Get dom node's inner html.
   *
   * Serializes every child of the document element and passes the result
   * through fixHtmlOutput(). A document without a document element (e.g. a
   * parser that has not loaded anything yet) yields the cleaned-up empty string.
   *
   * @param bool $multiDecodeNewHtmlEntity forwarded to fixHtmlOutput()
   *
   * @return string
   */
  public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string
  {
    $text = '';

    // documentElement is null for an empty document; dereferencing it would
    // raise notices/warnings instead of simply producing no output
    $root = $this->document->documentElement;
    if ($root !== null) {
      foreach ($root->childNodes as $node) {
        $text .= $this->document->saveHTML($node);
      }
    }

    return $this->fixHtmlOutput($text, $multiDecodeNewHtmlEntity);
  }
746
747
  /**
   * Load HTML from string.
   *
   * Replaces the current document with the one created from $html.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions extra LIBXML_* flags forwarded to createDOMDocument()
   *
   * @return HtmlDomParser this instance, for chaining
   */
  public function loadHtml(string $html, $libXMLExtraOptions = null): self
  {
    $document = $this->createDOMDocument($html, $libXMLExtraOptions);
    $this->document = $document;

    return $this;
  }
763
764
  /**
   * Load HTML from file.
   *
   * Paths starting with "http://" or "https://" skip the existence check. The
   * content is read with \voku\helper\UTF8::file_get_contents() when that class
   * exists, otherwise with file_get_contents(), and is then handed to loadHtml().
   *
   * @param string   $filePath           local path or http(s) URL
   * @param int|null $libXMLExtraOptions extra LIBXML_* flags forwarded to loadHtml()
   *
   * @return HtmlDomParser this instance, for chaining
   *
   * @throws \RuntimeException if a local file does not exist or the content could not be read;
   *                           an exception raised while reading is attached as "previous"
   * @throws \InvalidArgumentException
   */
  public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): self
  {
    if (
        !\preg_match("/^https?:\/\//i", $filePath)
        &&
        !\file_exists($filePath)
    ) {
      throw new RuntimeException("File $filePath not found");
    }

    try {
      if (\class_exists('\voku\helper\UTF8')) {
        /** @noinspection PhpUndefinedClassInspection */
        $html = \voku\helper\UTF8::file_get_contents($filePath);
      } else {
        $html = \file_get_contents($filePath);
      }
    } catch (\Exception $e) {
      // keep the original exception, so that the real cause stays visible
      throw new RuntimeException("Could not load file $filePath", 0, $e);
    }

    if ($html === false) {
      throw new RuntimeException("Could not load file $filePath");
    }

    return $this->loadHtml($html, $libXMLExtraOptions);
  }
804
805
  /**
   * Return the inner html and, when a path is given, also write it to that file.
   *
   * The file is written with an exclusive lock; the result of the write is not
   * checked.
   *
   * @param string $filepath target file, or '' to only return the string
   *
   * @return string the inner html (see innerHtml())
   */
  public function save(string $filepath = ''): string
  {
    $innerHtml = $this->innerHtml();

    if ($filepath === '') {
      return $innerHtml;
    }

    \file_put_contents($filepath, $innerHtml, LOCK_EX);

    return $innerHtml;
  }
821
822
  /**
   * Register the callback that html() invokes before serializing.
   *
   * The callback is stored in a static property, so it is not bound to this
   * instance alone.
   *
   * @param callable|null $functionName callback to store
   */
  public function set_callback($functionName)
  {
    $callback = $functionName;

    $this::$callback = $callback;
  }
829
830
  /**
   * Get dom node's plain text.
   *
   * Returns the document's text content after it went through fixHtmlOutput().
   *
   * @param bool $multiDecodeNewHtmlEntity forwarded to fixHtmlOutput()
   *
   * @return string
   */
  public function text(bool $multiDecodeNewHtmlEntity = false): string
  {
    $plainText = $this->document->textContent;

    return $this->fixHtmlOutput($plainText, $multiDecodeNewHtmlEntity);
  }
841
842
  /**
   * Give the cloned parser its own copy of the DOM document, so that the
   * clone and the original do not share one document instance.
   */
  public function __clone()
  {
    $documentCopy = clone $this->document;

    $this->document = $documentCopy;
  }
846
}
847