Completed
Push — master ( b7ff31...2a6ca1 )
by Lars
03:07
created

putReplacedBackToPreserveHtmlEntities()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 16
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 2

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 16
ccs 13
cts 13
cp 1
rs 9.4285
cc 2
eloc 10
nc 2
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMDocument;
7
use DOMXPath;
8
use InvalidArgumentException;
9
use RuntimeException;
10
11
/**
12
 * Class HtmlDomParser
13
 *
14
 * @package voku\helper
15
 *
16
 * @property-read string outerText Get dom node's outer html (alias for "outerHtml")
17
 * @property-read string outerHtml Get dom node's outer html
18
 * @property-read string innerText Get dom node's inner html (alias for "innerHtml")
19
 * @property-read string innerHtml Get dom node's inner html
20
 * @property-read string plaintext Get dom node's plain text
21
 *
22
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
23
 * @method string outerHtml() Get dom node's outer html
24
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
25
 * @method HtmlDomParser load() load($html) Load HTML from string
26
 * @method HtmlDomParser load_file() load_file($html) Load HTML from file
27
 *
28
 * @method static HtmlDomParser file_get_html() file_get_html($html, $libXMLExtraOptions = null) Load HTML from file
29
 * @method static HtmlDomParser str_get_html() str_get_html($html, $libXMLExtraOptions = null) Load HTML from string
30
 */
31
class HtmlDomParser
32
{
33
  /**
   * Lower-cased alias names accepted by "__call()", mapped to the real method names of this class.
   *
   * @var array
   */
  protected static $functionAliases = array(
      'outertext' => 'html',
      'outerhtml' => 'html',
      'innertext' => 'innerHtml',
      'innerhtml' => 'innerHtml',
      'load'      => 'loadHtml',
      'load_file' => 'loadHtmlFile',
  );

  /**
   * Characters ("orig") that are swapped for placeholders ("tmp") inside matched http(s) links
   * by "replaceToPreserveHtmlEntities()"; both lists are index-aligned.
   *
   * @var string[][]
   */
  protected static $domLinkReplaceHelper = array(
      'orig' => array('[', ']', '{', '}',),
      'tmp'  => array(
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT!!!!',
      ),
  );

  /**
   * Characters ("orig") that are swapped for placeholders ("tmp") in the whole markup
   * by "replaceToPreserveHtmlEntities()"; both lists are index-aligned.
   *
   * @var array
   */
  protected static $domReplaceHelper = array(
      'orig' => array('&', '|', '+', '%'),
      'tmp'  => array(
          '!!!!SIMPLE_HTML_DOM__VOKU__AMP!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PIPE!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PLUS!!!!',
          '!!!!SIMPLE_HTML_DOM__VOKU__PERCENT!!!!',
      ),
  );

  /**
   * Callback registered via "set_callback()" and invoked by "html()"; static, so shared by all instances.
   *
   * @var callable|null
   */
  protected static $callback;

  /**
   * The document this parser works on.
   *
   * @var DOMDocument
   */
  protected $document;

  /**
   * Encoding used when creating the document and when loading markup.
   *
   * @var string
   */
  protected $encoding = 'UTF-8';

  /**
   * Set by "createDOMDocument()" when the loaded input contained no "<" at all.
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtml = false;

  /**
   * Set by "createDOMDocument()" when the loaded input contained a "<" but did not start with one.
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutWrapper = false;

  /**
   * Set by "createDOMDocument()" when the loaded input contained no "<html".
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
100
101
  /**
   * Create a parser, optionally pre-filled from markup or from an existing node.
   *
   * A SimpleHtmlDom is unwrapped via "getNode()", a \DOMNode is deep-imported into the new
   * document, and every other non-null value is handed to "loadHtml()".
   *
   * @param string|SimpleHtmlDom|\DOMNode $element HTML code or SimpleHtmlDom, \DOMNode
   */
  public function __construct($element = null)
  {
    $this->document = new \DOMDocument('1.0', $this->getEncoding());

    // DOMDocument settings
    $this->document->preserveWhiteSpace = true;
    $this->document->formatOutput = true;

    $source = $element instanceof SimpleHtmlDom ? $element->getNode() : $element;

    if ($source === null) {
      return;
    }

    if (!$source instanceof \DOMNode) {
      $this->loadHtml($source);

      return;
    }

    // copy the node (including its children) into our own document
    $imported = $this->document->importNode($source, true);
    if ($imported instanceof \DOMNode) {
      $this->document->appendChild($imported);
    }
  }
132
133
  /**
   * Forward calls to alias method names (see "$functionAliases") to the real methods.
   *
   * The name is matched case-insensitively.
   *
   * @param string $name      called method name
   * @param array  $arguments arguments of the call
   *
   * @return bool|mixed result of the aliased method
   *
   * @throws BadMethodCallException if the name is not a known alias
   */
  public function __call($name, $arguments)
  {
    $alias = strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      throw new BadMethodCallException('Method does not exist: ' . $alias);
    }

    return call_user_func_array(array($this, self::$functionAliases[$alias]), $arguments);
  }
149
150
  /**
   * Static factories "str_get_html()" and "file_get_html()".
   *
   * The first argument is the markup / file path, the optional second argument the extra
   * libxml options. Unlike "__call()", the method name is compared case-sensitively.
   *
   * @param string $name      called static method name
   * @param array  $arguments arguments of the call
   *
   * @return HtmlDomParser
   *
   * @throws BadMethodCallException if the name is neither "str_get_html" nor "file_get_html"
   */
  public static function __callStatic($name, $arguments)
  {
    $source = isset($arguments[0]) ? $arguments[0] : null;
    $libXMLExtraOptions = isset($arguments[1]) ? $arguments[1] : null;

    if ($name === 'str_get_html') {
      $parser = new self();

      return $parser->loadHtml($source, $libXMLExtraOptions);
    }

    if ($name === 'file_get_html') {
      $parser = new self();

      return $parser->loadHtmlFile($source, $libXMLExtraOptions);
    }

    // name the offending method, like "__call()" does
    throw new BadMethodCallException('Method does not exist: ' . $name);
  }
182
183
  /** @noinspection MagicMethodsValidityInspection */
  /**
   * Magic read access to the markup / text of the document.
   *
   * The property name is matched case-insensitively.
   *
   * @param string $name one of: outerHtml, outerText, innerHtml, innerText, text, plaintext
   *
   * @return string|null null for every other property name
   */
  public function __get($name)
  {
    $property = strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
      return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
      return $this->text();
    }

    return null;
  }
207
208
  /**
   * Calling the parser like a function is a shortcut for "find()".
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
218
219
  /**
   * String conversion yields the same as "html()".
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
226
227
  /**
   * No-op, only kept for api-compatibility-reasons.
   *
   * @return bool always true
   */
  public function clear()
  {
    // nothing to clean up
    return true;
  }
236
237
  /**
   * Replace special characters with placeholders (counterpart: "putReplacedBackToPreserveHtmlEntities()").
   *
   * "&", "|", "+" and "%" are replaced in the whole string. If the string contains http(s) links,
   * every matched link is additionally replaced by a copy in which "[", "]", "{" and "}" are
   * placeholders; these link replacements come first in the search list.
   *
   * @param string $html
   *
   * @return string
   */
  public static function replaceToPreserveHtmlEntities($html)
  {
    // default: only the global character placeholders
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    if (strpos($html, 'http') !== false) {

      // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo]
      $regExUrl = '/(\[?\bhttps?:\/\/[^\s<>]+(?:\([\w]+\)|[^[:punct:]\s]|\/|\}|\]))/i';
      $matches = array();
      preg_match_all($regExUrl, $html, $matches);

      if (!empty($matches[1])) {
        $maskedLinks = array();
        foreach ((array)$matches[1] as $index => $link) {
          $maskedLinks[$index] = str_replace(
              self::$domLinkReplaceHelper['orig'],
              self::$domLinkReplaceHelper['tmp'],
              $link
          );
        }

        // links first, so they are handled before the single characters
        $search = array_merge($matches[1], $search);
        $replace = array_merge($maskedLinks, $replace);
      }
    }

    return str_replace($search, $replace, $html);
  }
277
278
  /**
   * Turn the placeholders of "$domLinkReplaceHelper" and "$domReplaceHelper" back into their
   * original characters.
   *
   * @param string $html
   *
   * @return string
   */
  public static function putReplacedBackToPreserveHtmlEntities($html)
  {
    // the merged lists never change, so build them only on the first call
    static $placeholderMap = null;

    if ($placeholderMap === null) {
      $placeholderMap = array(
          'tmp'  => array_merge(
              self::$domLinkReplaceHelper['tmp'],
              self::$domReplaceHelper['tmp']
          ),
          'orig' => array_merge(
              self::$domLinkReplaceHelper['orig'],
              self::$domReplaceHelper['orig']
          ),
      );
    }

    return str_replace($placeholderMap['tmp'], $placeholderMap['orig'], $html);
  }
299
300
  /**
   * Create the DOMDocument for the given markup and remember how that markup was shaped.
   *
   * The markup is first handed to "simplexml_load_string()". Only if that fails, or libxml
   * reports errors, it is loaded via "DOMDocument::loadHTML()" with a temporary xml-encoding
   * header and with the placeholders from "replaceToPreserveHtmlEntities()".
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions extra libxml option flags, OR-ed into the defaults
   *
   * @return \DOMDocument
   */
  private function createDOMDocument($html, $libXMLExtraOptions = null)
  {
    // The markers describe the markup of *this* call; without the reset, loading a second
    // string into the same parser would keep the markers of the previous one.
    $this->isDOMDocumentCreatedWithoutHtml = false;
    $this->isDOMDocumentCreatedWithoutWrapper = false;
    $this->isDOMDocumentCreatedWithoutHtmlWrapper = false;

    if (strpos($html, '<') === false) {
      $this->isDOMDocumentCreatedWithoutHtml = true;
    } elseif (strpos(ltrim($html), '<') !== 0) {
      $this->isDOMDocumentCreatedWithoutWrapper = true;
    }

    if (strpos($html, '<html') === false) {
      $this->isDOMDocumentCreatedWithoutHtmlWrapper = true;
    }

    // set error level
    $internalErrors = libxml_use_internal_errors(true);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only call it on older versions
    $canToggleEntityLoader = PHP_VERSION_ID < 80000;
    $disableEntityLoader = null;
    if ($canToggleEntityLoader) {
      $disableEntityLoader = libxml_disable_entity_loader(true);
    }

    libxml_clear_errors();

    $optionsSimpleXml = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;
    $optionsXml = 0;

    // these constants do not exist in every PHP / libxml version
    if (defined('LIBXML_BIGLINES')) {
      $optionsSimpleXml |= LIBXML_BIGLINES;
    }

    if (defined('LIBXML_COMPACT')) {
      $optionsSimpleXml |= LIBXML_COMPACT;
    }

    if (defined('LIBXML_HTML_NOIMPLIED')) {
      $optionsSimpleXml |= LIBXML_HTML_NOIMPLIED;
    }

    if (defined('LIBXML_HTML_NODEFDTD')) {
      $optionsSimpleXml |= LIBXML_HTML_NODEFDTD;
    }

    if ($libXMLExtraOptions !== null) {
      $optionsSimpleXml |= $libXMLExtraOptions;
      $optionsXml |= $libXMLExtraOptions;
    }

    $sxe = simplexml_load_string($html, 'SimpleXMLElement', $optionsSimpleXml);
    if ($sxe !== false && count(libxml_get_errors()) === 0) {
      $this->document = dom_import_simplexml($sxe)->ownerDocument;
    } else {

      // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
      $html = trim($html);
      $xmlHackUsed = false;
      // NOTE(review): the arguments look swapped (the haystack is the literal '<?xml'), so this
      // condition is true for practically every input and the header is always prepended.
      // Left unchanged on purpose, because skipping the header would change how existing
      // input is decoded -- confirm the intent before touching it.
      if (stripos('<?xml', $html) !== 0) {
        $xmlHackUsed = true;
        $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
      }

      $html = self::replaceToPreserveHtmlEntities($html);

      // the options are only passed on if extra options were given and the PHP version check passes
      if ($optionsXml && Bootup::is_php('5.4')) {
        $this->document->loadHTML($html, $optionsXml);
      } else {
        $this->document->loadHTML($html);
      }

      // remove the "xml-encoding" hack
      if ($xmlHackUsed === true) {
        foreach ($this->document->childNodes as $child) {
          if ($child->nodeType === XML_PI_NODE) {
            $this->document->removeChild($child);
          }
        }
      }

      libxml_clear_errors();
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    libxml_use_internal_errors($internalErrors);
    if ($canToggleEntityLoader) {
      libxml_disable_entity_loader($disableEntityLoader);
    }

    return $this->document;
  }
391
392
  /**
   * Return the first element matching the selector "#<id>" (delegates to "find()").
   *
   * @param string $id
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
403
404
  /**
   * Return the first element with the given tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank a blank node if no such element exists
   */
  public function getElementByTagName($name)
  {
    $firstMatch = $this->document->getElementsByTagName($name)->item(0);

    if ($firstMatch === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new SimpleHtmlDom($firstMatch);
  }
421
422
  /**
   * Return the elements matching the selector "#<id>" (delegates to "find()").
   *
   * @param string   $id
   * @param null|int $idx passed through to "find()"
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
434
435
  /**
   * Return the elements with the given tag name.
   *
   * @param string   $name
   * @param null|int $idx null for all elements, otherwise the position of the wanted element;
   *                      a negative value counts from the end of the list
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|SimpleHtmlDomNodeBlank the whole list, the element at
   *                                                                 $idx, or a blank node if there
   *                                                                 is no element at $idx
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $matches = new SimpleHtmlDomNode();

    foreach ($this->document->getElementsByTagName($name) as $domNode) {
      $matches[] = new SimpleHtmlDom($domNode);
    }

    if ($idx === null) {
      return $matches;
    }

    // negative index: count from the end
    if ($idx < 0) {
      $idx = count($matches) + $idx;
    }

    return isset($matches[$idx]) ? $matches[$idx] : new SimpleHtmlDomNodeBlank();
  }
467
468
  /**
   * Find list of nodes with a CSS selector.
   *
   * The selector is converted to XPath via "SelectorConverter::toXPath()" and evaluated
   * against the current document.
   *
   * @param string   $selector
   * @param int|null $idx null for all matches, otherwise the position of the wanted match;
   *                      a negative value counts from the end of the list
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeBlank the whole list,
   *         the match at $idx, or a blank node if there is no match at $idx
   */
  public function find($selector, $idx = null)
  {
    $xPathQuery = SelectorConverter::toXPath($selector);

    $xPath = new DOMXPath($this->document);
    $nodesList = $xPath->query($xPathQuery);
    $elements = new SimpleHtmlDomNode();

    // DOMXPath::query() returns false for a malformed expression; treat that as "no matches"
    // instead of handing false to foreach
    if ($nodesList !== false) {
      foreach ($nodesList as $node) {
        $elements[] = new SimpleHtmlDom($node);
      }
    }

    if (null === $idx) {
      return $elements;
    }

    // negative index: count from the end
    if ($idx < 0) {
      $idx = count($elements) + $idx;
    }

    if (isset($elements[$idx])) {
      return $elements[$idx];
    }

    return new SimpleHtmlDomNodeBlank();
  }
502
503
  /**
   * Clean up serialized output: strip the wrapper tags that were not part of the loaded input,
   * decode entities / url-encoding and restore the placeholder characters.
   *
   * @param string $content
   *
   * @return string
   */
  protected function fixHtmlOutput($content)
  {
    // INFO: DOMDocument will encapsulate plaintext into a paragraph tag (<p>),
    //          so we try to remove it here again ...

    if ($this->isDOMDocumentCreatedWithoutHtmlWrapper === true) {
      $wrapperArtifacts = array(
          "\n",
          "\r\n",
          "\r",
          '<simpleHtmlDomP>',
          '</simpleHtmlDomP>',
          '<body>',
          '</body>',
          '<html>',
          '</html>',
      );
      $content = str_replace($wrapperArtifacts, '', $content);
    }

    if ($this->isDOMDocumentCreatedWithoutWrapper === true) {
      // the patterns are applied one after another: leading "<p>", then every "</p>"
      $content = preg_replace(array('/^<p>/', '/<\/p>/'), '', $content);
    }

    if ($this->isDOMDocumentCreatedWithoutHtml === true) {
      $plainTextArtifacts = array(
          '<p>',
          '</p>',
          '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
      );
      $content = str_replace($plainTextArtifacts, '', $content);
    }

    $decoded = trim(UTF8::html_entity_decode($content));
    $decoded = UTF8::rawurldecode($decoded);

    return self::putReplacedBackToPreserveHtmlEntities($decoded);
  }
556
557
  /**
   * Get the underlying document.
   *
   * @return DOMDocument
   */
  public function getDocument()
  {
    $document = $this->document;

    return $document;
  }
564
565
  /**
   * Get the encoding configured for this parser.
   *
   * @return string
   */
  private function getEncoding()
  {
    $encoding = $this->encoding;

    return $encoding;
  }
574
575
  /**
   * Whether the "created without html" marker is set.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtml()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtml;

    return $flag;
  }
582
583
  /**
   * Whether the "created without html wrapper" marker is set.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtmlWrapper()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtmlWrapper;

    return $flag;
  }
590
591
  /**
   * Whether the "created without wrapper" marker is set.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutWrapper()
  {
    $flag = $this->isDOMDocumentCreatedWithoutWrapper;

    return $flag;
  }
598
599
  /**
   * Get dom node's outer html
   *
   * A callback registered via "set_callback()" is invoked first; it receives an array that
   * contains this parser. The serialized markup is passed through "fixHtmlOutput()".
   *
   * @return string
   */
  public function html()
  {
    $callback = $this::$callback;
    if ($callback !== null) {
      call_user_func($callback, array($this));
    }

    // without a html wrapper in the input only the document element is serialized
    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    return $this->fixHtmlOutput($content);
  }
618
619
  /**
   * Get the HTML as XML.
   *
   * The document is serialized with LIBXML_NOEMPTYTAG, the XML header is stripped and the
   * result is passed through "fixHtmlOutput()".
   *
   * @return string
   */
  public function xml()
  {
    $serialized = $this->document->saveXML(null, LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = preg_replace('/<\?xml.*\?>/', '', $serialized);

    return $this->fixHtmlOutput(ltrim($withoutHeader));
  }
633
634
  /**
   * Get dom node's inner html
   *
   * Every child of the document element is serialized on its own and passed through
   * "fixHtmlOutput()"; the results are concatenated.
   *
   * @return string empty string if the document has no document element
   */
  public function innerHtml()
  {
    $root = $this->document->documentElement;

    // an empty document (e.g. nothing or an empty string was loaded) has no document element
    if ($root === null) {
      return '';
    }

    $text = '';

    foreach ($root->childNodes as $node) {
      $text .= $this->fixHtmlOutput($this->document->saveHTML($node));
    }

    return $text;
  }
649
650
  /**
   * Load HTML from string
   *
   * Replaces the current document with one created from the given markup.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this parser, for chaining
   *
   * @throws InvalidArgumentException if argument is not string
   */
  public function loadHtml($html, $libXMLExtraOptions = null)
  {
    if (is_string($html)) {
      $this->document = $this->createDOMDocument($html, $libXMLExtraOptions);

      return $this;
    }

    throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
  }
670
671
  /**
   * Load HTML from file
   *
   * Paths starting with "http://" or "https://" skip the "file_exists()" check; everything
   * else must exist. The content is read via "UTF8::file_get_contents()" and handed to "loadHtml()".
   *
   * @param string   $filePath
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this parser, for chaining
   *
   * @throws InvalidArgumentException if the path is not a string
   * @throws RuntimeException if the file does not exist or could not be read
   */
  public function loadHtmlFile($filePath, $libXMLExtraOptions = null)
  {
    if (!is_string($filePath)) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    if (!preg_match("/^https?:\/\//i", $filePath) && !file_exists($filePath)) {
      throw new RuntimeException("File $filePath not found");
    }

    try {
      $html = UTF8::file_get_contents($filePath);
    } catch (\Exception $e) {
      // keep the original exception as "previous", so the real cause is not lost
      throw new RuntimeException("Could not load file $filePath", 0, $e);
    }

    if ($html === false) {
      throw new RuntimeException("Could not load file $filePath");
    }

    $this->loadHtml($html, $libXMLExtraOptions);

    return $this;
  }
704
705
  /**
   * Get the inner html as string and optionally write it to a file.
   *
   * @param string $filepath target file; nothing is written if this is an empty string
   *
   * @return string the inner html (see "innerHtml()")
   */
  public function save($filepath = '')
  {
    $markup = $this->innerHtml();

    if ($filepath === '') {
      return $markup;
    }

    // exclusive lock while writing
    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
  }
721
722
  /**
   * Register the callback that "html()" invokes before serializing.
   *
   * The callback is stored in a static property.
   *
   * @param callable|null $functionName
   */
  public function set_callback($functionName)
  {
    $callback = $functionName;

    $this::$callback = $callback;
  }
729
730
  /**
   * Get dom node's plain text
   *
   * Uses the text content of the document, passed through "fixHtmlOutput()".
   *
   * @return string
   */
  public function text()
  {
    $plainText = $this->document->textContent;

    return $this->fixHtmlOutput($plainText);
  }
739
}
740