Completed
Push — master ( 519359...293703 )
by Lars
02:27
created

HtmlDomParser::createDOMDocument()   F

Complexity

Conditions 15
Paths 1152

Size

Total Lines 83
Code Lines 46

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 53
CRAP Score 15.0778

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 83
ccs 53
cts 57
cp 0.9298
rs 2
cc 15
eloc 46
nc 1152
nop 2
crap 15.0778

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMDocument;
7
use DOMXPath;
8
use InvalidArgumentException;
9
use RuntimeException;
10
11
/**
12
 * Class HtmlDomParser
13
 *
14
 * @package voku\helper
15
 *
16
 * @property-read string outerText Get dom node's outer html (alias for "outerHtml")
17
 * @property-read string outerHtml Get dom node's outer html
18
 * @property-read string innerText Get dom node's inner html (alias for "innerHtml")
19
 * @property-read string innerHtml Get dom node's inner html
20
 * @property-read string plaintext Get dom node's plain text
21
 *
22
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
23
 * @method string outerHtml() Get dom node's outer html
24
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
25
 * @method HtmlDomParser load() load($html) Load HTML from string
26
 * @method HtmlDomParser load_file() load_file($html) Load HTML from file
27
 *
28
 * @method static HtmlDomParser file_get_html() file_get_html($html, $libXMLExtraOptions = null) Load HTML from file
29
 * @method static HtmlDomParser str_get_html() str_get_html($html, $libXMLExtraOptions = null) Load HTML from string
30
 */
31
class HtmlDomParser
32
{
33
  /**
34
   * @var array
35
   */
36
  protected static $functionAliases = array(
37
      'outertext' => 'html',
38
      'outerhtml' => 'html',
39
      'innertext' => 'innerHtml',
40
      'innerhtml' => 'innerHtml',
41
      'load'      => 'loadHtml',
42
      'load_file' => 'loadHtmlFile',
43
  );
44
45
  /**
46
   * @var string[][]
47
   */
48
  protected static $domLinkReplaceHelper = array(
49
      'orig' => array('[', ']', '{', '}',),
50
      'tmp'  => array(
51
          '!!!!HTML_DOM__SQUARE_BRACKET_LEFT!!!!',
52
          '!!!!HTML_DOM__SQUARE_BRACKET_RIGHT!!!!',
53
          '!!!!HTML_DOM__BRACKET_LEFT!!!!',
54
          '!!!!HTML_DOM__BRACKET_RIGHT!!!!',
55
      ),
56
  );
57
58
  /**
59
   * @var array
60
   */
61
  protected static $domReplaceHelper = array(
62
      'orig' => array('&', '|', '+', '%'),
63
      'tmp'  => array(
64
          '!!!!HTML_DOM__AMP!!!!',
65
          '!!!!HTML_DOM__PIPE!!!!',
66
          '!!!!HTML_DOM__PLUS!!!!',
67
          '!!!!HTML_DOM__PERCENT!!!!',
68
      ),
69
  );
70
71
  /**
72
   * @var Callable
73
   */
74
  protected static $callback;
75
76
  /**
77
   * @var DOMDocument
78
   */
79
  protected $document;
80
81
  /**
82
   * @var string
83
   */
84
  protected $encoding = 'UTF-8';
85
86
  /**
87
   * @var bool
88
   */
89
  protected $isDOMDocumentCreatedWithoutHtml = false;
90
91
  /**
92
   * @var bool
93
   */
94
  protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
95
96
  /**
97
   * A random md5-hash, generated via "random_bytes()".
98
   *
99
   * @var string
100
   */
101
  protected $randomHash;
102
103
  /**
   * Build a parser, optionally seeded with HTML or with an existing node.
   *
   * Generates the per-instance random hash, creates an empty DOMDocument in the configured
   * encoding and then fills it from the given element (if any).
   *
   * @param string|SimpleHtmlDom|\DOMNode $element HTML code or SimpleHtmlDom, \DOMNode;
   *                                               null leaves the document empty
   */
  public function __construct($element = null)
  {
    $this->randomHash = md5(Bootup::get_random_bytes(16));
    $this->document = new \DOMDocument('1.0', $this->getEncoding());

    $this->addRandBytesToDomReplaceHelpers();

    // DOMDocument settings
    $this->document->preserveWhiteSpace = true;
    $this->document->formatOutput = true;

    // a SimpleHtmlDom wrapper is reduced to the node it carries
    $source = ($element instanceof SimpleHtmlDom) ? $element->getNode() : $element;

    if ($source instanceof \DOMNode) {
      // deep-copy the node into our own document before attaching it
      $imported = $this->document->importNode($source, true);

      if ($imported instanceof \DOMNode) {
        $this->document->appendChild($imported);
      }
    } elseif ($source !== null) {
      $this->loadHtml($source);
    }
  }
137
138
  /**
   * Add rand-bytes to the "Dom-Replace-Helper"-variables.
   *
   * The placeholder lists are static, so they are shared by every parser instance. The hash is
   * therefore appended only while a placeholder still has its declared form (ending in "!!!!").
   * Appending on every construction would let the placeholders grow with each new instance and
   * would change them between replaceToPreserveHtmlEntities() and
   * putReplacedBackToPreserveHtmlEntities(), so already-masked characters could not be restored.
   */
  protected function addRandBytesToDomReplaceHelpers()
  {
    // index-based loops: no by-reference foreach, so no dangling reference is left behind
    foreach (array_keys(self::$domLinkReplaceHelper['tmp']) as $key) {
      // an md5 hash is hexadecimal, so a suffixed placeholder never ends in "!!!!"
      if (substr(self::$domLinkReplaceHelper['tmp'][$key], -4) === '!!!!') {
        self::$domLinkReplaceHelper['tmp'][$key] .= $this->randomHash;
      }
    }

    foreach (array_keys(self::$domReplaceHelper['tmp']) as $key) {
      if (substr(self::$domReplaceHelper['tmp'][$key], -4) === '!!!!') {
        self::$domReplaceHelper['tmp'][$key] .= $this->randomHash;
      }
    }
  }
153
154
  /**
155
   * @param $name
156
   * @param $arguments
157
   *
158
   * @return bool|mixed
159
   */
160 33 View Code Duplication
  public function __call($name, $arguments)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
161
  {
162 33
    $name = strtolower($name);
163
164 33
    if (isset(self::$functionAliases[$name])) {
165 32
      return call_user_func_array(array($this, self::$functionAliases[$name]), $arguments);
166
    }
167
168 1
    throw new BadMethodCallException('Method does not exist: ' . $name);
169
  }
170
171
  /**
   * Static entry points kept for API compatibility: "str_get_html" and "file_get_html".
   *
   * The name is matched case-insensitively (PHP method names are case-insensitive and
   * __call() lower-cases the name as well).
   *
   * @param string $name      called method name
   * @param array  $arguments [0] => HTML string or file path, [1] => extra libxml options (optional)
   *
   * @return HtmlDomParser
   *
   * @throws BadMethodCallException if the name is not one of the supported aliases
   */
  public static function __callStatic($name, $arguments)
  {
    $arguments0 = isset($arguments[0]) ? $arguments[0] : null;
    $arguments1 = isset($arguments[1]) ? $arguments[1] : null;

    $method = strtolower($name);

    if ($method === 'str_get_html') {
      $parser = new self();

      return $parser->loadHtml($arguments0, $arguments1);
    }

    if ($method === 'file_get_html') {
      $parser = new self();

      return $parser->loadHtmlFile($arguments0, $arguments1);
    }

    throw new BadMethodCallException('Method does not exist');
  }
203
204
  /** @noinspection MagicMethodsValidityInspection */
205
  /**
   * Magic read access for the legacy property names (matched case-insensitively).
   *
   * "outerhtml"/"outertext" map to html(), "innerhtml"/"innertext" to innerHtml() and
   * "text"/"plaintext" to text().
   *
   * @param string $name
   *
   * @return string|null null for any other property name
   */
  public function __get($name)
  {
    $property = strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
      return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
      return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
      return $this->text();
    }

    return null;
  }
228
229
  /**
   * Calling the parser like a function is a shortcut for find().
   *
   * @param string $selector
   * @param int    $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
239
240
  /**
   * String conversion yields the same markup as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
247
248
  /**
   * No-op that only exists for api-compatibility-reasons.
   *
   * @return bool always true
   */
  public function clear()
  {
    $done = true;

    return $done;
  }
257
258
  /**
   * Mask characters in the HTML with temporary placeholders.
   *
   * The characters listed in $domReplaceHelper['orig'] are replaced everywhere; inside detected
   * http(s) links the characters from $domLinkReplaceHelper['orig'] are replaced as well.
   * putReplacedBackToPreserveHtmlEntities() reverses the substitution.
   *
   * @param string $html
   *
   * @return string
   */
  public static function replaceToPreserveHtmlEntities($html)
  {
    // default: only the generic character placeholders
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    if (strpos($html, 'http') !== false) {
      $matches = array();
      preg_match_all("/(\bhttps?:\/\/[^\s()<>]+(?:\([\w\d]+\)|[^[:punct:]\s]|\/|\}|\]))/i", $html, $matches);

      if (!empty($matches[1])) {
        $links = $matches[1];
        $maskedLinks = array();

        foreach ((array)$links as $index => $link) {
          $maskedLinks[$index] = str_replace(
              self::$domLinkReplaceHelper['orig'],
              self::$domLinkReplaceHelper['tmp'],
              $link
          );
        }

        // links are substituted first, then the generic characters
        $search = array_merge($links, $search);
        $replace = array_merge($maskedLinks, $replace);
      }
    }

    return str_replace($search, $replace, $html);
  }
295
296
  /**
   * Reverse replaceToPreserveHtmlEntities(): turn the placeholders back into the original
   * characters and drop every "&#13;" sequence.
   *
   * @param string $html
   *
   * @return string
   */
  public static function putReplacedBackToPreserveHtmlEntities($html)
  {
    $placeholders = array_merge(
        self::$domLinkReplaceHelper['tmp'],
        self::$domReplaceHelper['tmp'],
        array('&#13;')
    );

    $originals = array_merge(
        self::$domLinkReplaceHelper['orig'],
        self::$domReplaceHelper['orig'],
        array('')
    );

    return str_replace($placeholders, $originals, $html);
  }
317
318
  /**
   * create DOMDocument from HTML
   *
   * The input is first tried as well-formed XML via SimpleXML; if that fails or libxml reports
   * errors, it is loaded with DOMDocument::loadHTML() after masking special characters and
   * prepending an "<?xml encoding=...?>" declaration so that the encoding is honoured.
   *
   * Side effects: updates $this->document and both "created without html" flags; the libxml
   * error-handling and entity-loader settings are switched for the duration of the call and
   * restored afterwards.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions additional LIBXML_* flags OR-ed into the defaults
   *
   * @return \DOMDocument
   */
  private function createDOMDocument($html, $libXMLExtraOptions = null)
  {
    // recomputed on every load, so a re-used parser does not keep flags from an earlier document
    $this->isDOMDocumentCreatedWithoutHtml = (strpos($html, '<') === false);
    $this->isDOMDocumentCreatedWithoutHtmlWrapper = (strpos($html, '<html') === false);

    // set error level
    $internalErrors = libxml_use_internal_errors(true);
    $disableEntityLoader = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $optionsSimpleXml = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;
    $optionsXml = LIBXML_DTDATTR | LIBXML_NONET;

    // the following constants depend on the PHP / libxml version, hence the defined() checks
    if (defined('LIBXML_BIGLINES')) {
      $optionsSimpleXml |= LIBXML_BIGLINES;
      $optionsXml |= LIBXML_BIGLINES;
    }

    if (defined('LIBXML_COMPACT')) {
      $optionsSimpleXml |= LIBXML_COMPACT;
      $optionsXml |= LIBXML_COMPACT;
    }

    if (defined('LIBXML_HTML_NOIMPLIED')) {
      $optionsSimpleXml |= LIBXML_HTML_NOIMPLIED;
    }

    if (defined('LIBXML_HTML_NODEFDTD')) {
      $optionsSimpleXml |= LIBXML_HTML_NODEFDTD;
    }

    if ($libXMLExtraOptions !== null) {
      $optionsSimpleXml |= $libXMLExtraOptions;
      $optionsXml |= $libXMLExtraOptions;
    }

    $sxe = simplexml_load_string($html, 'SimpleXMLElement', $optionsSimpleXml);
    if ($sxe !== false && count(libxml_get_errors()) === 0) {
      $this->document = dom_import_simplexml($sxe)->ownerDocument;
    } else {

      // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
      $html = trim($html);
      $xmlHackUsed = false;
      // stripos(haystack, needle): only prepend the declaration if the input does not start with one
      if (stripos($html, '<?xml') !== 0) {
        $xmlHackUsed = true;
        $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
      }

      $html = self::replaceToPreserveHtmlEntities($html);

      if (Bootup::is_php('5.4')) {
        $this->document->loadHTML($html, $optionsXml);
      } else {
        $this->document->loadHTML($html);
      }

      // remove the "xml-encoding" hack
      if ($xmlHackUsed === true) {
        // collect first: childNodes is a live list, removing while iterating it can skip nodes
        $processingInstructions = array();
        foreach ($this->document->childNodes as $child) {
          if ($child->nodeType === XML_PI_NODE) {
            $processingInstructions[] = $child;
          }
        }

        foreach ($processingInstructions as $processingInstruction) {
          $this->document->removeChild($processingInstruction);
        }
      }

      libxml_clear_errors();
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    libxml_use_internal_errors($internalErrors);
    libxml_disable_entity_loader($disableEntityLoader);

    return $this->document;
  }
409
410
  /**
   * Return the first element matching the id selector "#<id>".
   *
   * @param string $id
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
421
422
  /**
   * Return the first element with the given tag name, wrapped as SimpleHtmlDom.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank blank node if no such element exists
   */
  public function getElementByTagName($name)
  {
    $first = $this->document->getElementsByTagName($name)->item(0);

    if ($first === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new SimpleHtmlDom($first);
  }
439
440
  /**
   * Return the elements matching the id selector "#<id>"; delegates to find().
   *
   * @param string   $id
   * @param null|int $idx passed through to find(): null for the whole list, otherwise one element
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
452
453
  /**
454
   * Returns Elements by tag name
455
   *
456
   * @param string   $name
457
   * @param null|int $idx
458
   *
459
   * @return SimpleHtmlDom|SimpleHtmlDomNode|SimpleHtmlDomNodeBlank
460
   */
461 3 View Code Duplication
  public function getElementsByTagName($name, $idx = null)
0 ignored issues
show
Duplication introduced by
This method seems to be duplicated in your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
462
  {
463 3
    $nodesList = $this->document->getElementsByTagName($name);
464
465 3
    $elements = new SimpleHtmlDomNode();
466
467 3
    foreach ($nodesList as $node) {
468 3
      $elements[] = new SimpleHtmlDom($node);
469 3
    }
470
471 3
    if (null === $idx) {
472 2
      return $elements;
473
    } else {
474 1
      if ($idx < 0) {
475
        $idx = count($elements) + $idx;
476
      }
477
    }
478
479 1
    if (isset($elements[$idx])) {
480 1
      return $elements[$idx];
481
    } else {
482
      return new SimpleHtmlDomNodeBlank();
483
    }
484
  }
485
486
  /**
487
   * Find list of nodes with a CSS selector.
488
   *
489
   * @param string $selector
490
   * @param int    $idx
491
   *
492
   * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNode|SimpleHtmlDomNodeBlank
493
   */
494 77
  public function find($selector, $idx = null)
495
  {
496 77
    $xPathQuery = SelectorConverter::toXPath($selector);
497
498 77
    $xPath = new DOMXPath($this->document);
499 77
    $nodesList = $xPath->query($xPathQuery);
500 77
    $elements = new SimpleHtmlDomNode();
501
502 77
    foreach ($nodesList as $node) {
503 73
      $elements[] = new SimpleHtmlDom($node);
504 77
    }
505
506 77
    if (null === $idx) {
507 50
      return $elements;
0 ignored issues
show
Bug Best Practice introduced by
The return type of return $elements; (voku\helper\SimpleHtmlDomNode) is incompatible with the return type documented by voku\helper\HtmlDomParser::find of type voku\helper\SimpleHtmlDo...\SimpleHtmlDomNodeBlank.

If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design.

Let’s take a look at an example:

class Author {
    private $name;

    public function __construct($name) {
        $this->name = $name;
    }

    public function getName() {
        return $this->name;
    }
}

abstract class Post {
    public function getAuthor() {
        return 'Johannes';
    }
}

class BlogPost extends Post {
    public function getAuthor() {
        return new Author('Johannes');
    }
}

class ForumPost extends Post { /* ... */ }

function my_function(Post $post) {
    echo strtoupper($post->getAuthor());
}

Our function my_function expects a Post object, and outputs the author of the post. The base class Post returns a simple string and outputting a simple string will work just fine. However, the child class BlogPost which is a sub-type of Post instead decided to return an object, and is therefore violating the SOLID principles. If a BlogPost were passed to my_function, PHP would not complain, but ultimately fail when executing the strtoupper call in its body.

Loading history...
508
    } else {
509 38
      if ($idx < 0) {
510 10
        $idx = count($elements) + $idx;
511 10
      }
512
    }
513
514 38
    if (isset($elements[$idx])) {
515 35
      return $elements[$idx];
516
    } else {
517 5
      return new SimpleHtmlDomNodeBlank();
518
    }
519
  }
520
521
  /**
   * Clean up serialized markup before it is handed back to the caller.
   *
   * Strips the wrapper artefacts that were added while loading (depending on the two
   * "created without html" flags), decodes HTML entities, trims, raw-url-decodes and finally
   * restores the characters masked by replaceToPreserveHtmlEntities().
   *
   * @param string $content
   *
   * @return string
   */
  protected function fixHtmlOutput($content)
  {
    // INFO: DOMDocument will encapsulate plaintext into a paragraph tag (<p>),
    //          so we try to remove it here again ...

    if ($this->isDOMDocumentCreatedWithoutHtmlWrapper === true) {
      $wrapperArtefacts = array(
          "\n",
          "\r\n",
          "\r",
          '<simpleHtmlDomP>',
          '</simpleHtmlDomP>',
          '<body>',
          '</body>',
          '<html>',
          '</html>',
      );

      $content = str_replace($wrapperArtefacts, '', $content);
    }

    if ($this->isDOMDocumentCreatedWithoutHtml === true) {
      $plainTextArtefacts = array(
          '<p>',
          '</p>',
          '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
      );

      $content = str_replace($plainTextArtefacts, '', $content);
    }

    // order matters: entity-decode, trim, url-decode, then restore the masked characters
    $decoded = UTF8::rawurldecode(trim(UTF8::html_entity_decode($content)));

    return self::putReplacedBackToPreserveHtmlEntities($decoded);
  }
569
570
  /**
   * Accessor for the wrapped DOMDocument.
   *
   * @return DOMDocument
   */
  public function getDocument()
  {
    $document = $this->document;

    return $document;
  }
577
578
  /**
   * Accessor for the encoding used when creating and saving the document.
   *
   * @return string
   */
  private function getEncoding()
  {
    $encoding = $this->encoding;

    return $encoding;
  }
587
588
  /**
   * Whether the loaded input contained no "<" at all (flag set by createDOMDocument()).
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtml()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtml;

    return $flag;
  }
595
596
  /**
   * Whether the loaded input contained no "<html" (flag set by createDOMDocument()).
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtmlWrapper()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtmlWrapper;

    return $flag;
  }
603
604
  /**
   * Get dom node's outer html
   *
   * If a callback was registered via set_callback() it is invoked first, receiving an array
   * that contains this parser. When the input had no "<html" wrapper only the document element
   * is serialized, otherwise the whole document.
   *
   * @return string
   */
  public function html()
  {
    $callback = $this::$callback;
    if ($callback !== null) {
      call_user_func($callback, array($this));
    }

    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    return $this->fixHtmlOutput($content);
  }
623
624
  /**
   * Get the HTML as XML.
   *
   * Serializes with LIBXML_NOEMPTYTAG, strips the "<?xml ... ?>" header and runs the result
   * through fixHtmlOutput().
   *
   * @return string
   */
  public function xml()
  {
    $serialized = $this->document->saveXML(null, LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = preg_replace('/<\?xml.*\?>/', '', $serialized);

    return $this->fixHtmlOutput(ltrim($withoutHeader));
  }
638
639
  /**
   * Get dom node's inner html
   *
   * Serializes every child of the document element, cleaning each piece with fixHtmlOutput().
   *
   * @return string empty string if the document has no root element (e.g. nothing loaded yet)
   */
  public function innerHtml()
  {
    $root = $this->document->documentElement;

    // an empty document has no documentElement; avoid dereferencing null
    if ($root === null) {
      return '';
    }

    $text = '';

    foreach ($root->childNodes as $node) {
      $text .= $this->fixHtmlOutput($this->document->saveHTML($node));
    }

    return $text;
  }
654
655
  /**
   * Load HTML from string
   *
   * Replaces the current document with one built from the given markup.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this instance, for chaining
   *
   * @throws InvalidArgumentException if argument is not string
   */
  public function loadHtml($html, $libXMLExtraOptions = null)
  {
    if (is_string($html) === false) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    $document = $this->createDOMDocument($html, $libXMLExtraOptions);
    $this->document = $document;

    return $this;
  }
675
676
  /**
   * Load HTML from file
   *
   * Accepts a local path or an http(s) URL; local paths must exist. The content is read via
   * UTF8::file_get_contents() and handed to loadHtml().
   *
   * @param string   $filePath
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this instance, for chaining
   *
   * @throws InvalidArgumentException if $filePath is not a string
   * @throws RuntimeException         if a local file does not exist or the content cannot be read
   */
  public function loadHtmlFile($filePath, $libXMLExtraOptions = null)
  {
    if (!is_string($filePath)) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    // URLs are not checked up-front; only local paths must exist
    if (!preg_match("/^https?:\/\//i", $filePath) && !file_exists($filePath)) {
      throw new RuntimeException("File $filePath not found");
    }

    try {
      $html = UTF8::file_get_contents($filePath);
    } catch (\Exception $e) {
      // keep the original failure reachable via getPrevious()
      throw new RuntimeException("Could not load file $filePath", 0, $e);
    }

    if ($html === false) {
      throw new RuntimeException("Could not load file $filePath");
    }

    $this->loadHtml($html, $libXMLExtraOptions);

    return $this;
  }
709
710
  /**
   * Return the inner html and, if a path is given, also write it to that file.
   *
   * The file is written with an exclusive lock (LOCK_EX); the result of the write is not checked.
   *
   * @param string $filepath target file, empty string to skip writing
   *
   * @return string the inner html
   */
  public function save($filepath = '')
  {
    $markup = $this->innerHtml();

    $shouldWrite = ($filepath !== '');
    if ($shouldWrite) {
      file_put_contents($filepath, $markup, LOCK_EX);
    }

    return $markup;
  }
726
727
  /**
   * Register the callback that html() invokes before serializing.
   *
   * The callback is stored in a static property, so it is not bound to this instance only.
   *
   * @param callable|null $functionName
   */
  public function set_callback($functionName)
  {
    $callback = $functionName;

    $this::$callback = $callback;
  }
734
735
  /**
   * Get dom node's plain text
   *
   * Takes the document's textContent and cleans it with fixHtmlOutput().
   *
   * @return string
   */
  public function text()
  {
    $plain = $this->document->textContent;

    return $this->fixHtmlOutput($plain);
  }
744
}
745