Completed
Push — master ( 8111da...2b9d4e )
by Lars
03:23
created

HtmlDomParser::createDOMDocument()   F

Complexity

Conditions 16
Paths 1152

Size

Total Lines 81
Code Lines 44

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 53
CRAP Score 16.0123

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 81
ccs 53
cts 55
cp 0.9636
rs 2.0759
cc 16
eloc 44
nc 1152
nop 2
crap 16.0123

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMDocument;
7
use DOMXPath;
8
use InvalidArgumentException;
9
use RuntimeException;
10
11
/**
12
 * Class HtmlDomParser
13
 *
14
 * @package voku\helper
15
 *
16
 * @property-read string outerText Get dom node's outer html (alias for "outerHtml")
17
 * @property-read string outerHtml Get dom node's outer html
18
 * @property-read string innerText Get dom node's inner html (alias for "innerHtml")
19
 * @property-read string innerHtml Get dom node's inner html
20
 * @property-read string plaintext Get dom node's plain text
21
 *
22
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
23
 * @method string outerHtml() Get dom node's outer html
24
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
25
 * @method HtmlDomParser load() load($html) Load HTML from string
26
 * @method HtmlDomParser load_file() load_file($html) Load HTML from file
27
 *
28
 * @method static HtmlDomParser file_get_html() file_get_html($html, $libXMLExtraOptions = null) Load HTML from file
29
 * @method static HtmlDomParser str_get_html() str_get_html($html, $libXMLExtraOptions = null) Load HTML from string
30
 */
31
class HtmlDomParser
32
{
33
  /**
34
   * @var array
35
   */
36
  protected static $functionAliases = array(
37
      'outertext' => 'html',
38
      'outerhtml' => 'html',
39
      'innertext' => 'innerHtml',
40
      'innerhtml' => 'innerHtml',
41
      'load'      => 'loadHtml',
42
      'load_file' => 'loadHtmlFile',
43
  );
44
45
  /**
46
   * @var string[][]
47
   */
48
  protected static $domLinkReplaceHelper = array(
49
      'orig' => array('[', ']', '{', '}',),
50
      'tmp'  => array(
51
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT!!!!',
52
          '!!!!SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT!!!!',
53
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT!!!!',
54
          '!!!!SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT!!!!',
55
      ),
56
  );
57
58
  /**
59
   * @var array
60
   */
61
  protected static $domReplaceHelper = array(
62
      'orig' => array('&', '|', '+', '%'),
63
      'tmp'  => array(
64
          '!!!!SIMPLE_HTML_DOM__VOKU__AMP!!!!',
65
          '!!!!SIMPLE_HTML_DOM__VOKU__PIPE!!!!',
66
          '!!!!SIMPLE_HTML_DOM__VOKU__PLUS!!!!',
67
          '!!!!SIMPLE_HTML_DOM__VOKU__PERCENT!!!!',
68
      ),
69
  );
70
71
  /**
72
   * @var Callable
73
   */
74
  protected static $callback;
75
76
  /**
77
   * @var DOMDocument
78
   */
79
  protected $document;
80
81
  /**
82
   * @var string
83
   */
84
  protected $encoding = 'UTF-8';
85
86
  /**
87
   * @var bool
88
   */
89
  protected $isDOMDocumentCreatedWithoutHtml = false;
90
91
  /**
92
   * @var bool
93
   */
94
  protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
95
96
  /**
   * Build a parser, optionally seeded with markup or with an existing node.
   *
   * A SimpleHtmlDom wrapper is unwrapped through getNode(). A \DOMNode is deep-imported
   * into this parser's own document. Any other non-null value is passed to loadHtml().
   *
   * @param string|SimpleHtmlDom|\DOMNode|null $element HTML code or SimpleHtmlDom, \DOMNode
   */
  public function __construct($element = null)
  {
    $this->document = new \DOMDocument('1.0', $this->getEncoding());

    // DOMDocument settings
    $this->document->preserveWhiteSpace = true;
    $this->document->formatOutput = true;

    $source = ($element instanceof SimpleHtmlDom) ? $element->getNode() : $element;

    if ($source instanceof \DOMNode) {
      $imported = $this->document->importNode($source, true);

      // only append when the import actually produced a node
      if ($imported instanceof \DOMNode) {
        $this->document->appendChild($imported);
      }
    } elseif ($source !== null) {
      $this->loadHtml($source);
    }
  }
127
128
  /**
   * Dispatch legacy method names to their current implementation.
   *
   * The lower-cased name is looked up in self::$functionAliases and the mapped
   * method is invoked on this instance with the given arguments.
   *
   * @param string $name
   * @param array  $arguments
   *
   * @return bool|mixed
   *
   * @throws BadMethodCallException if no alias exists for the name
   */
  public function __call($name, $arguments)
  {
    $alias = strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      throw new BadMethodCallException('Method does not exist: ' . $alias);
    }

    $target = array($this, self::$functionAliases[$alias]);

    return call_user_func_array($target, $arguments);
  }
144
145
  /**
   * Static entry points "str_get_html" and "file_get_html".
   *
   * Both create a fresh parser; the first argument is the markup (or file path),
   * the optional second argument is forwarded as the extra libxml options.
   *
   * @param string $name
   * @param array  $arguments
   *
   * @return HtmlDomParser
   *
   * @throws BadMethodCallException for any other method name
   */
  public static function __callStatic($name, $arguments)
  {
    $input = isset($arguments[0]) ? $arguments[0] : null;
    $libXMLExtraOptions = isset($arguments[1]) ? $arguments[1] : null;

    if ($name !== 'str_get_html' && $name !== 'file_get_html') {
      throw new BadMethodCallException('Method does not exist');
    }

    $parser = new self();

    if ($name === 'str_get_html') {
      return $parser->loadHtml($input, $libXMLExtraOptions);
    }

    return $parser->loadHtmlFile($input, $libXMLExtraOptions);
  }
177
178
  /** @noinspection MagicMethodsValidityInspection */
  /**
   * Magic read access for the html / text pseudo-properties (case-insensitive).
   *
   * "outerhtml" / "outertext" map to html(), "innerhtml" / "innertext" to innerHtml(),
   * "text" / "plaintext" to text().
   *
   * @param string $name
   *
   * @return string|null null for any unknown property name
   */
  public function __get($name)
  {
    $property = strtolower($name);

    if (in_array($property, array('outerhtml', 'outertext'), true)) {
      return $this->html();
    }

    if (in_array($property, array('innerhtml', 'innertext'), true)) {
      return $this->innerHtml();
    }

    if (in_array($property, array('text', 'plaintext'), true)) {
      return $this->text();
    }

    return null;
  }
202
203
  /**
   * Calling the parser like a function is a shortcut for find().
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
213
214
  /**
   * String conversion yields the same output as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
221
222
  /**
   * No-op, kept only for api-compatibility-reasons.
   *
   * @return bool always true
   */
  public function clear()
  {
    // nothing to release here
    return true;
  }
231
232
  /**
   * Swap characters listed in the replace-helper tables for temporary placeholders.
   *
   * Every "&", "|", "+" and "%" in the input is replaced. In addition, when the input
   * contains "http", each matched URL gets its "[", "]", "{" and "}" replaced as well.
   * putReplacedBackToPreserveHtmlEntities() performs the reverse mapping.
   *
   * @param string $html
   *
   * @return string
   */
  public static function replaceToPreserveHtmlEntities($html)
  {
    // defaults: only the plain character placeholders
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    if (strpos($html, 'http') !== false) {

      // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo]
      $regExUrl = '/(\[?\bhttps?:\/\/[^\s<>]+(?:\([\w]+\)|[^[:punct:]\s]|\/|\}|\]))/i';
      $matches = array();
      preg_match_all($regExUrl, $html, $matches);

      if (!empty($matches[1])) {
        $links = (array)$matches[1];

        // str_replace() on an array subject returns one rewritten entry per link
        $protectedLinks = str_replace(
            self::$domLinkReplaceHelper['orig'],
            self::$domLinkReplaceHelper['tmp'],
            $links
        );

        // links go first, so they are rewritten before the single characters
        $search = array_merge($links, $search);
        $replace = array_merge($protectedLinks, $replace);
      }
    }

    return str_replace($search, $replace, $html);
  }
272
273
  /**
   * Turn the temporary placeholders back into their original characters.
   *
   * The combined placeholder / character lists (link helper first, then the plain
   * helper) are built once and cached in a static variable.
   *
   * @param string $html
   *
   * @return string
   */
  public static function putReplacedBackToPreserveHtmlEntities($html)
  {
    static $DOM_REPLACE__HELPER_CACHE = null;

    if (null === $DOM_REPLACE__HELPER_CACHE) {
      $DOM_REPLACE__HELPER_CACHE = array(
          'tmp'  => array_merge(self::$domLinkReplaceHelper['tmp'], self::$domReplaceHelper['tmp']),
          'orig' => array_merge(self::$domLinkReplaceHelper['orig'], self::$domReplaceHelper['orig']),
      );
    }

    $placeholders = $DOM_REPLACE__HELPER_CACHE['tmp'];
    $originals = $DOM_REPLACE__HELPER_CACHE['orig'];

    return str_replace($placeholders, $originals, $html);
  }
294
295
  /**
   * create DOMDocument from HTML
   *
   * The markup is first parsed with simplexml_load_string(). If that fails, or libxml
   * reports any error, the markup is loaded through DOMDocument::loadHTML() instead
   * (see loadHtmlWithEncodingHack()). The libxml error-handling state that was active
   * before the call is restored afterwards.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions extra LIBXML_* flags, OR-ed into the defaults
   *
   * @return \DOMDocument
   */
  private function createDOMDocument($html, $libXMLExtraOptions = null)
  {
    // Recompute both flags on every load, so a re-used parser does not keep the
    // values of a previous document.
    $this->isDOMDocumentCreatedWithoutHtml = (strpos($html, '<') === false);
    $this->isDOMDocumentCreatedWithoutHtmlWrapper = (strpos($html, '<html') === false);

    // set error level
    $internalErrors = libxml_use_internal_errors(true);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only toggle it before that
    $toggleEntityLoader = PHP_VERSION_ID < 80000;
    $disableEntityLoader = false;
    if ($toggleEntityLoader) {
      $disableEntityLoader = libxml_disable_entity_loader(true);
    }

    libxml_clear_errors();

    $sxe = simplexml_load_string($html, 'SimpleXMLElement', $this->getSimpleXmlOptions($libXMLExtraOptions));
    if ($sxe !== false && count(libxml_get_errors()) === 0) {
      $this->document = dom_import_simplexml($sxe)->ownerDocument;
    } else {
      $this->loadHtmlWithEncodingHack($html, $libXMLExtraOptions);

      libxml_clear_errors();
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    libxml_use_internal_errors($internalErrors);
    if ($toggleEntityLoader) {
      libxml_disable_entity_loader($disableEntityLoader);
    }

    return $this->document;
  }

  /**
   * Build the libxml option bitmask for the simplexml_load_string() attempt.
   *
   * @param int|null $libXMLExtraOptions
   *
   * @return int
   */
  private function getSimpleXmlOptions($libXMLExtraOptions = null)
  {
    $options = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;

    // these flags do not exist in every libxml / PHP build, so add them only when defined
    $optionalFlags = array(
        'LIBXML_BIGLINES',
        'LIBXML_COMPACT',
        'LIBXML_HTML_NOIMPLIED',
        'LIBXML_HTML_NODEFDTD',
    );
    foreach ($optionalFlags as $flagName) {
      if (defined($flagName)) {
        $options |= constant($flagName);
      }
    }

    if ($libXMLExtraOptions !== null) {
      $options |= $libXMLExtraOptions;
    }

    return $options;
  }

  /**
   * Load markup into the current document via DOMDocument::loadHTML().
   *
   * Unless the trimmed markup already starts with an XML declaration, an XML
   * declaration naming the configured encoding is prepended before loading and the
   * resulting processing-instruction nodes are removed again afterwards.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions
   */
  private function loadHtmlWithEncodingHack($html, $libXMLExtraOptions = null)
  {
    // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
    $html = trim($html);
    $xmlHackUsed = false;
    // haystack first, needle second: test whether the markup itself starts with the declaration
    if (stripos($html, '<?xml') !== 0) {
      $xmlHackUsed = true;
      $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
    }

    $html = self::replaceToPreserveHtmlEntities($html);

    $optionsXml = 0;
    if ($libXMLExtraOptions !== null) {
      $optionsXml |= $libXMLExtraOptions;
    }

    if ($optionsXml && Bootup::is_php('5.4')) {
      $this->document->loadHTML($html, $optionsXml);
    } else {
      $this->document->loadHTML($html);
    }

    // remove the "xml-encoding" hack
    if ($xmlHackUsed === true) {
      // childNodes is a live list: collect first, remove afterwards, so no node is skipped
      $piNodes = array();
      foreach ($this->document->childNodes as $child) {
        if ($child->nodeType === XML_PI_NODE) {
          $piNodes[] = $child;
        }
      }

      foreach ($piNodes as $piNode) {
        $this->document->removeChild($piNode);
      }
    }
  }
384
385
  /**
   * Return SimpleHtmlDom by id.
   *
   * Shortcut for find() with an "#id" selector and index 0.
   *
   * @param string $id
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
396
397
  /**
   * Return SimpleHtmlDom by tag name.
   *
   * Only the first element with the given tag name is considered.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank blank node when no element matches
   */
  public function getElementByTagName($name)
  {
    $first = $this->document->getElementsByTagName($name)->item(0);

    if ($first === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new SimpleHtmlDom($first);
  }
414
415
  /**
   * Returns Elements by id
   *
   * Shortcut for find() with an "#id" selector; $idx is passed through unchanged.
   *
   * @param string   $id
   * @param null|int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
427
428
  /**
   * Returns Elements by tag name
   *
   * Without $idx the whole collection is returned. With $idx a single element is
   * returned; a negative $idx counts from the end of the collection.
   *
   * @param string   $name
   * @param null|int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $elements = new SimpleHtmlDomNode();

    foreach ($this->document->getElementsByTagName($name) as $domNode) {
      $elements[] = new SimpleHtmlDom($domNode);
    }

    if ($idx === null) {
      return $elements;
    }

    // negative index: offset from the end
    $position = ($idx < 0) ? count($elements) + $idx : $idx;

    return isset($elements[$position]) ? $elements[$position] : new SimpleHtmlDomNodeBlank();
  }
460
461
  /**
   * Find list of nodes with a CSS selector.
   *
   * The selector is converted to XPath and evaluated against the current document.
   * Without $idx the whole collection is returned. With $idx a single element is
   * returned; a negative $idx counts from the end of the collection.
   *
   * @param string   $selector
   * @param int|null $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeBlank
   *         the collection when $idx is null, otherwise the element at $idx or a
   *         blank node when that position does not exist
   */
  public function find($selector, $idx = null)
  {
    $xPathQuery = SelectorConverter::toXPath($selector);

    $xPath = new DOMXPath($this->document);
    $nodesList = $xPath->query($xPathQuery);
    $elements = new SimpleHtmlDomNode();

    // DOMXPath::query() returns false for an expression it cannot evaluate
    if ($nodesList !== false) {
      foreach ($nodesList as $node) {
        $elements[] = new SimpleHtmlDom($node);
      }
    }

    if (null === $idx) {
      return $elements;
    }

    // negative index: offset from the end
    if ($idx < 0) {
      $idx = count($elements) + $idx;
    }

    if (isset($elements[$idx])) {
      return $elements[$idx];
    }

    return new SimpleHtmlDomNodeBlank();
  }
495
496
  /**
   * Clean up serialized output before it is handed back to the caller.
   *
   * Depending on how the document was created, wrapper tags and line breaks are
   * stripped; afterwards the content is entity-decoded, trimmed, url-decoded and the
   * temporary placeholders are turned back into their original characters.
   *
   * @param string $content
   *
   * @return string
   */
  protected function fixHtmlOutput($content)
  {
    // INFO: DOMDocument will encapsulate plaintext into a paragraph tag (<p>),
    //          so we try to remove it here again ...
    $strip = array();

    if ($this->isDOMDocumentCreatedWithoutHtmlWrapper === true) {
      $strip = array_merge(
          $strip,
          array(
              "\n",
              "\r\n",
              "\r",
              '<simpleHtmlDomP>',
              '</simpleHtmlDomP>',
              '<body>',
              '</body>',
              '<html>',
              '</html>',
          )
      );
    }

    if ($this->isDOMDocumentCreatedWithoutHtml === true) {
      $strip = array_merge(
          $strip,
          array(
              '<p>',
              '</p>',
              '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
          )
      );
    }

    // str_replace() applies the search entries one after another, in list order
    if (count($strip) > 0) {
      $content = str_replace($strip, '', $content);
    }

    $decoded = trim(UTF8::html_entity_decode($content));
    $decoded = UTF8::rawurldecode($decoded);

    return self::putReplacedBackToPreserveHtmlEntities($decoded);
  }
544
545
  /**
   * Access the underlying DOMDocument instance.
   *
   * @return DOMDocument
   */
  public function getDocument()
  {
    $document = $this->document;

    return $document;
  }
552
553
  /**
   * Get the encoding to use
   *
   * Returns the value of the $encoding property.
   *
   * @return string
   */
  private function getEncoding()
  {
    $encoding = $this->encoding;

    return $encoding;
  }
562
563
  /**
   * Tell whether the loaded input contained no "<" character at all.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtml()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtml;

    return $flag;
  }
570
571
  /**
   * Tell whether the loaded input contained no "<html" tag.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtmlWrapper()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtmlWrapper;

    return $flag;
  }
578
579
  /**
   * Get dom node's outer html
   *
   * A registered callback (see set_callback()) is invoked first, with an array
   * holding this parser as its single argument. When the input had no "<html"
   * wrapper only the document element is serialized, otherwise the whole document.
   *
   * @return string
   */
  public function html()
  {
    $callback = $this::$callback;
    if ($callback !== null) {
      call_user_func($callback, array($this));
    }

    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    return $this->fixHtmlOutput($content);
  }
598
599
  /**
   * Get the HTML as XML.
   *
   * The document is serialized with LIBXML_NOEMPTYTAG, the XML header is removed
   * and the result is passed through fixHtmlOutput().
   *
   * @return string
   */
  public function xml()
  {
    $serialized = $this->document->saveXML(null, LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = preg_replace('/<\?xml.*\?>/', '', $serialized);

    return $this->fixHtmlOutput(ltrim($withoutHeader));
  }
613
614
  /**
   * Get dom node's inner html
   *
   * Serializes every child of the document element, passes each piece through
   * fixHtmlOutput() and concatenates the results.
   *
   * @return string empty string when the document has no document element
   */
  public function innerHtml()
  {
    $root = $this->document->documentElement;

    // an empty document (nothing loaded yet) has no document element to iterate
    if ($root === null) {
      return '';
    }

    $text = '';

    foreach ($root->childNodes as $node) {
      $text .= $this->fixHtmlOutput($this->document->saveHTML($node));
    }

    return $text;
  }
629
630
  /**
   * Load HTML from string
   *
   * Replaces the current document with one created from the given markup.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this instance
   *
   * @throws InvalidArgumentException if argument is not string
   */
  public function loadHtml($html, $libXMLExtraOptions = null)
  {
    if (is_string($html) === false) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    $document = $this->createDOMDocument($html, $libXMLExtraOptions);
    $this->document = $document;

    return $this;
  }
650
651
  /**
   * Load HTML from file
   *
   * Paths starting with "http://" or "https://" are not checked for existence;
   * every other path must exist on disk. The content is read through
   * UTF8::file_get_contents() and then handed to loadHtml().
   *
   * @param string   $filePath
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser this instance
   *
   * @throws InvalidArgumentException if $filePath is not a string
   * @throws RuntimeException if the file does not exist or cannot be read
   */
  public function loadHtmlFile($filePath, $libXMLExtraOptions = null)
  {
    if (!is_string($filePath)) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    if (!preg_match("/^https?:\/\//i", $filePath) && !file_exists($filePath)) {
      throw new RuntimeException("File $filePath not found");
    }

    try {
      $html = UTF8::file_get_contents($filePath);
    } catch (\Exception $e) {
      // keep the original failure attached as "previous", so the cause is not lost
      throw new RuntimeException("Could not load file $filePath", 0, $e);
    }

    if ($html === false) {
      throw new RuntimeException("Could not load file $filePath");
    }

    $this->loadHtml($html, $libXMLExtraOptions);

    return $this;
  }
684
685
  /**
   * Save dom as string
   *
   * Returns the inner html; when a non-empty path is given, the same string is
   * also written to that file (with LOCK_EX).
   *
   * @param string $filepath
   *
   * @return string
   */
  public function save($filepath = '')
  {
    $markup = $this->innerHtml();

    if ($filepath === '') {
      return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
  }
701
702
  /**
   * Register the callback that html() invokes before serializing.
   *
   * The callback is stored in a static property, so it is not bound to this instance.
   *
   * @param callable $functionName
   */
  public function set_callback($functionName)
  {
    $callback = $functionName;

    $this::$callback = $callback;
  }
709
710
  /**
   * Get dom node's plain text
   *
   * Returns the document's text content after it went through fixHtmlOutput().
   *
   * @return string
   */
  public function text()
  {
    $plain = $this->document->textContent;

    return $this->fixHtmlOutput($plain);
  }
719
}
720