Completed
Push — master ( 0ea57e...7c876a )
by Lars
02:20
created

HtmlDomParser   D

Complexity

Total Complexity 83

Size/Duplication

Total Lines 692
Duplicated Lines 3.9 %

Coupling/Cohesion

Components 1
Dependencies 6

Test Coverage

Coverage 94.8%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 83
c 1
b 0
f 0
lcom 1
cbo 6
dl 27
loc 692
ccs 255
cts 269
cp 0.948
rs 4.4812

29 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 29 5
A addRandBytesToDomReplaceHelpers() 0 10 3
A __call() 3 10 2
B __callStatic() 0 26 5
B __get() 0 18 7
A __invoke() 0 4 1
A __toString() 0 4 1
A clear() 0 4 1
B replaceToPreserveHtmlEntities() 0 27 5
A putReplacedBackToPreserveHtmlEntities() 0 16 1
D createDOMDocument() 0 71 13
A getElementById() 0 4 1
A getElementByTagName() 0 10 2
A getElementsById() 0 4 1
B getElementsByTagName() 24 24 5
B find() 0 26 5
B fixHtmlOutput() 0 42 3
A getDocument() 0 4 1
A getEncoding() 0 4 1
A getIsDOMDocumentCreatedWithoutHtml() 0 4 1
A getIsDOMDocumentCreatedWithoutHtmlWrapper() 0 4 1
A html() 0 14 3
A xml() 0 9 1
A innerHtml() 0 10 2
A loadHtml() 0 10 2
B loadHtmlFile() 0 25 6
A save() 0 9 2
A set_callback() 0 4 1
A text() 0 4 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

Complex Class

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like HtmlDomParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use HtmlDomParser, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
namespace voku\helper;
4
5
use BadMethodCallException;
6
use DOMDocument;
7
use DOMXPath;
8
use InvalidArgumentException;
9
use RuntimeException;
10
11
/**
12
 * Class HtmlDomParser
13
 *
14
 * @package voku\helper
15
 *
16
 * @property-read string outerText Get dom node's outer html (alias for "outerHtml")
17
 * @property-read string outerHtml Get dom node's outer html
18
 * @property-read string innerText Get dom node's inner html (alias for "innerHtml")
19
 * @property-read string innerHtml Get dom node's inner html
20
 * @property-read string plaintext Get dom node's plain text
21
 *
22
 * @method string outerText() Get dom node's outer html (alias for "outerHtml()")
23
 * @method string outerHtml() Get dom node's outer html
24
 * @method string innerText() Get dom node's inner html (alias for "innerHtml()")
25
 * @method HtmlDomParser load() load($html) Load HTML from string
26
 * @method HtmlDomParser load_file() load_file($html) Load HTML from file
27
 *
28
 * @method static HtmlDomParser file_get_html() file_get_html($html, $libXMLExtraOptions = null) Load HTML from file
29
 * @method static HtmlDomParser str_get_html() str_get_html($html, $libXMLExtraOptions = null) Load HTML from string
30
 */
31
class HtmlDomParser
32
{
33
  /**
   * Lower-cased legacy method names mapped to the real method they forward to (see __call()).
   *
   * @var array
   */
  protected static $functionAliases = array(
      'outertext' => 'html',
      'outerhtml' => 'html',
      'innertext' => 'innerHtml',
      'innerhtml' => 'innerHtml',
      'load'      => 'loadHtml',
      'load_file' => 'loadHtmlFile',
  );

  /**
   * Characters that are masked inside detected links ('orig') and the
   * placeholder used for each of them ('tmp'), index by index.
   *
   * @var array
   */
  protected static $domLinkReplaceHelper = array(
      'orig' => array('[', ']', '{', '}',),
      'tmp'  => array(
          '!!!!HTML_DOM__SQUARE_BRACKET_LEFT!!!!',
          '!!!!HTML_DOM__SQUARE_BRACKET_RIGHT!!!!',
          '!!!!HTML_DOM__BRACKET_LEFT!!!!',
          '!!!!HTML_DOM__BRACKET_RIGHT!!!!',
      ),
  );

  /**
   * Characters that are masked in the whole input ('orig') and the
   * placeholder used for each of them ('tmp'), index by index.
   *
   * @var array
   */
  protected static $domReplaceHelper = array(
      'orig' => array('&', '|', '+', '%'),
      'tmp'  => array(
          '!!!!HTML_DOM__AMP!!!!',
          '!!!!HTML_DOM__PIPE!!!!',
          '!!!!HTML_DOM__PLUS!!!!',
          '!!!!HTML_DOM__PERCENT!!!!',
      ),
  );

  /**
   * Optional callback invoked with the parser by html() before serialising.
   *
   * @var Callable
   */
  protected static $callback;

  /**
   * The wrapped DOM document.
   *
   * @var DOMDocument
   */
  protected $document;

  /**
   * Encoding applied to the document.
   *
   * @var string
   */
  protected $encoding = 'UTF-8';

  /**
   * True when the loaded input contained no "<" at all.
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtml = false;

  /**
   * True when the loaded input contained no "<html" wrapper.
   *
   * @var bool
   */
  protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;

  /**
   * A random md5 hash generated in the constructor and appended to the replace-helper placeholders.
   *
   * @var string
   */
  protected $randomHash;
101
102
  /**
103
   * Constructor
104
   *
105
   * @param string|SimpleHtmlDom|\DOMNode $element HTML code or SimpleHtmlDom, \DOMNode
106
   */
107 116
  public function __construct($element = null)
108
  {
109 116
    $this->randomHash = md5(Bootup::get_random_bytes(16));
110 116
    $this->document = new \DOMDocument('1.0', $this->getEncoding());
111
112 116
    $this->addRandBytesToDomReplaceHelpers();
113
114
    // DOMDocument settings
115 116
    $this->document->preserveWhiteSpace = true;
116 116
    $this->document->formatOutput = true;
117
118 116
    if ($element instanceof SimpleHtmlDom) {
119 50
      $element = $element->getNode();
120 50
    }
121
122 116
    if ($element instanceof \DOMNode) {
123 50
      $domNode = $this->document->importNode($element, true);
124
125 50
      if ($domNode instanceof \DOMNode) {
126 50
        $this->document->appendChild($domNode);
127 50
      }
128
129 50
      return;
130
    }
131
132 116
    if ($element !== null) {
133 69
      $this->loadHtml($element);
134 68
    }
135 115
  }
136
137
  /**
   * Add rand-bytes to the "Dom-Replace-Helper"-variables.
   *
   * The placeholders are static and therefore shared by every parser instance
   * and by the static putReplacedBackToPreserveHtmlEntities(). They are
   * suffixed only once: appending another hash on every instantiation would
   * change the placeholders after an earlier instance has already written the
   * old ones into its document, so they could never be restored.
   */
  protected function addRandBytesToDomReplaceHelpers()
  {
    foreach (self::$domLinkReplaceHelper['tmp'] as $index => $placeholder) {
      // untouched placeholders still end with "!!!!"; an md5 suffix never does
      if (substr($placeholder, -4) === '!!!!') {
        self::$domLinkReplaceHelper['tmp'][$index] = $placeholder . $this->randomHash;
      }
    }

    foreach (self::$domReplaceHelper['tmp'] as $index => $placeholder) {
      if (substr($placeholder, -4) === '!!!!') {
        self::$domReplaceHelper['tmp'][$index] = $placeholder . $this->randomHash;
      }
    }
  }
150
151
  /**
   * Forward calls to legacy method names (case-insensitive) to their real implementation.
   *
   * @param string $name      called method name
   * @param array  $arguments arguments passed on to the aliased method
   *
   * @return bool|mixed
   *
   * @throws BadMethodCallException if no alias exists for the given name
   */
  public function __call($name, $arguments)
  {
    $alias = strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
      throw new BadMethodCallException('Method does not exist: ' . $alias);
    }

    $target = array($this, self::$functionAliases[$alias]);

    return call_user_func_array($target, $arguments);
  }
167
168
  /**
   * Static entry points "str_get_html" and "file_get_html".
   *
   * @param string $name      called static method name
   * @param array  $arguments [0] HTML string or file path, [1] optional extra libxml options
   *
   * @return HtmlDomParser
   *
   * @throws BadMethodCallException for any other method name
   */
  public static function __callStatic($name, $arguments)
  {
    $source = isset($arguments[0]) ? $arguments[0] : null;
    $extraOptions = isset($arguments[1]) ? $arguments[1] : null;

    switch ($name) {
      case 'str_get_html':
        $parser = new self();

        return $parser->loadHtml($source, $extraOptions);

      case 'file_get_html':
        $parser = new self();

        return $parser->loadHtmlFile($source, $extraOptions);
    }

    throw new BadMethodCallException('Method does not exist');
  }
200
201
  /**
   * Magic read access to the virtual properties (names are case-insensitive):
   * outerHtml/outerText, innerHtml/innerText and text/plaintext.
   *
   * @param string $name
   *
   * @return string|null null for an unknown property name
   */
  public function __get($name)
  {
    $property = strtolower($name);

    if (in_array($property, array('outerhtml', 'outertext'), true)) {
      return $this->html();
    }

    if (in_array($property, array('innerhtml', 'innertext'), true)) {
      return $this->innerHtml();
    }

    if (in_array($property, array('text', 'plaintext'), true)) {
      return $this->text();
    }

    return null;
  }
224
225
  /**
   * Calling the parser like a function is a shortcut for find().
   *
   * @param string $selector
   * @param int    $idx
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNode|null
   */
  public function __invoke($selector, $idx = null)
  {
    $result = $this->find($selector, $idx);

    return $result;
  }
235
236
  /**
   * String conversion yields the same output as html().
   *
   * @return string
   */
  public function __toString()
  {
    $markup = $this->html();

    return $markup;
  }
243
244
  /**
   * No-op kept only for api-compatibility-reasons.
   *
   * @return bool always true
   */
  public function clear()
  {
    $done = true;

    return $done;
  }
253
254
  /**
   * Mask characters with placeholders before the HTML is handed to the parser.
   *
   * Brackets and braces inside detected http(s) links are replaced via
   * $domLinkReplaceHelper; afterwards the characters of $domReplaceHelper are
   * replaced in the whole string.
   *
   * @param string $html
   *
   * @return string
   */
  protected function replaceToPreserveHtmlEntities($html)
  {
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    preg_match_all("/(\bhttps?:\/\/[^\s()<>]+(?:\([\w\d]+\)|[^[:punct:]\s]|\/|\}|\]))/i", $html, $matches);
    $links = empty($matches[1]) ? array() : $matches[1];

    if (count($links) > 0) {
      // str_replace() on an array subject keeps the keys, so both lists stay aligned
      $maskedLinks = str_replace(
          self::$domLinkReplaceHelper['orig'],
          self::$domLinkReplaceHelper['tmp'],
          $links
      );

      // link replacements come first, then the general character replacements
      $search = array_merge($links, $search);
      $replace = array_merge($maskedLinks, $replace);
    }

    return str_replace($search, $replace, $html);
  }
286
287
  /**
   * Turn the placeholders back into their original characters and drop "&#13;".
   *
   * @param string $html
   *
   * @return string
   */
  public static function putReplacedBackToPreserveHtmlEntities($html)
  {
    $placeholders = array_merge(
        self::$domLinkReplaceHelper['tmp'],
        self::$domReplaceHelper['tmp'],
        array('&#13;')
    );

    $originals = array_merge(
        self::$domLinkReplaceHelper['orig'],
        self::$domReplaceHelper['orig'],
        array('')
    );

    return str_replace($placeholders, $originals, $html);
  }
308
309
  /**
   * create DOMDocument from HTML
   *
   * The input is first tried via simplexml_load_string(); when that fails or
   * libxml reports errors, it is parsed with DOMDocument::loadHTML() after the
   * "xml encoding" hack and the placeholder masking have been applied.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions extra LIBXML_* flags OR-ed into the default options
   *
   * @return \DOMDocument
   */
  private function createDOMDocument($html, $libXMLExtraOptions = null)
  {
    // Recompute both flags on every load, otherwise a re-used parser keeps
    // the values of a previously loaded input.
    $this->isDOMDocumentCreatedWithoutHtml = (strpos($html, '<') === false);
    $this->isDOMDocumentCreatedWithoutHtmlWrapper = (strpos($html, '<html') === false);

    // set error level
    $internalErrors = libxml_use_internal_errors(true);
    $disableEntityLoader = libxml_disable_entity_loader(true);
    libxml_clear_errors();

    $options = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET;

    if (defined('LIBXML_COMPACT')) {
      $options |= LIBXML_COMPACT;
    }

    if (defined('LIBXML_HTML_NOIMPLIED')) {
      $options |= LIBXML_HTML_NOIMPLIED;
    }

    if (defined('LIBXML_HTML_NODEFDTD')) {
      $options |= LIBXML_HTML_NODEFDTD;
    }

    if ($libXMLExtraOptions !== null) {
      $options |= $libXMLExtraOptions;
    }

    $sxe = simplexml_load_string($html, 'SimpleXMLElement', $options);
    if ($sxe !== false && count(libxml_get_errors()) === 0) {
      $this->document = dom_import_simplexml($sxe)->ownerDocument;
    } else {

      // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
      $html = trim($html);
      $xmlHackUsed = false;
      // stripos(haystack, needle): only prepend the declaration when the
      // input does not already start with one
      if (stripos($html, '<?xml') !== 0) {
        $xmlHackUsed = true;
        $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
      }

      $html = $this->replaceToPreserveHtmlEntities($html);

      $this->document->loadHTML($html);

      // remove the "xml-encoding" hack
      if ($xmlHackUsed === true) {
        // childNodes is a live list: iterate over a snapshot so that removing
        // a node cannot cause a sibling to be skipped
        foreach (iterator_to_array($this->document->childNodes) as $child) {
          if ($child->nodeType === XML_PI_NODE) {
            $this->document->removeChild($child);
          }
        }
      }

      libxml_clear_errors();
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    libxml_use_internal_errors($internalErrors);
    libxml_disable_entity_loader($disableEntityLoader);

    return $this->document;
  }
388
389
  /**
   * Look up a single element by its id (first match of the "#id" selector).
   *
   * @param string $id
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank
   */
  public function getElementById($id)
  {
    $selector = '#' . $id;

    return $this->find($selector, 0);
  }
400
401
  /**
   * Look up the first element with the given tag name.
   *
   * @param string $name
   *
   * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank a blank node when nothing matches
   */
  public function getElementByTagName($name)
  {
    $first = $this->document->getElementsByTagName($name)->item(0);

    if ($first === null) {
      return new SimpleHtmlDomNodeBlank();
    }

    return new SimpleHtmlDom($first);
  }
418
419
  /**
   * Look up elements by id via the "#id" selector.
   *
   * @param string   $id
   * @param null|int $idx passed through to find()
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsById($id, $idx = null)
  {
    $selector = '#' . $id;

    return $this->find($selector, $idx);
  }
431
432
  /**
   * Look up elements by tag name.
   *
   * Without $idx the whole collection is returned; with $idx only that
   * element, where a negative $idx counts from the end of the collection.
   *
   * @param string   $name
   * @param null|int $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|SimpleHtmlDomNodeBlank
   */
  public function getElementsByTagName($name, $idx = null)
  {
    $domNodes = $this->document->getElementsByTagName($name);

    $collection = new SimpleHtmlDomNode();
    foreach ($domNodes as $domNode) {
      $collection[] = new SimpleHtmlDom($domNode);
    }

    if ($idx === null) {
      return $collection;
    }

    $position = $idx < 0 ? count($collection) + $idx : $idx;

    if (!isset($collection[$position])) {
      return new SimpleHtmlDomNodeBlank();
    }

    return $collection[$position];
  }
464
465
  /**
   * Find list of nodes with a CSS selector.
   *
   * The selector is converted to XPath and evaluated against the document.
   * Without $idx the whole collection is returned; with $idx only that
   * element, where a negative $idx counts from the end of the collection.
   *
   * @param string $selector
   * @param int    $idx
   *
   * @return SimpleHtmlDomNode|SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeBlank
   */
  public function find($selector, $idx = null)
  {
    $xPathQuery = SelectorConverter::toXPath($selector);

    $xPath = new DOMXPath($this->document);
    $nodesList = $xPath->query($xPathQuery);
    $elements = new SimpleHtmlDomNode();

    // DOMXPath::query() returns false for an expression it cannot evaluate;
    // treat that as "no match" instead of iterating over a boolean
    if ($nodesList !== false) {
      foreach ($nodesList as $node) {
        $elements[] = new SimpleHtmlDom($node);
      }
    }

    if (null === $idx) {
      return $elements;
    }

    if ($idx < 0) {
      $idx = count($elements) + $idx;
    }

    if (isset($elements[$idx])) {
      return $elements[$idx];
    }

    return new SimpleHtmlDomNodeBlank();
  }
499
500
  /**
   * Clean up serialised output: strip wrapper markup, decode entities and
   * URL-encoding, and restore the masked characters.
   *
   * @param string $content
   *
   * @return string
   */
  protected function fixHtmlOutput($content)
  {
    // INFO: DOMDocument will encapsulate plaintext into a paragraph tag (<p>),
    //          so we try to remove it here again ...

    if ($this->isDOMDocumentCreatedWithoutHtmlWrapper === true) {
      $wrapperParts = array(
          "\n",
          "\r\n",
          "\r",
          '<simpleHtmlDomP>',
          '</simpleHtmlDomP>',
          '<body>',
          '</body>',
          '<html>',
          '</html>',
      );

      $content = str_replace($wrapperParts, '', $content);
    }

    if ($this->isDOMDocumentCreatedWithoutHtml === true) {
      $plainTextParts = array(
          '<p>',
          '</p>',
          '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">'
      );

      $content = str_replace($plainTextParts, '', $content);
    }

    $decoded = UTF8::html_entity_decode($content);
    $decoded = UTF8::urldecode(trim($decoded));

    return self::putReplacedBackToPreserveHtmlEntities($decoded);
  }
547
548
  /**
   * Access the wrapped DOM document.
   *
   * @return DOMDocument
   */
  public function getDocument()
  {
    $dom = $this->document;

    return $dom;
  }
555
556
  /**
   * The encoding applied to the document.
   *
   * @return string
   */
  private function getEncoding()
  {
    $charset = $this->encoding;

    return $charset;
  }
565
566
  /**
   * Whether the loaded input contained no "<" at all.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtml()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtml;

    return $flag;
  }
573
574
  /**
   * Whether the loaded input contained no "<html" wrapper.
   *
   * @return bool
   */
  public function getIsDOMDocumentCreatedWithoutHtmlWrapper()
  {
    $flag = $this->isDOMDocumentCreatedWithoutHtmlWrapper;

    return $flag;
  }
581
582
  /**
   * Get dom node's outer html
   *
   * A registered callback is invoked with the parser first. Input without an
   * "<html" wrapper is serialised from the document element only.
   *
   * @return string
   */
  public function html()
  {
    if ($this::$callback !== null) {
      call_user_func_array($this::$callback, array($this));
    }

    $content = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
        ? $this->document->saveHTML($this->document->documentElement)
        : $this->document->saveHTML();

    return $this->fixHtmlOutput($content);
  }
601
602
  /**
   * Get the HTML as XML.
   *
   * @return string the serialised document without the XML header
   */
  public function xml()
  {
    $serialized = $this->document->saveXML(null, LIBXML_NOEMPTYTAG);

    // remove the XML-header
    $withoutHeader = preg_replace('/<\?xml.*\?>/', '', $serialized);
    $withoutHeader = ltrim($withoutHeader);

    return $this->fixHtmlOutput($withoutHeader);
  }
616
617
  /**
   * Get dom node's inner html
   *
   * Every child of the document element is serialised and cleaned separately,
   * then the results are concatenated.
   *
   * @return string empty string when the document has no root element
   */
  public function innerHtml()
  {
    $root = $this->document->documentElement;

    // an empty document (nothing loaded yet) has no document element
    if ($root === null) {
      return '';
    }

    $text = '';

    foreach ($root->childNodes as $node) {
      $text .= $this->fixHtmlOutput($this->document->saveHTML($node));
    }

    return $text;
  }
632
633
  /**
   * Load HTML from string
   *
   * Replaces the current document with one built from the given markup.
   *
   * @param string   $html
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser the parser itself, for chaining
   *
   * @throws InvalidArgumentException if argument is not string
   */
  public function loadHtml($html, $libXMLExtraOptions = null)
  {
    if (is_string($html) === false) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    $dom = $this->createDOMDocument($html, $libXMLExtraOptions);
    $this->document = $dom;

    return $this;
  }
653
654
  /**
   * Load HTML from file
   *
   * Paths starting with http:// or https:// are not checked with
   * file_exists(); everything else must exist locally.
   *
   * @param string   $filePath
   * @param int|null $libXMLExtraOptions
   *
   * @return HtmlDomParser the parser itself, for chaining
   *
   * @throws InvalidArgumentException if $filePath is not a string
   * @throws RuntimeException if the file does not exist or cannot be read
   */
  public function loadHtmlFile($filePath, $libXMLExtraOptions = null)
  {
    if (!is_string($filePath)) {
      throw new InvalidArgumentException(__METHOD__ . ' expects parameter 1 to be string.');
    }

    if (!preg_match("/^https?:\/\//i", $filePath) && !file_exists($filePath)) {
      throw new RuntimeException("File $filePath not found");
    }

    try {
      $html = UTF8::file_get_contents($filePath);
    } catch (\Exception $e) {
      // keep the original exception as "previous" so the cause is not lost
      throw new RuntimeException("Could not load file $filePath", 0, $e);
    }

    if ($html === false) {
      throw new RuntimeException("Could not load file $filePath");
    }

    $this->loadHtml($html, $libXMLExtraOptions);

    return $this;
  }
687
688
  /**
   * Save dom as string
   *
   * The inner html is returned and, when a path is given, also written to
   * that file with an exclusive lock.
   *
   * @param string $filepath
   *
   * @return string
   */
  public function save($filepath = '')
  {
    $markup = $this->innerHtml();

    if ($filepath === '') {
      return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
  }
704
705
  /**
   * Register the callback that html() invokes with the parser before serialising.
   * The callback is stored in a static property.
   *
   * @param $functionName
   */
  public function set_callback($functionName)
  {
    $callable = $functionName;

    $this::$callback = $callable;
  }
712
713
  /**
   * Get dom node's plain text
   *
   * @return string the document's text content after output clean-up
   */
  public function text()
  {
    $plain = $this->document->textContent;

    return $this->fixHtmlOutput($plain);
  }
722
}
723