Total Complexity | 209 |
Total Lines | 1108 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like HtmlDocument often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDocument, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
32 | class HtmlDocument |
||
33 | { |
||
/**
 * Root node of the document; prepare() creates it with the tag "root".
 *
 * @var HtmlNode|null
 */
public $root = null;

/** @var array Nodes visited by decode() and cleared in __destruct(). */
public $nodes = [];

/** @var mixed Value stored by set_callback() and reset by remove_callback(). */
public $callback = null;

/** @var bool Whether tag and attribute names are lower-cased while parsing. */
public $lowercase = false;

/** @var int Byte length of the trimmed input as seen by prepare(). */
public $original_size;

/** @var int Byte length of the working document buffer. */
public $size;

/** @var int Current byte offset into $doc. */
protected $pos;

/** @var string Working document buffer; load() unsets it once parsing is done. */
protected $doc;

/** @var string|null Byte at $pos, or null once $pos is past the end of $doc. */
protected $char;

/** @var int Counter written to the HDOM_INFO_BEGIN / HDOM_INFO_END slots of nodes. */
protected $cursor;

/** @var HtmlNode|null Node that newly parsed nodes are attached to. */
protected $parent;

/** @var array Reset by prepare(); load() checks whether it is empty after stripping "<? ... ?>". */
protected $noise = [];

/** @var string Characters treated as blanks by the tokenizer. */
protected $token_blank = " \t\r\n";

/** @var string Characters that end an attribute name. */
protected $token_equal = ' =/>';

/** @var string Characters that end a tag name. */
protected $token_slash = " />\r\n\t";

/** @var string Characters that end an unquoted attribute value. */
protected $token_attr = ' >';

/** @var string Charset determined by parse_charset(). */
public $_charset = '';

/** @var string Charset handed to html_entity_decode() in decode(). */
public $_target_charset = '';

/** @var string Inner text assigned to <br> elements by read_tag(). */
public $default_br_text = '';

/** @var string Stored by prepare(); not read by any method shown in this part of the file. */
public $default_span_text = '';

/** @var array Tags that never get content or an end tag (lookup by lower-case name). */
protected $self_closing_tags = [
    'area' => 1,
    'base' => 1,
    'br' => 1,
    'col' => 1,
    'embed' => 1,
    'hr' => 1,
    'img' => 1,
    'input' => 1,
    'link' => 1,
    'meta' => 1,
    'param' => 1,
    'source' => 1,
    'track' => 1,
    'wbr' => 1,
];

/** @var array End tags of these elements may close ancestors that are still open. */
protected $block_tags = [
    'body' => 1,
    'div' => 1,
    'form' => 1,
    'root' => 1,
    'span' => 1,
    'table' => 1,
];

/**
 * Maps a start tag to the open parent tags it implicitly closes.
 *
 * @var array
 */
protected $optional_closing_tags = [
    // Not optional, see
    // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
    'b' => ['b' => 1],
    'dd' => ['dd' => 1, 'dt' => 1],
    // Not optional, see
    // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
    'dl' => ['dd' => 1, 'dt' => 1],
    'dt' => ['dd' => 1, 'dt' => 1],
    'li' => ['li' => 1],
    'optgroup' => ['optgroup' => 1, 'option' => 1],
    'option' => ['optgroup' => 1, 'option' => 1],
    'p' => ['p' => 1],
    'rp' => ['rp' => 1, 'rt' => 1],
    'rt' => ['rp' => 1, 'rt' => 1],
    'td' => ['td' => 1, 'th' => 1],
    'th' => ['td' => 1, 'th' => 1],
    'tr' => ['td' => 1, 'th' => 1, 'tr' => 1],
];
||
107 | |||
/**
 * Handles calls to legacy lower_case method names.
 *
 * "load_file" is forwarded to loadFile() after logging a deprecation
 * notice, "clear" is accepted as a no-op, and every other name raises an
 * E_USER_ERROR.
 *
 * @param string $func Name of the method that was called
 * @param array  $args Arguments of the call
 *
 * @return mixed Result of the forwarded call, or null when nothing is forwarded
 */
public function __call($func, $args)
{
    // Allow users to call methods with lower_case syntax
    switch ($func) {
        case 'load_file':
            $actual_function = 'loadFile';
            break;
        case 'clear':
            return; /* no-op */
        default:
            trigger_error(
                'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
                E_USER_ERROR
            );

            // A user-defined error handler may return instead of aborting.
            // Stop here so the code below never runs without a target.
            return;
    }

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array([$this, $actual_function], $args);
}
||
129 | |||
/**
 * Creates a document and optionally loads content right away.
 *
 * A non-empty $str that looks like an http(s) URL or names an existing
 * file is passed to loadFile(); any other non-empty string is parsed as
 * HTML via load(). An empty $str only initialises the parser state.
 *
 * @param string|null $str             HTML source, file path or URL
 * @param bool        $lowercase       Passed on to load() / prepare()
 * @param bool        $forceTagsClosed When false, the optional-closing-tag
 *                                     table is emptied so no element is
 *                                     closed implicitly
 * @param string      $target_charset  Charset handed to html_entity_decode()
 * @param bool        $stripRN         Passed on to load()
 * @param string      $defaultBRText   Passed on to load() / prepare()
 * @param string      $defaultSpanText Passed on to load() / prepare()
 * @param int         $options         Passed on to load()
 */
public function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0
) {
    // Both settings are read while loading (decode() uses the target
    // charset, read_tag() uses the optional-closing table), so they have
    // to be in place before load() runs.
    $this->_target_charset = $target_charset;

    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    if (!$forceTagsClosed) {
        $this->optional_closing_tags = [];
    }

    if ($str) {
        if (preg_match('/^https?:\/\//i', $str) || is_file($str)) {
            // Call the method directly; the "load_file" alias is routed
            // through __call() and logs a deprecation notice.
            $this->loadFile($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    } else {
        $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
    }
}
||
164 | |||
/**
 * Supplies the reduced property set shown by var_dump().
 *
 * @return array
 */
public function __debugInfo()
{
    $noise = $this->noise;
    if (empty($noise)) {
        $noise = 'none';
    }

    return [
        'root' => $this->root,
        'noise' => $noise,
        'charset' => $this->_charset,
        'target charset' => $this->_target_charset,
        'original size' => $this->original_size,
    ];
}
||
175 | |||
/**
 * Calls clear() on every node in $nodes when the document is destroyed.
 */
public function __destruct()
{
    if (!isset($this->nodes)) {
        return;
    }

    foreach ($this->nodes as $node) {
        $node->clear();
    }
}
||
184 | |||
/**
 * Parses an HTML string into this document.
 *
 * Steps, in order: reset the parser state, optionally collapse whitespace
 * (with script, CDATA, comment, style and code content set aside while
 * doing so), strip server-side and (on request) Smarty blocks, parse,
 * determine the charset and decode entities. The working buffer is
 * released afterwards.
 *
 * @param string $str             HTML source
 * @param bool   $lowercase       Passed on to prepare()
 * @param bool   $stripRN         Collapse whitespace and newlines; also
 *                                passed on to parse()
 * @param string $defaultBRText   Passed on to prepare()
 * @param string $defaultSpanText Passed on to prepare()
 * @param int    $options         Bit mask; HDOM_SMARTY_AS_TEXT strips
 *                                Smarty blocks
 *
 * @return $this
 */
public function load(
    $str,
    $lowercase = true,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0
) {
    $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

    if ($stripRN) {
        // Elements that must not lose whitespace are set aside first
        $whitespace_sensitive = [
            "'<\s*script[^>]*>(.*?)<\s*/\s*script\s*>'is",
            "'<!\[CDATA\[(.*?)\]\]>'is",
            "'<!--(.*?)-->'is",
            "'<\s*style[^>]*>(.*?)<\s*/\s*style\s*>'is",
            "'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is",
        ];

        foreach ($whitespace_sensitive as $pattern) {
            $this->remove_noise($pattern);
        }

        // Line breaks (and surrounding blanks) between two tags vanish ...
        $this->doc = preg_replace('/\>([\t\s]*[\r\n]^[\t\s]*)\</m', '><', $this->doc);

        // ... and those inside text become a single space
        $this->doc = preg_replace('/([\t\s]*[\r\n]^[\t\s]*)/m', ' ', $this->doc);

        // Put the set-aside elements back and measure the buffer again
        $this->doc = $this->restore_noise($this->doc);
        $this->size = strlen($this->doc);
    }

    // Server-side script
    $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
    if (0 !== count($this->noise)) {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    // Smarty scripts
    if ($options & HDOM_SMARTY_AS_TEXT) {
        $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    $this->parse($stripRN);

    // The root node ends where the cursor stopped
    $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
    $this->parse_charset();
    $this->decode();
    unset($this->doc);

    // Returning the document keeps load() chainable
    return $this;
}
||
238 | |||
/**
 * Stores a callback on the document.
 *
 * @param mixed $function_name Value assigned to $callback
 */
public function set_callback($function_name)
{
    $this->callback = $function_name;
}
||
243 | |||
/**
 * Clears the callback stored by set_callback().
 */
public function remove_callback()
{
    $this->callback = null;
}
||
248 | |||
/**
 * Returns the document markup and optionally writes it to a file.
 *
 * The markup is the inner text of the root node. The file is written
 * under an exclusive lock; the result of the write is not checked.
 *
 * @param string $filepath Target file; an empty string skips writing
 *
 * @return string
 */
public function save($filepath = '')
{
    $markup = $this->root->innertext();

    if ('' === $filepath) {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
||
263 | |||
/**
 * Finds elements by CSS selector, searching from the root node.
 *
 * All arguments are forwarded unchanged to HtmlNode::find().
 *
 * @param string      $selector  CSS selector
 * @param number|null $idx
 * @param bool        $lowercase
 *
 * @return HtmlNode[]|HtmlNode
 */
public function find($selector, $idx = null, $lowercase = false)
{
    $matches = $this->root->find($selector, $idx, $lowercase);

    return $matches;
}
||
277 | |||
/**
 * Returns the inner text of the first <title> element.
 *
 * @return mixed The element's innertext, or null when find() yields nothing
 */
public function title()
{
    $element = $this->find('title', 0);

    if (!$element) {
        return null;
    }

    return $element->innertext;
}
||
283 | |||
/**
 * Forwards to expect() on the root node with the arguments unchanged.
 *
 * @param string      $selector
 * @param number|null $idx
 * @param bool        $lowercase
 *
 * @return mixed Whatever the root node's expect() returns
 */
public function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->root->expect($selector, $idx, $lowercase);

    return $result;
}
||
288 | |||
/**
 * Forwards to dump() on the root node.
 *
 * @param bool $show_attr Passed through unchanged
 *
 * @codeCoverageIgnore
 */
public function dump($show_attr = true)
{
    $root = $this->root;
    $root->dump($show_attr);
}
||
294 | |||
/**
 * Resets the parser state for a new source string.
 *
 * Stores the trimmed source, resets offsets, counters and node lists,
 * and creates a fresh root node that also becomes the current parent.
 *
 * @param string|null $str             HTML source; null is treated as ''
 * @param bool        $lowercase       Stored in $lowercase
 * @param string      $defaultBRText   Stored in $default_br_text
 * @param string      $defaultSpanText Stored in $default_span_text
 */
protected function prepare(
    $str,
    $lowercase = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT
) {
    $this->clear();

    // The constructor passes null when it is called without a source;
    // trim(null) is deprecated as of PHP 8.1.
    $this->doc = trim((string) $str);
    $this->size = strlen($this->doc);
    $this->original_size = $this->size; // original size of the html
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = [];
    $this->nodes = [];
    $this->lowercase = $lowercase;
    $this->default_br_text = $defaultBRText;
    $this->default_span_text = $defaultSpanText;
    $this->root = new HtmlNode($this);
    $this->root->tag = 'root';
    $this->root->_[HtmlNode::HDOM_INFO_BEGIN] = -1;
    $this->root->nodetype = HtmlNode::HDOM_TYPE_ROOT;
    $this->parent = $this->root;

    // Always reset the current character so an empty document does not
    // inherit the last character of a previously loaded one.
    $this->char = ($this->size > 0) ? $this->doc[0] : null;
}
||
322 | |||
/**
 * Decodes HTML entities in the text, inner text and attribute values of
 * every node in $nodes.
 *
 * Text and inner text pass through restore_noise() before decoding.
 * Attributes whose value is boolean true (no value in the source) are
 * left untouched.
 */
protected function decode()
{
    $text_slots = [HtmlNode::HDOM_INFO_TEXT, HtmlNode::HDOM_INFO_INNER];

    foreach ($this->nodes as $node) {
        foreach ($text_slots as $slot) {
            if (!isset($node->_[$slot])) {
                continue;
            }

            $restored = $this->restore_noise($node->_[$slot]);
            $node->_[$slot] = html_entity_decode(
                $restored,
                ENT_QUOTES | ENT_HTML5,
                $this->_target_charset
            );
        }

        if (!isset($node->attr) || !is_array($node->attr)) {
            continue;
        }

        foreach ($node->attr as $attr_name => $attr_value) {
            if (true === $attr_value) {
                continue;
            }

            $node->attr[$attr_name] = html_entity_decode(
                $attr_value,
                ENT_QUOTES | ENT_HTML5,
                $this->_target_charset
            );
        }
    }
}
||
354 | |||
355 | protected function parse($trim = false) |
||
376 | } |
||
377 | } |
||
378 | } |
||
379 | |||
/**
 * Determines the charset of the source document and stores it in $_charset.
 *
 * Sources are tried in this order until one yields a value:
 *  1. the Content-Type reported by the optional global function
 *     get_last_retrieve_url_contents_content_type(),
 *  2. <meta http-equiv="Content-Type" content="...; charset=...">,
 *  3. <meta charset="...">,
 *  4. mb_detect_encoding() on the document buffer (when mbstring is loaded),
 *  5. the fallback "UTF-8".
 *
 * ISO-8859-1 and its Latin-1 aliases are reported as CP1252.
 *
 * @return string The detected charset
 */
protected function parse_charset()
{
    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = call_user_func('get_last_retrieve_url_contents_content_type');

        // Media type parameter names are case-insensitive, so match the
        // header the same way as the meta element below.
        $success = preg_match('/charset=(.+)/i', $contentTypeHeader, $matches);
        if ($success) {
            $charset = $matches[1];
        }

        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Determining charset using get_last_retrieve_url_contents_content_type() ' . ($success ? 'successful' : 'failed'));
    }

    if (empty($charset)) {
        // https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);

        if (!empty($el)) {
            $fullvalue = $el->content;

            if (!empty($fullvalue)) {
                $success = preg_match(
                    '/charset=(.+)/i',
                    $fullvalue,
                    $matches
                );

                if ($success) {
                    $charset = $matches[1];
                }
            }
        }
    }

    if (empty($charset)) {
        // https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
        $meta = $this->root->find('meta[charset]', 0);

        if ($meta) {
            $charset = $meta->charset;
        }
    }

    if (empty($charset)) {
        // Try to guess the charset based on the content
        // Requires Multibyte String (mbstring) support (optional)
        if (function_exists('mb_detect_encoding')) {
            /**
             * mb_detect_encoding() is not intended to distinguish between
             * charsets, especially single-byte charsets. Its primary
             * purpose is to detect which multibyte encoding is in use,
             * i.e. UTF-8, UTF-16, shift-JIS, etc.
             *
             * -- https://bugs.php.net/bug.php?id=38138
             *
             * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
             * always result in CP1251/ISO-8859-5 and vice versa.
             *
             * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
             * to stay compatible.
             */
            $encoding = mb_detect_encoding(
                $this->doc,
                ['UTF-8', 'CP1252', 'ISO-8859-1']
            );

            if ('CP1252' === $encoding || 'ISO-8859-1' === $encoding) {
                // Due to a limitation of mb_detect_encoding
                // 'CP1251'/'ISO-8859-5' will be detected as
                // 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
                // which case we can simply assume it is the other charset.
                // iconv() signals failure with false; the notice it emits
                // on failure is silenced on purpose because this is a probe.
                if (false === @iconv('CP1252', 'UTF-8', $this->doc)) {
                    $encoding = 'CP1251';
                }
            }

            if (false !== $encoding) {
                $charset = $encoding;
            }
        }
    }

    if (empty($charset)) {
        Debug::log('Unable to determine charset from source document. Assuming UTF-8');
        $charset = 'UTF-8';
    }

    // Since CP1252 is a superset, if we get one of its subsets, we want
    // it instead.
    if (in_array(strtolower($charset), ['iso-8859-1', 'latin1', 'latin-1'], true)) {
        $charset = 'CP1252';
    }

    return $this->_charset = $charset;
}
||
478 | |||
/**
 * Reads the markup construct at the current position and links the
 * resulting node(s) into the tree.
 *
 * Handles, in this order: end of input, end tags, declarations
 * (comment, CDATA, unknown "<!..."), invalid tag names (kept as text)
 * and start tags including their attributes. Tag names are compared
 * case-insensitively throughout, independent of $lowercase.
 *
 * @param bool $trim Tolerate and drop superfluous whitespace inside tags
 *
 * @return bool false once the end of the input is reached, true otherwise
 */
protected function read_tag($trim)
{
    if ('<' !== $this->char) { // End Of File
        $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

        // We might be in a nest of unclosed elements for which the end tags
        // can be omitted. Close them for faster seek operations.
        do {
            if (isset($this->optional_closing_tags[strtolower($this->parent->tag)])) {
                $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
            }
        } while ($this->parent = $this->parent->parent);

        return false;
    }

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    if ($trim) { // "< /html>"
        $this->skip($this->token_blank);
    }

    // End tag: https://dev.w3.org/html5/pf-summary/syntax.html#end-tags
    if ('/' === $this->char) {
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

        $tag = $this->copy_until_char('>');
        $tag = $trim ? ltrim($tag, $this->token_blank) : $tag;

        // Skip attributes and whitespace in end tags
        if ($trim && false !== ($pos = strpos($tag, ' '))) {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains superfluous whitespace in end tags (</html >).');
            $tag = substr($tag, 0, $pos);
        }

        if (strcasecmp($this->parent->tag, $tag)) { // Parent is not start tag
            $parent_lower = strtolower($this->parent->tag);
            $tag_lower = strtolower($tag);
            if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
                $org_parent = $this->parent;

                // Look for the start tag
                while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower) {
                    // Close any unclosed element with optional end tags
                    if (isset($this->optional_closing_tags[strtolower($this->parent->tag)])) {
                        $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    }
                    $this->parent = $this->parent->parent;
                }

                // No start tag, close grandparent
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent;

                    if ($this->parent->parent) {
                        $this->parent = $this->parent->parent;
                    }

                    $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
                // grandparent exists + current is block tag
                // Parent has no end tag
                $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
                $org_parent = $this->parent;

                // Find start tag
                while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $this->parent->parent;
                }

                // No start tag, close parent
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent
                    $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
                // Grandparent exists and current tag closes it
                $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
                $this->parent = $this->parent->parent;
            } else { // Random tag, add as text node
                return $this->as_text_node($tag);
            }
        }

        // Link with start tag
        $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

        if ($this->parent->parent) {
            $this->parent = $this->parent->parent;
        }

        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // Start tag: https://dev.w3.org/html5/pf-summary/syntax.html#start-tags
    $node = new HtmlNode($this);
    $node->_[HtmlNode::HDOM_INFO_BEGIN] = $this->cursor++;

    // Tag name
    $tag = $this->copy_until($this->token_slash);

    if (isset($tag[0]) && '!' === $tag[0]) { // Doctype, CData, Comment
        if (isset($tag[2]) && '-' === $tag[1] && '-' === $tag[2]) { // Comment ("<!--")
            // Go back until $tag only contains start of comment "!--".
            while (strlen($tag) > 3) {
                $this->char = $this->doc[--$this->pos]; // previous
                $tag = substr($tag, 0, strlen($tag) - 1);
            }

            $node->nodetype = HtmlNode::HDOM_TYPE_COMMENT;
            $node->tag = 'comment';

            $data = '';

            // There is a rare chance of empty comment: "<!---->"
            // In which case the current char is the first "-" of the end tag
            // But the comment could also just be a dash: "<!----->"
            while (true) {
                // Copy until first char of end tag
                $data .= $this->copy_until_char('-');

                // Look ahead in the document, maybe we are at the end
                if (($this->pos + 3) > $this->size) { // End of document
                    Debug::log('Source document ended unexpectedly!');
                    break;
                } elseif ('-->' === substr($this->doc, $this->pos, 3)) { // end
                    $data .= $this->copy_until_char('>');
                    break;
                }

                $data .= $this->char;
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            }

            $tag .= $data;
            $tag = $this->restore_noise($tag);

            // Comment starts after "!--" and ends before "--" (5 chars total)
            $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 3, strlen($tag) - 5);
        } elseif ('[CDATA[' === substr($tag, 1, 7)) {
            // Go back until $tag only contains start of cdata "![CDATA[".
            while (strlen($tag) > 8) {
                $this->char = $this->doc[--$this->pos]; // previous
                $tag = substr($tag, 0, strlen($tag) - 1);
            }

            // CDATA can contain HTML stuff, need to find closing tags first
            $node->nodetype = HtmlNode::HDOM_TYPE_CDATA;
            $node->tag = 'cdata';

            $data = '';

            // There is a rare chance of empty CDATA: "<![CDATA[]]>"
            // In which case the current char is the first "]" of the end tag
            // But the CDATA could also just be a bracket: "<![CDATA[]]]>"
            while (true) {
                // Copy until first char of end tag
                $data .= $this->copy_until_char(']');

                // Look ahead in the document, maybe we are at the end
                if (($this->pos + 3) > $this->size) { // End of document
                    Debug::log('Source document ended unexpectedly!');
                    break;
                } elseif (']]>' === substr($this->doc, $this->pos, 3)) { // end
                    $data .= $this->copy_until_char('>');
                    break;
                }

                $data .= $this->char;
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            }

            $tag .= $data;
            $tag = $this->restore_noise($tag);

            // CDATA starts after "![CDATA[" and ends before "]]" (10 chars total)
            $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 8, strlen($tag) - 10);
        } else { // Unknown
            Debug::log('Source document contains unknown declaration: <' . $tag);
            $node->nodetype = HtmlNode::HDOM_TYPE_UNKNOWN;
            $node->tag = 'unknown';
        }

        $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

        if ('>' === $this->char) {
            $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
        }

        $this->link_nodes($node, true);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    if (!preg_match('/^\w[\w:-]*$/', $tag)) { // Invalid tag name
        $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

        if ('>' === $this->char) { // End tag
            $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        }

        $this->link_nodes($node, false);
        Debug::log('Source document contains invalid tag name: ' . $node->_[HtmlNode::HDOM_INFO_TEXT]);

        return true;
    }

    // Valid tag name
    $node->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $tag_lower = strtolower($tag);
    $node->tag = ($this->lowercase) ? $tag_lower : $tag;

    if (isset($this->optional_closing_tags[$tag_lower])) { // Optional closing tag
        while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
            // Previous element was the last element of ancestor
            $this->parent->_[HtmlNode::HDOM_INFO_END] = $node->_[HtmlNode::HDOM_INFO_BEGIN] - 1;
            $this->parent = $this->parent->parent;
        }
        $node->parent = $this->parent;
    }

    $guard = 0; // prevent infinity loop

    // [0] Space between tag and first attribute
    $space = [$this->copy_skip($this->token_blank), '', ''];

    do { // Parse attributes
        $name = $this->copy_until($this->token_equal);

        if ('' === $name && null !== $this->char && '' === $space[0]) {
            break;
        }

        if ($guard === $this->pos) { // Escape infinite loop
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            continue;
        }

        $guard = $this->pos;

        if ($this->pos >= $this->size - 1 && '>' !== $this->char) { // End Of File
            Debug::log('Source document ended unexpectedly!');
            $node->nodetype = HtmlNode::HDOM_TYPE_TEXT;
            $node->_[HtmlNode::HDOM_INFO_END] = 0;
            $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
            $node->tag = 'text';
            $this->link_nodes($node, false);

            return true;
        }

        if ('/' === $name || '' === $name) { // No more attributes
            break;
        }

        // [1] Whitespace after attribute name
        // (a null char means end of input: no whitespace, and strpos()
        // must not be handed a null needle)
        $space[1] = (null === $this->char || false === strpos($this->token_blank, $this->char))
            ? ''
            : $this->copy_skip($this->token_blank);

        $name = $this->restore_noise($name); // might be a noisy name

        if ($this->lowercase) {
            $name = strtolower($name);
        }

        if ('=' === $this->char) { // Attribute with value
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $this->parse_attr($node, $name, $space, $trim); // get attribute value
        } else { // Attribute without value
            $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = HtmlNode::HDOM_QUOTE_NO;
            $node->attr[$name] = true;
            if ('>' !== $this->char) {
                $this->char = $this->doc[--$this->pos];
            } // prev
        }

        // Space before attribute and around equal sign
        if (!$trim && $space !== [' ', '', '']) {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains superfluous whitespace in attributes (<e attribute = "value">). Enable trimming or fix attribute spacing for best performance.');
            $node->_[HtmlNode::HDOM_INFO_SPACE][$name] = $space;
        }

        // prepare for next attribute
        // (the input may have ended inside the attribute value, in which
        // case the current char is null)
        $space = [
            (null === $this->char || false === strpos($this->token_blank, $this->char))
                ? ''
                : $this->copy_skip($this->token_blank),
            '',
            '',
        ];
    } while ('>' !== $this->char && '/' !== $this->char);

    $this->link_nodes($node, true);

    // Space after last attribute before closing the tag
    if (!$trim && '' !== $space[0]) {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log_once('Source document contains superfluous whitespace before the closing braket (<e attribute="value" >). Enable trimming or remove spaces before closing brackets for best performance.');
        $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $space[0];
    }

    $rest = ('>' === $this->char) ? '' : $this->copy_until_char('>');
    $rest = ($trim) ? trim($rest) : $rest; // <html / >

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    if ('/' === trim($rest)) { // Void element
        if ('' !== $rest) {
            if (isset($node->_[HtmlNode::HDOM_INFO_ENDSPACE])) {
                $node->_[HtmlNode::HDOM_INFO_ENDSPACE] .= $rest;
            } else {
                $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $rest;
            }
        }
        $node->_[HtmlNode::HDOM_INFO_END] = 0;
    } elseif (!isset($this->self_closing_tags[strtolower($node->tag)])) {
        $innertext = $this->copy_until_char('<');
        if ('' !== $innertext) {
            $node->_[HtmlNode::HDOM_INFO_INNER] = $innertext;
        }
        $this->parent = $node;
    }

    // $node->tag keeps the source's case when $lowercase is off, so the
    // element checks below use the lower-cased name like every other tag
    // comparison in this method.
    if ('br' === $tag_lower) {
        $node->_[HtmlNode::HDOM_INFO_INNER] = $this->default_br_text;
    } elseif ('script' === $tag_lower) {
        $data = '';

        // There is a rare chance of empty script: "<script></script>"
        // In which case the current char is the start of the end tag
        // But the script could also just contain tags: "<script><div></script>"
        while (true) {
            // Copy until first char of end tag
            $data .= $this->copy_until_char('<');

            // Look ahead in the document, maybe we are at the end
            if (($this->pos + 9) > $this->size) { // End of document
                Debug::log('Source document ended unexpectedly!');
                break;
            } elseif (0 === strcasecmp('</script', substr($this->doc, $this->pos, 8))) { // end, any case
                $this->skip('>'); // don't include the end tag
                break;
            }

            // Note: A script tag may contain any other tag except </script>
            // which needs to be escaped as <\/script>

            $data .= $this->char;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        }

        $node = new HtmlNode($this);
        ++$this->cursor;
        $node->_[HtmlNode::HDOM_INFO_TEXT] = $data;
        $this->link_nodes($node, false);
    }

    return true;
}
||
844 | |||
/**
 * Reads one attribute value at the current position and stores it on
 * the node.
 *
 * The value may be double-quoted, single-quoted or unquoted. When the
 * node already has an attribute of that name, the value is still
 * consumed from the input but the existing attribute is kept.
 *
 * @param HtmlNode $node  Node that receives the attribute
 * @param string   $name  Attribute name
 * @param array    $space Whitespace record of the attribute; index 2
 *                        receives the whitespace between "=" and the value
 * @param bool     $trim  Collapse whitespace runs inside the value and
 *                        trim it
 */
protected function parse_attr($node, $name, &$space, $trim)
{
    $is_duplicate = isset($node->attr[$name]);

    if (!$is_duplicate) { // Copy whitespace between "=" and value
        // A null char means the input ended right after "=": there is no
        // whitespace, and strpos() must not be handed a null needle.
        $space[2] = (null === $this->char || false === strpos($this->token_blank, $this->char))
            ? ''
            : $this->copy_skip($this->token_blank);
    }

    switch ($this->char) {
        case '"':
            $quote_type = HtmlNode::HDOM_QUOTE_DOUBLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('"');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        case '\'':
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values with single quotes (<e attribute=\'value\'>). Use double quotes for best performance.');
            $quote_type = HtmlNode::HDOM_QUOTE_SINGLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('\'');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        default:
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values without quotes (<e attribute=value>). Use double quotes for best performance');
            $quote_type = HtmlNode::HDOM_QUOTE_NO;
            $value = $this->copy_until($this->token_attr);
    }

    $value = $this->restore_noise($value);

    if ($trim) {
        // Attribute values must not contain control characters other than space
        // https://www.w3.org/TR/html/dom.html#text-content
        // https://www.w3.org/TR/html/syntax.html#attribute-values
        // https://www.w3.org/TR/xml/#AVNormalize
        $normalized = preg_replace("/[\r\n\t\s]+/u", ' ', $value);

        if (null === $normalized) {
            // With the "u" modifier preg_replace() fails (returns null) on
            // input that is not valid UTF-8, e.g. values from a CP1252
            // document. Retry byte-wise instead of losing the value.
            $normalized = preg_replace("/[\r\n\t\s]+/", ' ', $value);
        }

        $value = trim((string) $normalized);
    }

    if (!$is_duplicate) {
        if (HtmlNode::HDOM_QUOTE_DOUBLE !== $quote_type) {
            $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = $quote_type;
        }
        $node->attr[$name] = $value;
    }
}
||
893 | |||
/**
 * Attach $node to the parent node the parser is currently positioned in.
 *
 * The node's parent is set and the node is appended to the parent's
 * node list; it is appended to the parent's children list only when
 * $is_child is truthy.
 *
 * @param HtmlNode $node     Node to attach
 * @param bool     $is_child Whether to also register the node as a child
 */
protected function link_nodes(&$node, $is_child)
{
    $node->parent = $this->parent;
    $this->parent->nodes[] = $node;

    if (!$is_child) {
        return;
    }

    $this->parent->children[] = $node;
}
||
902 | |||
/**
 * Record a closing tag as literal text.
 *
 * Creates a node whose text is "</$tag>", links it to the current parent
 * without registering it as a child, and advances the parser by one
 * character.
 *
 * @param string $tag Tag name to render inside the closing tag
 *
 * @return bool Always true
 */
protected function as_text_node($tag)
{
    $text_node = new HtmlNode($this);
    ++$this->cursor;
    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = sprintf('</%s>', $tag);
    $this->link_nodes($text_node, false);

    // Move on to the next character, or null at the end of the document.
    ++$this->pos;
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return true;
}
||
912 | |||
/**
 * Advance the parser past every consecutive character contained in $chars.
 *
 * Afterwards the current character is the one at the new position, or
 * null when the end of the document has been reached.
 *
 * @param string $chars Set of characters to skip
 */
protected function skip($chars)
{
    $run_length = strspn($this->doc, $chars, $this->pos);
    $this->pos += $run_length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }
}
||
918 | |||
/**
 * Consume the run of characters from $chars at the current position and
 * return it.
 *
 * When the current position does not start with any of $chars, nothing is
 * consumed and the parser state is left unchanged.
 *
 * @param string $chars Set of characters to consume
 *
 * @return string The consumed characters, or '' when there were none
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $run_length = strspn($this->doc, $chars, $start);

    if (0 === $run_length) {
        return '';
    }

    $this->pos = $start + $run_length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $run_length);
}
||
930 | |||
/**
 * Consume characters up to (not including) the first character that is
 * contained in $chars, and return what was consumed.
 *
 * The current character becomes the stop character, or null when the end
 * of the document was reached.
 *
 * @param string $chars Set of characters that end the copy
 *
 * @return string The consumed segment
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $segment_length = strcspn($this->doc, $chars, $start);

    $this->pos = $start + $segment_length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $segment_length);
}
||
939 | |||
/**
 * Consume characters up to (not including) the next occurrence of $char
 * and return what was consumed.
 *
 * - Returns '' without touching the parser state when the document is
 *   already exhausted (current character is null) or when $char is found
 *   at the current position.
 * - When $char does not occur again, the rest of the document is consumed
 *   and the parser is moved to the end of the document.
 *
 * @param string $char Character (or string) that ends the copy
 *
 * @return string The consumed segment
 */
protected function copy_until_char($char)
{
    if (null === $this->char) {
        return '';
    }

    $start = $this->pos;
    $found = strpos($this->doc, $char, $start);

    if (false === $found) {
        // No terminator left: hand back everything up to the end.
        $remainder = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos = $this->size;

        return $remainder;
    }

    if ($found === $start) {
        return '';
    }

    $this->char = $this->doc[$found];
    $this->pos = $found;

    return substr($this->doc, $start, $found - $start);
}
||
964 | |||
/**
 * Replace every match of $pattern in the document with a placeholder key
 * and remember the replaced fragment in $this->noise.
 *
 * Placeholders have the form "___noise___" followed by a number that is
 * space-padded to five characters (starting at 1000).
 *
 * @param string $pattern    PCRE pattern locating the noise
 * @param bool   $remove_tag True to replace the entire match, false to
 *                           replace only the first capture group
 */
protected function remove_noise($pattern, $remove_tag = false)
{
    $count = preg_match_all(
        $pattern,
        $this->doc,
        $matches,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );

    $idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch

    // Walk backwards so that the offsets of earlier matches stay valid
    // while the document is being rewritten.
    for ($i = $count - 1; $i > -1; --$i) {
        // A capture group that did not participate in the match is either
        // missing or reported with offset -1. Replacing at that offset
        // would corrupt the document, so such matches are skipped.
        if (!isset($matches[$i][$idx]) || $matches[$i][$idx][1] < 0) {
            continue;
        }

        $fragment = $matches[$i][$idx][0];
        $offset = $matches[$i][$idx][1];

        $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);

        $this->noise[$key] = $fragment;
        $this->doc = substr_replace($this->doc, $key, $offset, strlen($fragment));
    }

    // reset the length of content
    $this->size = strlen($this->doc);

    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
||
989 | |||
/**
 * Replace noise placeholders in $text with the fragments stored in
 * $this->noise.
 *
 * A placeholder is the marker "___noise___" followed by a five character
 * number. Each restored key is removed from $this->noise. Placeholders
 * that cannot be resolved are replaced by a diagnostic text.
 *
 * @param string $text Text that may contain placeholders
 *
 * @return string Text with placeholders substituted
 */
public function restore_noise($text)
{
    if (empty($this->noise)) {
        return $text; // nothing to restore
    }

    $pos = 0;

    while (false !== ($pos = strpos($text, '___noise___', $pos))) {
        // Sometimes there is a broken piece of markup and the five
        // characters following the marker are not all present.
        if (strlen($text) > $pos + 15) {
            // Marker (11 characters) plus the five character number.
            $key = substr($text, $pos, 16);

            if (isset($this->noise[$key])) {
                // $pos is deliberately not advanced: the restored fragment
                // is scanned again so placeholders inside it are resolved.
                $text = substr($text, 0, $pos)
                    . $this->noise[$key]
                    . substr($text, $pos + 16);

                unset($this->noise[$key]);
            } else {
                Debug::log_once('Noise restoration failed. DOM has been corrupted!');

                $replacement = 'UNDEFINED NOISE FOR KEY: ' . $key;

                $text = substr($text, 0, $pos)
                    . $replacement
                    . substr($text, $pos + 16);

                // The diagnostic text contains the marker again (as part
                // of the key). Resume scanning behind it, otherwise the
                // same unknown key is found and expanded forever.
                $pos += strlen($replacement);
            }
        } else {
            // There is no valid key being given back to us... We must get
            // rid of the ___noise___ or we will have a problem.
            Debug::log_once('Noise restoration failed. The provided key is incomplete: ' . $text);
            $text = substr($text, 0, $pos)
                . 'NO NUMERIC NOISE KEY'
                . substr($text, $pos + 11);
        }
    }

    return $text;
}
||
1038 | |||
/**
 * Find the first stored noise fragment that contains $text.
 *
 * @param string $text Text to look for
 *
 * @return string|null The matching fragment, or null when none matches
 */
public function search_noise($text)
{
    foreach ($this->noise as $fragment) {
        if (false === strpos($fragment, $text)) {
            continue;
        }

        return $fragment;
    }

    return null;
}
||
1047 | |||
/**
 * String conversion of the document.
 *
 * @return string The value returned by innertext() of the root node
 */
public function __toString()
{
    $markup = $this->root->innertext();

    return $markup;
}
||
1052 | |||
/**
 * Magic accessor for virtual properties of the document.
 *
 * - outertext, innertext: innertext() of the root node
 * - plaintext: text() of the root node
 * - charset: $this->_charset
 * - target_charset: $this->_target_charset
 *
 * @param string $name Name of the requested property
 *
 * @return mixed The property value, or null for any other name
 */
public function __get($name)
{
    if ('outertext' === $name || 'innertext' === $name) {
        return $this->root->innertext();
    }

    if ('plaintext' === $name) {
        return $this->root->text();
    }

    if ('charset' === $name) {
        return $this->_charset;
    }

    if ('target_charset' === $name) {
        return $this->_target_charset;
    }

    return null;
}
||
1068 | |||
/**
 * Forward to childNodes() of the root node.
 *
 * @param int $idx Index passed through to the root node (default -1)
 *
 * @return mixed Whatever the root node's childNodes() returns
 */
public function childNodes($idx = -1)
{
    $root = $this->root;

    return $root->childNodes($idx);
}
||
1073 | |||
1074 | public function firstChild() |
||
1077 | } |
||
1078 | |||
/**
 * Forward to lastChild() of the root node.
 *
 * @return mixed Whatever the root node's lastChild() returns
 */
public function lastChild()
{
    $root = $this->root;

    return $root->lastChild();
}
||
1083 | |||
/**
 * Create a new element node with tag name $name.
 *
 * The node is constructed with a null document argument and is not linked
 * to any parent here.
 *
 * @param string      $name  Tag name of the element
 * @param string|null $value Optional inner content; stored only when not null
 *
 * @return HtmlNode The new element node
 */
public function createElement($name, $value = null)
{
    $element = new HtmlNode(null);
    $element->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $element->_[HtmlNode::HDOM_INFO_BEGIN] = 1;
    $element->_[HtmlNode::HDOM_INFO_END] = 1;

    if (isset($value)) {
        $element->_[HtmlNode::HDOM_INFO_INNER] = $value;
    }

    $element->tag = $name;

    return $element;
}
||
1099 | |||
/**
 * Create a new text node for this document.
 *
 * The node is not linked to any parent here.
 *
 * @param string|null $value Text of the node; stored only when not null
 *
 * @return HtmlNode The new text node
 */
public function createTextNode($value)
{
    $text_node = new HtmlNode($this);
    $text_node->nodetype = HtmlNode::HDOM_TYPE_TEXT;

    if (isset($value)) {
        $text_node->_[HtmlNode::HDOM_INFO_TEXT] = $value;
    }

    return $text_node;
}
||
1111 | |||
/**
 * Look up an element by id.
 *
 * Calls find() with the selector "#" followed by $id and index 0.
 *
 * @param string $id Value of the id attribute to look for
 *
 * @return mixed Whatever find() returns for index 0
 */
public function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
||
1116 | |||
1117 | public function getElementsById($id, $idx = null) |
||
1120 | } |
||
1121 | |||
/**
 * Look up an element by tag name.
 *
 * Calls find() with $name as the selector and index 0.
 *
 * @param string $name Tag name used as the selector
 *
 * @return mixed Whatever find() returns for index 0
 */
public function getElementByTagName($name)
{
    $selector = $name;

    return $this->find($selector, 0);
}
||
1126 | |||
1127 | public function getElementsByTagName($name, $idx = null) |
||
1130 | } |
||
1131 | |||
/**
 * Read a file and load its contents into this document.
 *
 * All arguments given to this method are forwarded unchanged to
 * file_get_contents(); the result is handed to load() with true as the
 * second argument.
 *
 * @param string $file File name or URL (first argument of file_get_contents())
 *
 * @return false|null False when the file could not be read; nothing is
 *                    returned on success
 */
public function loadFile($file)
{
    $args = func_get_args();
    $contents = call_user_func_array('file_get_contents', $args);

    if (false === $contents) {
        return false;
    }

    $this->load($contents, true);
}
||
1142 | } |
||
1143 |