1 | <?php |
||
2 | /** |
||
3 | * This file is part of the O2System Framework package. |
||
4 | * |
||
5 | * For the full copyright and license information, please view the LICENSE |
||
6 | * file that was distributed with this source code. |
||
7 | * |
||
8 | * @author Steeve Andrian Salim |
||
9 | * @copyright Copyright (c) Steeve Andrian Salim |
||
10 | */ |
||
11 | |||
12 | // ------------------------------------------------------------------------ |
||
13 | |||
14 | namespace O2System\Html; |
||
15 | |||
16 | // ------------------------------------------------------------------------ |
||
17 | |||
18 | /** |
||
19 | * Class Document |
||
20 | * |
||
21 | * @package O2System\Html |
||
22 | */ |
||
23 | class Document extends \DOMDocument |
||
24 | { |
||
/**
 * Meta elements that saveHTML() inserts into the document head, ahead of the title.
 *
 * @var \O2System\Html\Dom\Lists\Meta
 */
public $metaNodes;

/**
 * Link elements that saveHTML() appends to the document head.
 *
 * @var \O2System\Html\Dom\Lists\Asset
 */
public $linkNodes;

/**
 * Inline CSS that saveHTML() renders as a single style element in the document head.
 *
 * @var \O2System\Html\Dom\Style
 */
public $styleContent;

/**
 * Script elements that saveHTML() appends to the document head.
 *
 * @var \O2System\Html\Dom\Lists\Asset
 */
public $headScriptNodes;

/**
 * Inline script code that saveHTML() renders as a single script element in the document head.
 *
 * @var \O2System\Html\Dom\Script
 */
public $headScriptContent;

/**
 * Script elements that saveHTML() appends to the end of the document body.
 *
 * @var \O2System\Html\Dom\Lists\Asset
 */
public $bodyScriptNodes;

/**
 * Inline script code that saveHTML() renders as a single script element at the end of the body.
 *
 * @var \O2System\Html\Dom\Script
 */
public $bodyScriptContent;
||
73 | |||
74 | // ------------------------------------------------------------------------ |
||
75 | |||
/**
 * Document::__construct
 *
 * Registers this package directory with the language service and asks it to load
 * 'html', initialises the underlying DOMDocument, registers the custom element and
 * attribute node classes, creates the empty meta/link/style/script containers and
 * finally loads the built-in HTML skeleton.
 *
 * @param string $version  Document version.
 * @param string $encoding Document encoding.
 */
public function __construct($version = '1.0', $encoding = 'UTF-8')
{
    $packageDirectory = __DIR__ . DIRECTORY_SEPARATOR;
    language()->addFilePath($packageDirectory)->loadFile('html');

    parent::__construct($version, $encoding);

    // Nodes created through this document use the package's own node classes.
    $this->registerNodeClass('DOMElement', '\O2System\Html\Dom\Element');
    $this->registerNodeClass('DOMAttr', '\O2System\Html\Dom\Attr');
    $this->formatOutput = true;

    $this->metaNodes = new Dom\Lists\Meta($this);

    $linkAssets = new Dom\Lists\Asset($this);
    $linkAssets->element = 'link';
    $this->linkNodes = $linkAssets;

    $this->styleContent = new Dom\Style();

    $headScripts = new Dom\Lists\Asset($this);
    $headScripts->element = 'script';
    $this->headScriptNodes = $headScripts;
    $this->headScriptContent = new Dom\Script();

    $bodyScripts = new Dom\Lists\Asset($this);
    $bodyScripts->element = 'script';
    $this->bodyScriptNodes = $bodyScripts;
    $this->bodyScriptContent = new Dom\Script();

    $this->loadHTMLTemplate();
}
||
114 | |||
115 | // ------------------------------------------------------------------------ |
||
116 | |||
/**
 * Document::loadHTMLTemplate
 *
 * Loads the built-in minimal HTML5 skeleton (doctype, html, head with charset
 * meta and title, empty body) through the native DOMDocument::loadHTML, bypassing
 * this class' own loadHTML override.
 *
 * @return void
 */
protected function loadHTMLTemplate()
{
    // No interpolation is needed, so a nowdoc yields the same string as a heredoc.
    $skeleton = <<<'HTML'
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>O2System HTML</title>
</head>
<body>
</body>
</html>
HTML;

    parent::loadHTML($skeleton);
}
||
140 | |||
141 | // ------------------------------------------------------------------------ |
||
142 | |||
/**
 * Document::__get
 *
 * Gives property-style access to the first html, head, body or title element of
 * the document, e.g. $document->body.
 *
 * @param string $tagName The document tag element.
 *
 * @return \DOMNode|null The first element with that tag name, or null when the
 *                       name is not one of the supported tags or no such element exists.
 */
public function &__get($tagName)
{
    // The method returns by reference, so the result has to live in a variable.
    $element = null;

    // Strict comparison: only the exact tag-name strings are accepted.
    if (in_array($tagName, ['html', 'head', 'body', 'title'], true)) {
        $element = $this->getElementsByTagName($tagName)->item(0);
    }

    return $element;
}
||
160 | |||
161 | // ------------------------------------------------------------------------ |
||
162 | |||
/**
 * Document::saveHTMLFile
 *
 * Dumps the internal document into a file using HTML formatting.
 *
 * @see http://php.net/manual/domdocument.savehtmlfile.php
 *
 * @param string $filePath The path to the saved HTML document.
 *
 * @return int|bool The number of bytes written, or false when the target is not
 *                  writable or the write was incomplete.
 * @throws \InvalidArgumentException When $filePath is not a string.
 * @since 5.0
 */
public function saveHTMLFile($filePath)
{
    if ( ! is_string($filePath)) {
        throw new \InvalidArgumentException('The filename argument must be of type string');
    }

    // is_writable() is false for a file that does not exist yet; in that case the
    // containing directory decides whether the file can be created.
    $isWritable = file_exists($filePath)
        ? is_writable($filePath)
        : is_writable(dirname($filePath));

    if ( ! $isWritable) {
        return false;
    }

    $html = $this->saveHTML();

    // file_put_contents() already reports the byte count (or false on failure), so
    // there is no need to re-read it from the filesystem via filesize().
    $bytesWritten = file_put_contents($filePath, $html);

    if ($bytesWritten === strlen($html)) {
        return $bytesWritten;
    }

    return false;
}
||
197 | |||
198 | // ------------------------------------------------------------------------ |
||
199 | |||
/**
 * Document::saveHTML
 *
 * Dumps the internal document into a string using HTML formatting.
 *
 * Before serializing, the collected style content, meta nodes, link nodes and
 * head scripts are placed into the head element and the body scripts are appended
 * to the body element. These nodes are removed again once the markup has been
 * produced, so calling this method repeatedly yields the same output instead of
 * accumulating duplicates.
 *
 * @see http://php.net/manual/domdocument.savehtml.php
 *
 * @param \DOMNode $node [optional] parameter to output a subset of the document.
 *
 * @return string The HTML (an empty string if serialization failed).
 * @since 5.0
 */
public function saveHTML(\DOMNode $node = null)
{
    $headElement = $this->getElementsByTagName('head')->item(0);
    $bodyElement = $this->getElementsByTagName('body')->item(0);
    $titleElement = $this->getElementsByTagName('title')->item(0);

    // Nodes that exist only for the duration of this serialization.
    $injectedNodes = [];

    try {
        $styleContent = trim($this->styleContent->__toString());

        if ($styleContent !== '') {
            // The value argument of createElement() is not escaped; a text node keeps
            // characters such as "&" intact.
            $styleElement = $this->createElement('style');
            $styleElement->appendChild($this->createTextNode($styleContent));
            $styleElement->setAttribute('type', 'text/css');
            $injectedNodes[] = $headElement->appendChild($styleElement);
        }

        // Insert Meta
        if ($this->metaNodes->count()) {
            foreach (array_reverse($this->metaNodes->getArrayCopy()) as $metaNode) {
                $injectedNodes[] = $headElement->insertBefore($this->importNode($metaNode), $titleElement);
            }
        }

        // Insert Link
        if ($this->linkNodes->count()) {
            foreach ($this->linkNodes as $linkNode) {
                $injectedNodes[] = $headElement->appendChild($this->importNode($linkNode));
            }
        }

        // Insert Head Script
        if (count($this->headScriptNodes)) {
            foreach ($this->headScriptNodes as $scriptNode) {
                $injectedNodes[] = $headElement->appendChild($this->importNode($scriptNode));
            }
        }

        $headScriptContent = trim($this->headScriptContent->__toString());

        if ($headScriptContent !== '') {
            $scriptElement = $this->createElement('script');
            $scriptElement->appendChild($this->createTextNode($headScriptContent));
            $scriptElement->setAttribute('type', 'text/javascript');
            $injectedNodes[] = $headElement->appendChild($scriptElement);
        }

        // Insert Body Script
        if (count($this->bodyScriptNodes)) {
            foreach ($this->bodyScriptNodes as $scriptNode) {
                $injectedNodes[] = $bodyElement->appendChild($this->importNode($scriptNode));
            }
        }

        $bodyScriptContent = trim($this->bodyScriptContent->__toString());

        if ($bodyScriptContent !== '') {
            $scriptElement = $this->createElement('script');
            $scriptElement->appendChild($this->createTextNode($bodyScriptContent));
            $scriptElement->setAttribute('type', 'text/javascript');
            $injectedNodes[] = $bodyElement->appendChild($scriptElement);
        }

        $output = parent::saveHTML($node);
    } finally {
        // Restore the tree so the next call starts from the same state.
        foreach ($injectedNodes as $injectedNode) {
            if ($injectedNode->parentNode !== null) {
                $injectedNode->parentNode->removeChild($injectedNode);
            }
        }
    }

    if ($this->formatOutput === true) {
        $beautifier = new Dom\Beautifier();
        $output = $beautifier->format($output);
    }

    return (string)$output;
}
||
283 | |||
284 | // ------------------------------------------------------------------------ |
||
285 | |||
/**
 * Document::find
 *
 * Evaluates an expression against this document through the package XPath class,
 * with the "php" namespace and PHP function support registered.
 *
 * NOTE(review): registerPhpFunctions() is called without a whitelist, so the
 * expression can invoke PHP functions; callers should not pass untrusted input.
 *
 * @param string $expression String of document expression.
 *
 * @return Dom\Lists\Nodes
 */
public function find($expression)
{
    $finder = new Dom\XPath($this);

    $finder->registerNamespace('php', 'http://php.net/xpath');
    $finder->registerPhpFunctions();

    $matches = $finder->query($expression);

    return $matches;
}
||
304 | |||
305 | // ------------------------------------------------------------------------ |
||
306 | |||
/**
 * Document::importSourceNode
 *
 * Parses HTML source code in a scratch document, merges the meta, link, style and
 * script collections found there into this document, and returns the main node of
 * the parsed body.
 *
 * The returned node still belongs to the scratch document; it is not attached to
 * this document by this method.
 *
 * @param string $source HTML Source Code.
 *
 * @return \DOMNode|\O2System\Html\Dom\Element The first child of the parsed body when
 *         it is an element; a deep clone of the first element child when the body
 *         starts with a text node; otherwise the parsed body element itself.
 */
public function importSourceNode($source)
{
    $sourceDocument = new self();
    $sourceDocument->loadHTML($source);

    $this->metaNodes->import($sourceDocument->metaNodes);
    $this->headScriptNodes->import($sourceDocument->headScriptNodes);
    $this->bodyScriptNodes->import($sourceDocument->bodyScriptNodes);
    $this->linkNodes->import($sourceDocument->linkNodes);
    $this->styleContent->import($sourceDocument->styleContent);
    $this->headScriptContent->import($sourceDocument->headScriptContent);
    $this->bodyScriptContent->import($sourceDocument->bodyScriptContent);

    $bodyElement = $sourceDocument->getElementsByTagName('body')->item(0);
    $firstChild = $bodyElement->firstChild;

    if ($firstChild instanceof Dom\Element) {
        return $firstChild;
    }

    if ($firstChild instanceof \DOMText) {
        // Skip the leading text and hand back the first real element.
        foreach ($bodyElement->childNodes as $childNode) {
            if ($childNode instanceof Dom\Element) {
                return $childNode->cloneNode(true);
            }
        }
    }

    return $bodyElement;
}
||
344 | |||
345 | // ------------------------------------------------------------------------ |
||
346 | |||
/**
 * Document::loadHTML
 *
 * Load HTML from a string.
 *
 * Unlike the native method this does not replace the document: the source is first
 * run through parseHTML(), then parsed in a scratch DOMDocument, and the head/body
 * attributes and body child nodes of the result are merged into this document.
 *
 * @see http://php.net/manual/domdocument.loadhtml.php
 *
 * @param string     $source  The HTML string.
 * @param int|string $options [optional] Additional Libxml parameters; they are combined
 *                            with LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD.
 *
 * @return bool true on success, false when there is nothing to load.
 * @since 5.0
 */
public function loadHTML($source, $options = 0)
{
    // Collect libxml parse errors internally instead of emitting warnings, and remember
    // the previous setting so this global flag does not leak out of the method.
    $previousErrorsSetting = libxml_use_internal_errors(true);

    try {
        $source = $this->parseHTML($source);

        if (empty($source)) {
            return false;
        }

        $DOMDocument = new \DOMDocument();
        $DOMDocument->formatOutput = true;
        $DOMDocument->preserveWhiteSpace = false;

        if ($this->encoding === 'UTF-8') {
            // Turn non-ASCII characters into numeric entities so the parser does not
            // misread the bytes. Converting to 'HTML-ENTITIES' via mb_convert_encoding()
            // and utf8_decode() are both deprecated as of PHP 8.2.
            if (function_exists('mb_encode_numericentity')) {
                $source = mb_encode_numericentity($source, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
            } elseif (function_exists('utf8_decode')) {
                $source = utf8_decode($source);
            }

            $DOMDocument->encoding = 'UTF-8';
        }

        $DOMDocument->loadHTML($source, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | (int)$options);

        $headElement = $this->getElementsByTagName('head')->item(0);
        $bodyElement = $this->getElementsByTagName('body')->item(0);

        // Import head attributes
        if (null !== ($sourceHeadElement = $DOMDocument->getElementsByTagName('head')->item(0))) {
            foreach ($sourceHeadElement->attributes as $attribute) {
                $headElement->setAttribute($attribute->name, $attribute->value);
            }
        }

        // Import body attributes and child nodes
        if (null !== ($sourceBodyElement = $DOMDocument->getElementsByTagName('body')->item(0))) {
            // Import body attributes
            foreach ($sourceBodyElement->attributes as $attribute) {
                $bodyElement->setAttribute($attribute->name, $attribute->value);
            }

            // Import body child nodes
            foreach ($sourceBodyElement->childNodes as $childNode) {
                $bodyElement->appendChild($this->importNode($childNode, true));
            }
        } elseif (null !== $DOMDocument->firstChild
            && ($bodyChildNode = $this->importNode($DOMDocument->firstChild, true))
        ) {
            // A fragment without its own body: its first top-level node goes into the body.
            // The null check avoids passing null to importNode() when nothing was parsed.
            $bodyElement->appendChild($bodyChildNode);
        }

        return true;
    } finally {
        // Only discard buffered errors when this method was the one that enabled buffering;
        // a caller that enabled it beforehand may still want to inspect them.
        if ($previousErrorsSetting === false) {
            libxml_clear_errors();
        }

        libxml_use_internal_errors($previousErrorsSetting);
    }
}
||
428 | |||
429 | // ------------------------------------------------------------------------ |
||
430 | |||
/**
 * Document::parseHTML
 *
 * Parse HTML Source Code.
 *
 * Moves everything that this class manages separately out of the markup:
 * inline script code goes to the body script content, meta and link tags go to
 * their node lists, external scripts go to the head or body script node list
 * depending on where the parser placed them, and inline style code goes to the
 * style content. The matching tags are stripped from the source and runs of blank
 * lines are collapsed.
 *
 * @param string $source HTML Source Code.
 *
 * @return string The remaining markup; an empty string when nothing is left to parse.
 */
private function parseHTML($source)
{
    $inlineScriptPattern = '/<script((?:(?!src=).)*?)>(.*?)<\/script>/smix';

    // Has inline script element
    if (preg_match_all($inlineScriptPattern, $source, $matches)) {
        foreach ($matches[ 2 ] as $match) {
            $script = trim($match);
            // Keyed by hash so identical snippets are stored once.
            $this->bodyScriptContent[ md5($script) ] = $script . PHP_EOL;
        }
    }

    // Remove all inline script first
    $source = preg_replace($inlineScriptPattern, '', $source);

    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8), so stop
    // here when no markup is left.
    if ( ! is_string($source) || trim($source) === '') {
        return '';
    }

    $DOMDocument = new \DOMDocument();
    $DOMDocument->loadHTML($source);

    $DOMXPath = new \DOMXPath($DOMDocument);

    // Flattens a node's attributes into a name => value array.
    $collectAttributes = static function (\DOMNode $element) {
        $attributes = [];
        foreach ($element->attributes as $name => $attribute) {
            $attributes[ $name ] = $attribute->nodeValue;
        }

        return $attributes;
    };

    foreach ($DOMXPath->query('//meta') as $meta) {
        $this->metaNodes->createElement($collectAttributes($meta));
    }

    $source = preg_replace('#<meta(.*?)>#is', '', $source); // clean up all inline meta tags

    foreach ($DOMXPath->query('//link') as $link) { // find all inline link tags
        $this->linkNodes->createElement($collectAttributes($link));
    }

    $source = preg_replace('#<link(.*?)>#is', '', $source); // clean up all inline link tags

    // External scripts the parser placed in the head. Previously their attributes
    // were collected and then discarded, so these scripts were lost once the tags
    // were stripped from the source below.
    foreach ($DOMXPath->query('//head/script') as $script) {
        if ($script->textContent == '') {
            $this->headScriptNodes->createElement($collectAttributes($script));
        }
    }

    // External scripts the parser placed in the body.
    foreach ($DOMXPath->query('//body/script') as $script) {
        if ($script->textContent == '') {
            $this->bodyScriptNodes->createElement($collectAttributes($script));
        }
    }

    $source = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $source);

    // Has inline style element. The content is matched lazily up to the closing tag,
    // so CSS containing ">" (e.g. child combinators) is captured as well.
    $inlineStylePattern = '#<style\b[^>]*>(.*?)</style>#is';

    if (preg_match_all($inlineStylePattern, $source, $matches)) {
        foreach ($matches[ 1 ] as $match) {
            $style = trim($match);
            $this->styleContent[ md5($style) ] = $style . PHP_EOL;
        }

        $source = preg_replace($inlineStylePattern, '', $source);
    }

    // NOTE(review): this branch only runs for <code> elements whose text is empty, in
    // which case the replacement below searches for an empty string and leaves $source
    // unchanged. The condition looks inverted; it is kept as-is until the intended
    // handling of "{{php ... /php}}" blocks is confirmed.
    foreach ($DOMXPath->query('//code') as $code) {
        if ($code->textContent == '') {
            $code = str_replace(['{{php', '/php}}'], ['<?php', '?>'], $code->textContent);
            $code = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\r\n", $code);
            $source = str_replace($code, htmlentities($code), $source);
        }
    }

    // Collapse runs of blank lines left behind by the removed tags.
    $source = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\r\n", $source);

    return $source;
}
||
531 | |||
532 | // ------------------------------------------------------------------------ |
||
533 | |||
/**
 * Document::load
 *
 * Load HTML from a file path or from a string of HTML source.
 *
 * A string naming an existing file is read through loadHTMLFile(); any other string
 * is treated as HTML source and passed to loadHTML(). A non-empty, non-string
 * argument is forwarded to the native DOMDocument::load().
 *
 * @link  http://php.net/manual/domdocument.load.php
 *
 * @param string     $filePath The path to the HTML document, or HTML source code.
 * @param int|string $options  [optional] Bitwise OR of the libxml option constants.
 *
 * @return mixed true on success or false on failure.
 * @since 5.0
 */
public function load($filePath, $options = null)
{
    // The underlying loaders expect an integer bitmask; the default here is null.
    $options = (int)$options;

    if (is_string($filePath)) {
        // is_file() rather than file_exists(): a directory cannot be read as a document.
        if (is_file($filePath)) {
            return $this->loadHTMLFile($filePath, $options);
        }

        return $this->loadHTML($filePath, $options);
    }

    if ( ! empty($filePath)) {
        return parent::load($filePath, $options);
    }

    return false;
}
||
564 | |||
/**
 * Document::loadHTMLFile
 *
 * Load HTML from a file: reads the whole file and passes its contents to loadHTML().
 *
 * @see http://php.net/manual/domdocument.loadhtmlfile.php
 *
 * @param string     $filePath The path to the HTML file.
 * @param int|string $options  [optional] Additional Libxml parameters, forwarded to loadHTML().
 *
 * @return bool The result of loadHTML(), or false when the file could not be read.
 * @since 5.0
 */
public function loadHTMLFile($filePath, $options = 0)
{
    $contents = file_get_contents($filePath);

    // file_get_contents() returns false on failure; do not feed that into the parser.
    if ($contents === false) {
        return false;
    }

    return $this->loadHTML($contents, $options);
}
||
590 | |||
591 | // ------------------------------------------------------------------------ |
||
592 | |||
/**
 * Document::__toString
 *
 * Renders the document as HTML source code by delegating to saveHTML().
 *
 * @return string
 */
public function __toString()
{
    $html = $this->saveHTML();

    return $html;
}
||
604 | } |