These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | /** |
||
3 | * Load a DOM document from an HTML string or file |
||
4 | * |
||
5 | * @license http://www.opensource.org/licenses/mit-license.php The MIT License |
||
6 | * @copyright Copyright (c) 2009-2017 FluentDOM Contributors |
||
7 | */ |
||
8 | |||
9 | namespace FluentDOM\Loader { |
||
10 | |||
11 | use FluentDOM\DOM\Document; |
||
12 | use FluentDOM\DOM\DocumentFragment; |
||
13 | use FluentDOM\DOM\ProcessingInstruction; |
||
14 | use FluentDOM\Loadable; |
||
15 | |||
16 | /** |
||
17 | * Load a DOM document from an HTML string or file |
||
18 | */ |
||
19 | class Html implements Loadable { |
||
20 | |||
21 | use Supports\Libxml; |
||
22 | |||
23 | const IS_FRAGMENT = 'is_fragment'; |
||
24 | |||
/**
 * List the content type identifiers this loader accepts.
 *
 * Covers complete HTML documents as well as HTML fragments.
 *
 * @return string[]
 */
public function getSupported(): array {
  $contentTypes = ['html', 'text/html', 'html-fragment', 'text/html-fragment'];
  return $contentTypes;
}
||
31 | |||
/**
 * Parse an HTML source into a document and wrap it in a Result.
 *
 * Fragment content types (or the IS_FRAGMENT option) are parsed through
 * loadFragmentIntoDom() and the top level nodes become the selection of the
 * result. Anything else is read as a complete document, from a file or from
 * a string depending on the detected source type.
 *
 * @see Loadable::load
 * @param string $source
 * @param string $contentType
 * @param array|\Traversable|Options $options
 * @return Document|Result|NULL NULL if the content type is not supported
 * @throws \FluentDOM\Exceptions\InvalidSource\TypeString
 * @throws \FluentDOM\Exceptions\InvalidSource\TypeFile
 */
public function load($source, string $contentType, $options = []) {
  if (!$this->supports($contentType)) {
    return NULL;
  }
  $parse = function() use ($source, $contentType, $options) {
    $selection = FALSE;
    $dom = new Document();
    $settings = $this->getOptions($options);
    if ($this->isFragment($contentType, $settings)) {
      $this->loadFragmentIntoDom($dom, $source, $settings);
      $selection = $dom->evaluate('/node()');
    } else {
      $sourceType = $settings->getSourceType($source);
      // return value is not used - presumably this throws for a disallowed
      // source type (see the @throws annotations); confirm against Options
      $settings->isAllowed($sourceType);
      // loose comparison on purpose, it mirrors the semantics of a switch/case
      if ($sourceType == Options::IS_FILE) {
        $dom->loadHTMLFile($source, $settings[Options::LIBXML_OPTIONS]);
      } else {
        // string sources get an XML declaration carrying the encoding
        $html = $this->ensureEncodingPI(
          $source, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]
        );
        $dom->loadHTML($html, $settings[Options::LIBXML_OPTIONS]);
      }
    }
    // NOTE(review): this drops the first processing instruction of any
    // target, also for file sources where no declaration was injected
    /** @var ProcessingInstruction $pi */
    $pi = $dom->xpath()->firstOf('//processing-instruction()');
    if ($pi) {
      $pi->remove();
    }
    return new Result($dom, 'text/html', $selection);
  };
  return (new Libxml\Errors())->capture($parse);
}
||
75 | |||
/**
 * Make sure the HTML source carries an XML declaration with an encoding.
 *
 * - Without an existing `<?xml ...` declaration a new one is prepended.
 * - With an existing declaration and $force set, the first declaration is
 *   replaced by one using $encoding.
 * - With an existing declaration and no $force, the source is returned as is.
 *
 * Unless $force is set, a charset found in a meta tag of the source takes
 * precedence over $encoding.
 *
 * @param string $source HTML markup
 * @param string|NULL $encoding encoding name for the declaration
 * @param bool|NULL $force use $encoding even if the source declares its own
 * @return string
 */
private function ensureEncodingPI(string $source, string $encoding = NULL, bool $force = NULL): string {
  $hasXmlPi = preg_match('(<\\?xml\\s)', $source);
  if (!$force && ($charset = $this->getCharsetFromMetaTag($source))) {
    $encoding = (string)$charset;
  }
  // explicit cast: passing NULL to htmlspecialchars() is deprecated since PHP 8.1
  $pi = '<?xml version="1.0" encoding="'.htmlspecialchars((string)$encoding).'"?>';
  if (!$hasXmlPi) {
    return $pi.$source;
  }
  if ($force) {
    // The "?" before ">" has to be an (optional) literal. The previous pattern
    // "[^?>]*?>" was a lazy quantifier followed by ">" and never matched a
    // well-formed "<?xml ... ?>" declaration, only the "<?xml ... >" variant.
    $replaced = preg_replace('(<\\?xml\\s[^?>]*\\??>)', $pi, $source, 1);
    // preg_replace() returns NULL on a PCRE error, keep the declared return type
    return $replaced ?? $source;
  }
  return $source;
}
||
90 | |||
/**
 * Try to read the character set from a meta tag in the HTML source.
 *
 * Two variants are recognized, in this order:
 *  1. `<meta charset="...">`
 *  2. `<meta http-equiv="content-type" content="...; charset=...">`
 *
 * Tag and attribute names are matched case-insensitively.
 *
 * @param string $source
 * @return string|bool the charset name, FALSE if none was found
 */
private function getCharsetFromMetaTag(string $source) {
  $hasCharsetAttribute = preg_match(
    '(<meta\\s+[^>]*charset=["\']\s*(?<charset>[^\\s\'">]+)\s*["\'])i',
    $source,
    $match
  );
  if ($hasCharsetAttribute) {
    return $match['charset'];
  }
  $hasContentTypeTag = preg_match(
    '(<meta\\s+[^>]*http-equiv=["\']content-type["\'][^>]*>)i',
    $source,
    $tagMatch
  );
  if ($hasContentTypeTag) {
    // Look only inside the matched tag. The value class has to exclude
    // whitespace (\s) - the former [^\S...] matched nothing but whitespace,
    // so a charset was never extracted. The media type part stops at either
    // quote character and the pattern is case-insensitive like the tag match.
    preg_match(
      '(content=["\']\s*[^"\']+;\s*charset\s*=\s*(?<encoding>[^\s\'">]+))i',
      $tagMatch[0],
      $contentMatch
    );
    return $contentMatch['encoding'] ?? FALSE;
  }
  return FALSE;
}
||
119 | |||
/**
 * Parse an HTML snippet into a document fragment.
 *
 * The snippet is wrapped into a temporary `<html-fragment>` element, parsed
 * as HTML, and the child nodes of that wrapper are appended to the fragment.
 *
 * @see LoadableFragment::loadFragment
 * @param string $source
 * @param string $contentType
 * @param array|\Traversable|Options $options
 * @return DocumentFragment|NULL NULL if the content type is not supported
 */
public function loadFragment($source, string $contentType, $options = []) {
  if (!$this->supports($contentType)) {
    return NULL;
  }
  $settings = $this->getOptions($options);
  $parse = function() use ($source, $settings) {
    $dom = new Document();
    $result = $dom->createDocumentFragment();
    $wrapped = '<html-fragment>'.$source.'</html-fragment>';
    $dom->loadHTML(
      $this->ensureEncodingPI(
        $wrapped, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]
      ),
      $settings[Options::LIBXML_OPTIONS]
    );
    // move the parsed nodes out of the wrapper element
    foreach ($dom->evaluate('//html-fragment[1]/node()') as $child) {
      $result->append($child);
    }
    return $result;
  };
  return (new Libxml\Errors())->capture($parse);
}
||
152 | |||
/**
 * Decide if the source has to be treated as an HTML fragment.
 *
 * That is the case for the two fragment content types, or if the
 * IS_FRAGMENT option holds a truthy value.
 *
 * @param string $contentType
 * @param array|\ArrayAccess $options anything readable with array syntax
 * @return bool
 */
private function isFragment(string $contentType, $options) {
  if (in_array($contentType, ['html-fragment', 'text/html-fragment'], TRUE)) {
    // the option is not read if the content type already decides
    return TRUE;
  }
  return (bool)$options[self::IS_FRAGMENT];
}
||
160 | |||
/**
 * Parse an HTML snippet and copy the resulting nodes into the given document.
 *
 * The snippet is wrapped into a temporary `<html-fragment>` element and
 * parsed in a separate document. Each child node of the wrapper is imported
 * (deep copy) into $document and appended at its top level.
 *
 * @param \DOMDocument $document target document, modified in place
 * @param string $source HTML snippet
 * @param array|\ArrayAccess $settings read for encoding, force encoding and libxml options
 */
private function loadFragmentIntoDom(\DOMDocument $document, string $source, $settings) {
  $parsed = new Document();
  $wrapped = '<html-fragment>'.$source.'</html-fragment>';
  $parsed->loadHTML(
    $this->ensureEncodingPI(
      $wrapped, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]
    ),
    $settings[Options::LIBXML_OPTIONS]
  );
  foreach ($parsed->evaluate('//html-fragment[1]/node()') as $child) {
    /** @var \DOMNode $child */
    $copy = $document->importNode($child, TRUE);
    // skip nodes that could not be imported
    if ($copy) {
      $document->appendChild($copy);
    }
  }
}
|
179 | } |
||
180 | } |
If an expression can have both false and null as possible values, it is generally a good practice to always use strict comparison to clearly distinguish between those two values.