This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | namespace Wa72\HtmlPageDom; |
||
3 | |||
4 | use Symfony\Component\CssSelector\CssSelector; |
||
5 | use Wa72\HtmlPrettymin\PrettyMin; |
||
6 | |||
/**
 * This class represents a complete HTML document.
 *
 * It offers convenience functions for getting and setting elements of the document
 * such as setTitle(), getTitle(), setMeta($name, $value), getBody().
 *
 * It uses HtmlPageCrawler to navigate and manipulate the DOM tree.
 *
 * @author Christoph Singer
 * @license MIT
 */
class HtmlPage
{
    /**
     * The parsed document that every accessor of this class operates on.
     *
     * @var \DOMDocument
     */
    protected $dom;

    /**
     * Character set passed to the constructor.
     *
     * @var string
     */
    protected $charset;

    /**
     * URL passed to the constructor; stored as given.
     *
     * @var string
     */
    protected $url;

    /**
     * Crawler wrapping $dom; used by filter() and filterXPath().
     *
     * @var HtmlPageCrawler
     */
    protected $crawler;

    /**
     * Parse the given HTML into a DOMDocument and wrap it in a HtmlPageCrawler.
     *
     * @param string $content HTML source; when empty, a minimal HTML5 skeleton is used instead
     * @param string $url     URL associated with the document (only stored)
     * @param string $charset Character set of $content
     */
    public function __construct($content = '', $url = '', $charset = 'UTF-8')
    {
        $this->charset = $charset;
        $this->url = $url;

        // Loose comparison: null compares equal to '' and therefore also selects the skeleton.
        if ($content == '') {
            $content = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
        }

        // Collect libxml parse diagnostics internally instead of emitting PHP warnings.
        $previousErrorMode = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is only
        // toggled on older runtimes.
        $toggleEntityLoader = \PHP_VERSION_ID < 80000;
        $previousEntityLoader = false;
        if ($toggleEntityLoader) {
            $previousEntityLoader = libxml_disable_entity_loader(true);
        }

        $this->dom = new \DOMDocument('1.0', $charset);
        $this->dom->validateOnParse = true;

        if (function_exists('mb_convert_encoding')
            && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()), true)
        ) {
            // Hand the parser pure ASCII: every non-ASCII character becomes a numeric
            // character reference. This replaces the 'HTML-ENTITIES' pseudo-encoding of
            // mb_convert_encoding(), which is deprecated as of PHP 8.2. Converting to
            // UTF-8 first keeps the result ASCII even for charsets that are not
            // ASCII-compatible.
            $content = mb_encode_numericentity(
                mb_convert_encoding($content, 'UTF-8', $charset),
                array(0x80, 0x10FFFF, 0, 0x1FFFFF),
                'UTF-8'
            );
        }

        // No error-suppression operator needed: parse diagnostics are already captured
        // by libxml_use_internal_errors(true) above.
        $this->dom->loadHTML($content);

        // Restore the libxml settings that were active before this constructor ran.
        libxml_use_internal_errors($previousErrorMode);
        if ($toggleEntityLoader) {
            libxml_disable_entity_loader($previousEntityLoader);
        }

        $this->crawler = new HtmlPageCrawler($this->dom);
    }

    /**
     * Get a HtmlPageCrawler object containing the root node of the HTML document
     *
     * @return HtmlPageCrawler
     */
    public function getCrawler()
    {
        return $this->crawler;
    }

    /**
     * Get a DOMDocument object for the HTML document
     *
     * @return \DOMDocument
     */
    public function getDOMDocument()
    {
        return $this->dom;
    }

    /**
     * Sets the page title of the HTML document
     *
     * A title element is created inside the head section if the document has none.
     *
     * @param string $title
     */
    public function setTitle($title)
    {
        $titleNode = $this->dom->getElementsByTagName('title')->item(0);
        if ($titleNode === null) {
            $titleNode = $this->dom->createElement('title');
            $this->getHeadNode()->appendChild($titleNode);
        }
        // NOTE(review): the value is escaped before assignment, presumably because the
        // nodeValue setter interprets entity references - confirm before changing.
        $titleNode->nodeValue = htmlspecialchars($title);
    }

    /**
     * Get the page title of the HTML document
     *
     * @return null|string null if the document has no title element
     */
    public function getTitle()
    {
        $titleNode = $this->dom->getElementsByTagName('title')->item(0);

        return $titleNode === null ? null : $titleNode->nodeValue;
    }

    /**
     * Set a META tag with specified 'name' and 'content' attributes
     *
     * If no meta element with that name exists, one is created in the head section.
     *
     * @TODO: add support for multiple meta tags with the same name but different languages
     *
     * @param string $name
     * @param string $content
     */
    public function setMeta($name, $content)
    {
        $matches = $this->filterXPath(self::metaXPath($name));
        if (count($matches) == 0) {
            $node = $this->dom->createElement('meta');
            $node->setAttribute('name', $name);
            $this->getHeadNode()->appendChild($node);
            $matches->addNode($node);
        }
        $matches->setAttribute('content', $content);
    }

    /**
     * Remove all meta tags with the specified name attribute
     *
     * @param string $name
     */
    public function removeMeta($name)
    {
        $this->filterXPath(self::metaXPath($name))->remove();
    }

    /**
     * Get the content attribute of a meta tag with the specified name attribute
     *
     * @param string $name
     * @return null|string null if no matching meta element exists
     */
    public function getMeta($name)
    {
        $node = $this->filterXPath(self::metaXPath($name))->getNode(0);
        if ($node instanceof \DOMElement) {
            return $node->getAttribute('content');
        }

        return null;
    }

    /**
     * Set the base tag with href attribute set to parameter $url
     *
     * A base element is created inside the head section if the document has none.
     *
     * @param string $url
     */
    public function setBaseHref($url)
    {
        $node = $this->filterXPath('descendant-or-self::base')->getNode(0);
        if ($node === null) {
            $node = $this->dom->createElement('base');
            $this->getHeadNode()->appendChild($node);
        }
        $node->setAttribute('href', $url);
    }

    /**
     * Get the href attribute from the base tag, null if not present in document
     *
     * @return null|string
     */
    public function getBaseHref()
    {
        $node = $this->filterXPath('descendant-or-self::base')->getNode(0);
        if ($node instanceof \DOMElement) {
            return $node->getAttribute('href');
        }

        return null;
    }

    /**
     * Sets innerHTML content of an element specified by elementId
     *
     * @param string $elementId
     * @param string $html
     */
    public function setHtmlById($elementId, $html)
    {
        $this->getElementById($elementId)->setInnerHtml($html);
    }

    /**
     * Get the document's HEAD section as DOMElement
     *
     * If the document has no head element, one is created and inserted before the body.
     *
     * @return \DOMElement
     */
    public function getHeadNode()
    {
        $head = $this->dom->getElementsByTagName('head')->item(0);
        if ($head === null) {
            $head = $this->dom->createElement('head');
            $head = $this->getRootNode()->insertBefore($head, $this->getBodyNode());
        }

        return $head;
    }

    /**
     * Get the document's body as DOMElement
     *
     * If the document has no body element, one is created and appended to the root element.
     *
     * @return \DOMElement
     */
    public function getBodyNode()
    {
        $body = $this->dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            $body = $this->dom->createElement('body');
            $body = $this->getRootNode()->appendChild($body);
        }

        return $body;
    }

    /**
     * Get the document's HEAD section wrapped in a HtmlPageCrawler instance
     *
     * @return HtmlPageCrawler
     */
    public function getHead()
    {
        return new HtmlPageCrawler($this->getHeadNode());
    }

    /**
     * Get the document's body wrapped in a HtmlPageCrawler instance
     *
     * @return HtmlPageCrawler
     */
    public function getBody()
    {
        return new HtmlPageCrawler($this->getBodyNode());
    }

    /**
     * Serialize the document to HTML source.
     *
     * @return string
     */
    public function __toString()
    {
        return $this->dom->saveHTML();
    }

    /**
     * Save this document to a HTML file or return HTML code as string
     *
     * @param string $filename If provided, output will be saved to this file, otherwise returned
     * @return string|void
     */
    public function save($filename = '')
    {
        if ($filename != '') {
            file_put_contents($filename, (string) $this);

            return;
        }

        return (string) $this;
    }

    /**
     * Get an element in the document by its id attribute
     *
     * @param string $id
     * @return HtmlPageCrawler
     */
    public function getElementById($id)
    {
        return $this->filterXPath('descendant-or-self::*[@id = ' . self::xpathLiteral($id) . ']');
    }

    /**
     * Filter nodes by using a CSS selector
     *
     * @param string $selector CSS selector
     * @return HtmlPageCrawler
     */
    public function filter($selector)
    {
        return $this->crawler->filter($selector);
    }

    /**
     * Filter nodes by XPath expression
     *
     * @param string $xpath XPath expression
     * @return HtmlPageCrawler
     */
    public function filterXPath($xpath)
    {
        return $this->crawler->filterXPath($xpath);
    }

    /**
     * remove newlines from string and minimize whitespace (multiple whitespace characters replaced by one space)
     *
     * useful for cleaning up text retrieved by HtmlPageCrawler::text() (nodeValue of a DOMNode)
     *
     * Delegates to Helpers::trimNewlines().
     *
     * @param string $string
     * @return string
     */
    public static function trimNewlines($string)
    {
        return Helpers::trimNewlines($string);
    }

    /**
     * Deep-copy the DOM on clone and give the copy its own crawler, so the clone
     * does not share nodes with the original page.
     */
    public function __clone()
    {
        $this->dom = $this->dom->cloneNode(true);
        $this->crawler = new HtmlPageCrawler($this->dom);
    }

    /**
     * minify the HTML document
     *
     * @param array $options Options passed to PrettyMin::__construct()
     * @return HtmlPage
     * @throws \Exception if the PrettyMin class is not available
     */
    public function minify(array $options = array())
    {
        $this->createPrettyMin($options, 'minify')->load($this->dom)->minify();

        return $this;
    }

    /**
     * indent the HTML document
     *
     * @param array $options Options passed to PrettyMin::__construct()
     * @return HtmlPage
     * @throws \Exception if the PrettyMin class is not available
     */
    public function indent(array $options = array())
    {
        $this->createPrettyMin($options, 'indent')->load($this->dom)->indent();

        return $this;
    }

    /**
     * Return the document's root element, creating an html element when the parsed
     * document has none, so head/body creation never dereferences null.
     *
     * @return \DOMElement
     */
    private function getRootNode()
    {
        $root = $this->dom->documentElement;
        if ($root === null) {
            $root = $this->dom->appendChild($this->dom->createElement('html'));
        }

        return $root;
    }

    /**
     * Instantiate PrettyMin, failing with a descriptive exception when the class
     * is not available.
     *
     * @param array  $options Options passed to PrettyMin::__construct()
     * @param string $caller  Name of the public method, used in the exception message
     * @return PrettyMin
     * @throws \Exception
     */
    private function createPrettyMin(array $options, $caller)
    {
        if (!class_exists('Wa72\\HtmlPrettymin\\PrettyMin')) {
            throw new \Exception('Function ' . $caller . ' needs composer package wa72/html-pretty-min');
        }

        return new PrettyMin($options);
    }

    /**
     * Build the XPath expression selecting meta elements with the given name attribute.
     *
     * @param string $name
     * @return string
     */
    private static function metaXPath($name)
    {
        return 'descendant-or-self::meta[@name = ' . self::xpathLiteral($name) . ']';
    }

    /**
     * Quote an arbitrary string as an XPath 1.0 string literal.
     *
     * XPath 1.0 has no escape sequence inside string literals, so the quote style is
     * chosen to avoid the quotes contained in the value; a value containing both
     * quote characters is assembled with concat().
     *
     * @param string $value
     * @return string
     */
    private static function xpathLiteral($value)
    {
        $value = (string) $value;
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        // Split on single quotes and re-join the pieces with a double-quoted "'".
        return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
    }
}
||
363 |
If you suppress an error, we recommend checking for the error condition explicitly: