1 | <?php |
||
18 | class HtmlPage |
||
19 | { |
||
20 | /** |
||
21 | * |
||
22 | * @var \DOMDocument |
||
23 | */ |
||
24 | protected $dom; |
||
25 | |||
26 | /** |
||
27 | * @var string |
||
28 | */ |
||
29 | protected $charset; |
||
30 | |||
31 | /** |
||
32 | * @var string |
||
33 | */ |
||
34 | protected $url; |
||
35 | |||
36 | /** |
||
37 | * |
||
38 | * @var HtmlPageCrawler |
||
39 | */ |
||
40 | protected $crawler; |
||
41 | |||
42 | 13 | public function __construct($content = '', $url = '', $charset = 'UTF-8') |
|
66 | |||
67 | /** |
||
68 | * Get a HtmlPageCrawler object containing the root node of the HTML document |
||
69 | * |
||
70 | * @return HtmlPageCrawler |
||
71 | */ |
||
72 | 1 | public function getCrawler() |
|
76 | |||
77 | /** |
||
78 | * Get a DOMDocument object for the HTML document |
||
79 | * |
||
80 | * @return \DOMDocument |
||
81 | */ |
||
82 | 1 | public function getDOMDocument() |
|
86 | |||
87 | /** |
||
88 | * Sets the page title of the HTML document |
||
89 | * |
||
90 | * @param string $title |
||
91 | */ |
||
92 | 3 | public function setTitle($title) |
|
101 | |||
102 | /** |
||
103 | * Get the page title of the HTML document |
||
104 | * |
||
105 | * @return null|string |
||
106 | */ |
||
107 | 4 | public function getTitle() |
|
116 | |||
117 | /** |
||
118 | * Set a META tag with specified 'name' and 'content' attributes |
||
119 | * |
||
120 | * @TODO: add support for multiple meta tags with the same name but different languages |
||
121 | * |
||
122 | * @param string $name Value of the meta tag's 'name' attribute |
||
123 | * @param string $content Value of the meta tag's 'content' attribute |
||
124 | */ |
||
125 | 1 | public function setMeta($name, $content) |
|
136 | |||
137 | /** |
||
138 | * Remove all meta tags with the specified name attribute |
||
139 | * |
||
140 | * @param string $name |
||
141 | */ |
||
142 | 1 | public function removeMeta($name) |
|
147 | |||
148 | /** |
||
149 | * Get the content attribute of a meta tag with the specified name attribute |
||
150 | * |
||
151 | * @param string $name |
||
152 | * @return null|string |
||
153 | */ |
||
154 | 1 | public function getMeta($name) |
|
163 | |||
164 | /** |
||
165 | * Set the base tag with href attribute set to parameter $url |
||
166 | * |
||
167 | * @param string $url |
||
168 | */ |
||
169 | 1 | public function setBaseHref($url) |
|
178 | |||
179 | /** |
||
180 | * Get the href attribute from the base tag, null if not present in document |
||
181 | * |
||
182 | * @return null|string |
||
183 | */ |
||
184 | 2 | public function getBaseHref() |
|
193 | |||
194 | /** |
||
195 | * Sets innerHTML content of an element specified by elementId |
||
196 | * |
||
197 | * @param string $elementId |
||
198 | * @param string $html |
||
199 | */ |
||
200 | 1 | public function setHtmlById($elementId, $html) |
|
204 | |||
205 | /** |
||
206 | * Get the document's HEAD section as DOMElement |
||
207 | * |
||
208 | * @return \DOMElement |
||
209 | */ |
||
210 | 3 | public function getHeadNode() |
|
219 | |||
220 | /** |
||
221 | * Get the document's body as DOMElement |
||
222 | * |
||
223 | * @return \DOMElement |
||
224 | */ |
||
225 | 3 | public function getBodyNode() |
|
234 | |||
235 | /** |
||
236 | * Get the document's HEAD section wrapped in a HtmlPageCrawler instance |
||
237 | * |
||
238 | * @return HtmlPageCrawler |
||
239 | */ |
||
240 | 1 | public function getHead() |
|
244 | |||
245 | /** |
||
246 | * Get the document's body wrapped in a HtmlPageCrawler instance |
||
247 | * |
||
248 | * @return HtmlPageCrawler |
||
249 | */ |
||
250 | 2 | public function getBody() |
|
254 | |||
255 | 6 | public function __toString() |
|
259 | |||
260 | /** |
||
261 | * Save this document to a HTML file or return HTML code as string |
||
262 | * |
||
263 | * @param string $filename If provided, output will be saved to this file, otherwise returned |
||
264 | * @return string|void |
||
265 | */ |
||
266 | 4 | public function save($filename = '') |
|
275 | |||
276 | /** |
||
277 | * Get an element in the document by its id attribute |
||
278 | * |
||
279 | * @param string $id |
||
280 | * @return HtmlPageCrawler |
||
281 | */ |
||
282 | 1 | public function getElementById($id) |
|
286 | |||
287 | /** |
||
288 | * Filter nodes by using a CSS selector |
||
289 | * |
||
290 | * @param string $selector CSS selector |
||
291 | * @return HtmlPageCrawler |
||
292 | */ |
||
293 | 1 | public function filter($selector) |
|
298 | |||
299 | /** |
||
300 | * Filter nodes by XPath expression |
||
301 | * |
||
302 | * @param string $xpath XPath expression |
||
303 | * @return HtmlPageCrawler |
||
304 | */ |
||
305 | 2 | public function filterXPath($xpath) |
|
309 | |||
310 | /** |
||
311 | * Remove newlines from a string and minimize whitespace (multiple whitespace characters are replaced by one space) |
||
312 | * |
||
313 | * Useful for cleaning up text retrieved by HtmlPageCrawler::text() (nodeValue of a DOMNode) |
||
314 | * |
||
315 | * @param string $string |
||
316 | * @return string |
||
317 | */ |
||
318 | 1 | public static function trimNewlines($string) |
|
322 | |||
323 | 1 | public function __clone() |
|
328 | |||
329 | /** |
||
330 | * Minify the HTML document |
||
331 | * |
||
332 | * @param array $options Options passed to PrettyMin::__construct() |
||
333 | * @return HtmlPage |
||
334 | * @throws \Exception |
||
335 | */ |
||
336 | 1 | public function minify(array $options = array()) |
|
345 | |||
346 | /** |
||
347 | * Indent the HTML document |
||
348 | * |
||
349 | * @param array $options Options passed to PrettyMin::__construct() |
||
350 | * @return HtmlPage |
||
351 | * @throws \Exception |
||
352 | */ |
||
353 | 1 | public function indent(array $options = array()) |
|
362 | } |
||
363 |
If you suppress an error, we recommend checking for the error condition explicitly: