1 | <?php |
||
16 | class HTML5 |
||
17 | { |
||
18 | /** |
||
19 | * Global options for the parser and serializer. |
||
20 | * |
||
21 | * @var array |
||
22 | */ |
||
23 | private $defaultOptions = array( |
||
24 | // Whether the serializer should aggressively encode all characters as entities. |
||
25 | 'encode_entities' => false, |
||
26 | |||
27 | // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document. |
||
28 | 'disable_html_ns' => false, |
||
29 | |||
30 | // Whether to add missing root elements. |
||
31 | 'normalize' => false, |
||
32 | ); |
||
33 | |||
34 | protected $errors = array(); |
||
35 | |||
36 | 76 | public function __construct(array $defaultOptions = array()) |
|
40 | |||
41 | /** |
||
42 | * Get the current default options. |
||
43 | * |
||
44 | * @return array |
||
45 | */ |
||
46 | 48 | public function getOptions() |
|
50 | |||
51 | /** |
||
52 | * Load and parse an HTML file. |
||
53 | * |
||
54 | * This will apply the HTML5 parser, which is tolerant of many |
||
55 | * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML |
||
56 | * 3. Note that in these cases, not all of the old data will be |
||
57 | * preserved. For example, XHTML's XML declaration will be removed. |
||
58 | * |
||
59 | * The rules governing parsing are set out in the HTML 5 spec. |
||
60 | * |
||
61 | * @param string|resource $file The path to the file to parse. If this is a resource, it is |
||
62 | * assumed to be an open stream whose pointer is set to the first |
||
63 | * byte of input. |
||
64 | * @param array $options Configuration options when parsing the HTML. |
||
65 | * |
||
66 | * @return \DOMDocument A DOM document. This object type is defined by the libxml |
||
67 | * library, and should have been included with your version of PHP. |
||
68 | */ |
||
69 | 6 | public function load($file, array $options = array()) |
|
78 | |||
79 | /** |
||
80 | * Parse an HTML document from a string. |
||
81 | * |
||
82 | * Take a string of HTML 5 (or earlier) and parse it into a |
||
83 | * DOMDocument. |
||
84 | * |
||
85 | * @param string $string An HTML5 document as a string. |
||
86 | * @param array $options Configuration options when parsing the HTML. |
||
87 | * |
||
88 | * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with |
||
89 | * almost all distributions of PHP. |
||
90 | */ |
||
91 | 59 | public function loadHTML($string, array $options = array()) |
|
95 | |||
96 | /** |
||
97 | * Convenience function to load an HTML file. |
||
98 | * |
||
99 | * This is here to provide backwards compatibility with the |
||
100 | * PHP DOM implementation. It simply calls load(). |
||
101 | * |
||
102 | * @param string|resource $file The path to the file to parse. If this is a resource, it is |
||
103 | * assumed to be an open stream whose pointer is set to the first |
||
104 | * byte of input. |
||
105 | * @param array $options Configuration options when parsing the HTML. |
||
106 | * |
||
107 | * @return \DOMDocument A DOM document. This object type is defined by the libxml |
||
108 | * library, and should have been included with your version of PHP. |
||
109 | */ |
||
110 | 1 | public function loadHTMLFile($file, array $options = array()) |
|
114 | |||
115 | /** |
||
116 | * Parse an HTML fragment from a string. |
||
117 | * |
||
118 | * @param string $string the HTML5 fragment as a string |
||
119 | * @param array $options Configuration options when parsing the HTML |
||
120 | * |
||
121 | * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with |
||
122 | * almost all distributions of PHP. |
||
123 | */ |
||
124 | 12 | public function loadHTMLFragment($string, array $options = array()) |
|
128 | |||
129 | /** |
||
130 | * Return all errors encountered during the parsing phase. |
||
131 | * |
||
132 | * @return array |
||
133 | */ |
||
134 | 15 | public function getErrors() |
|
138 | |||
139 | /** |
||
140 | * Return true if some errors were encountered during the parsing phase. |
||
141 | * |
||
142 | * @return bool |
||
143 | */ |
||
144 | 6 | public function hasErrors() |
|
148 | |||
149 | /** |
||
150 | * Parse an input string. |
||
151 | * |
||
152 | * @param string $input |
||
153 | * @param array $options |
||
154 | * |
||
155 | * @return \DOMDocument |
||
156 | */ |
||
157 | 64 | public function parse($input, array $options = array()) |
|
174 | |||
175 | /** |
||
176 | * Parse an input stream where the stream is a fragment. |
||
177 | * |
||
178 | * Lower-level loading function. This requires an input stream instead |
||
179 | * of a string, file, or resource. |
||
180 | * |
||
181 | * @param string $input The input data to parse in the form of a string. |
||
182 | * @param array $options An array of options. |
||
183 | * |
||
184 | * @return \DOMDocumentFragment |
||
185 | */ |
||
186 | 16 | public function parseFragment($input, array $options = array()) |
|
198 | |||
199 | /** |
||
200 | * Save a DOM into a given file as HTML5. |
||
201 | * |
||
202 | * @param mixed $dom The DOM to be serialized. |
||
203 | * @param string|resource $file The filename to be written or resource to write to. |
||
204 | * @param array $options Configuration options when serializing the DOM. These include: |
||
205 | * - encode_entities: Text written to the output is escaped by default and not all |
||
206 | * entities are encoded. If this is set to true all entities will be encoded. |
||
207 | * Defaults to false. |
||
208 | */ |
||
209 | 17 | public function save($dom, $file, $options = array()) |
|
228 | |||
229 | /** |
||
230 | * Convert a DOM into an HTML5 string. |
||
231 | * |
||
232 | * @param mixed $dom The DOM to be serialized. |
||
233 | * @param array $options Configuration options when serializing the DOM. These include: |
||
234 | * - encode_entities: Text written to the output is escaped by default and not all |
||
235 | * entities are encoded. If this is set to true all entities will be encoded. |
||
236 | * Defaults to false. |
||
237 | * |
||
238 | * @return string An HTML5 document generated from the DOM. |
||
239 | */ |
||
240 | 16 | public function saveHTML($dom, $options = array()) |
|
247 | |||
248 | /** |
||
249 | * Add missing root elements to the input HTML. |
||
250 | * |
||
251 | * @param string $input |
||
252 | * @return string |
||
253 | */ |
||
254 | protected function normalize($input) |
||
261 | } |
||
262 |