1 | <?php |
||
2 | /** |
||
3 | * @author Niels A.D. |
||
4 | * @author Todd Burry <[email protected]> |
||
5 | * @copyright 2010 Niels A.D., 2014 Todd Burry |
||
6 | * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 |
||
7 | * @package pQuery |
||
8 | */ |
||
9 | |||
10 | namespace pQuery; |
||
11 | |||
/**
 * Indents the continuation lines of a text
 *
 * The indentation is inserted after every "\n", so the first line of
 * $text is left untouched. When either $indent or $indent_string is
 * empty (falsy) the text is returned unchanged.
 *
 * @param string $text Text to indent
 * @param int $indent Number of times $indent_string is repeated per line
 * @param string $indent_string String used for a single indent level
 * @return string
 */
function indent_text($text, $indent, $indent_string = ' ') {
    // Nothing to insert: no indent level or an empty indent string
    if (!$indent || !$indent_string) {
        return $text;
    }

    $prefix = str_repeat($indent_string, $indent);

    return str_replace("\n", "\n".$prefix, $text);
}
||
26 | |||
/**
 * Class used to format/minify HTML nodes
 *
 * Used like:
 * <code>
 * <?php
 *   $formatter = new HtmlFormatter();
 *   $formatter->format($root);
 * ?>
 * </code>
 */
class HtmlFormatter {

    /**
     * Determines which elements start on a new line and which function as block
     *
     * Elements that are not listed are treated as inline: no forced new line,
     * not a block, but their content is still formatted.
     *
     * @var array('element' => array('new_line' => true, 'as_block' => true, 'format_inside' => true))
     */
    public $block_elements = array(
        'p' =>          array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h1' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h2' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h3' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h4' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h5' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'h6' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),

        'form' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'fieldset' =>   array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'legend' =>     array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'dl' =>         array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'dt' =>         array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        'dd' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'ol' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'ul' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'li' =>         array('new_line' => true,  'as_block' => false, 'format_inside' => true),

        'table' =>      array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'tr' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),

        'dir' =>        array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'menu' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'address' =>    array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'blockquote' => array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'center' =>     array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'del' =>        array('new_line' => true,  'as_block' => false, 'format_inside' => true),
        //'div' =>      array('new_line' => false, 'as_block' => true,  'format_inside' => true),
        'hr' =>         array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'ins' =>        array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'noscript' =>   array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'pre' =>        array('new_line' => true,  'as_block' => true,  'format_inside' => false),
        'script' =>     array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'style' =>      array('new_line' => true,  'as_block' => true,  'format_inside' => true),

        'html' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'head' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'body' =>       array('new_line' => true,  'as_block' => true,  'format_inside' => true),
        'title' =>      array('new_line' => true,  'as_block' => false, 'format_inside' => false)
    );

    /**
     * Determines which characters are considered whitespace
     * @var array("\t" => true) True to recognize as new line
     */
    public $whitespace = array(
        ' ' => false,
        "\t" => false,
        "\x0B" => false,
        "\0" => false,
        "\n" => true,
        "\r" => true
    );

    /**
     * String that is used to generate correct indenting
     * @var string
     */
    public $indent_string = ' ';

    /**
     * String that is used to break lines
     * @var string
     */
    public $linebreak_string = "\n";

    /**
     * Other formatting options
     *
     * - img_alt: value for the "alt" attribute added to img elements that
     *   have none; null disables this.
     * - self_close_str: copied to every formatted node unless null.
     * - attribute_shorttag: copied to every formatted node unless null.
     * - sort_attributes: truthy sorts attributes by name, 'reverse' sorts
     *   them in reverse order.
     * - attributes_case: passed to array_change_key_case() when truthy.
     * - minify_script: when truthy, {@link format()} runs
     *   {@link minify_javascript()} first.
     *
     * @var array
     */
    public $options = array(
        'img_alt' => '',
        'self_close_str' => null,
        'attribute_shorttag' => false,
        'sort_attributes' => false,
        'attributes_case' => CASE_LOWER,
        'minify_script' => true
    );

    /**
     * Errors found during formatting
     * @var array
     */
    public $errors = array();


    /**
     * Class constructor
     *
     * Besides the keys of {@link $options}, 'indent_str' and 'linebreak_str'
     * are recognized and override {@link $indent_string} and
     * {@link $linebreak_string}.
     *
     * @param array $options {@link $options}
     */
    public function __construct($options = array()) {
        $this->options = array_merge($this->options, $options);

        if (isset($options['indent_str'])) {
            $this->indent_string = $options['indent_str'];
        }

        if (isset($options['linebreak_str'])) {
            $this->linebreak_string = $options['linebreak_str'];
        }
    }

    #php4 PHP4 class constructor compatibility
    #function HtmlFormatter($options = array()) {return $this->__construct($options);}
    #php4e

    /**
     * Class magic invoke method, performs {@link format()}
     * @access private
     */
    public function __invoke(&$node) {
        return $this->format($node);
    }

    /**
     * Minifies HTML / removes unneeded whitespace
     *
     * Optionally deletes comment nodes (merging text nodes that become
     * adjacent) and collapses every whitespace run in the selected text
     * nodes to a single space.
     *
     * @param DomNode $root
     * @param bool $strip_comments
     * @param bool $recursive
     */
    public static function minify_html(&$root, $strip_comments = true, $recursive = true) {
        if ($strip_comments) {
            foreach($root->select(':comment', false, $recursive, true) as $c) {
                // Fetch the neighbours before the comment is removed
                $prev = $c->getSibling(-1);
                $next = $c->getSibling(1);
                $c->delete();

                // Two text nodes that were separated by the comment are merged
                if ($prev && $next && ($prev->isText()) && ($next->isText())) {
                    $prev->text .= $next->text;
                    $next->delete();
                }
            }
        }

        foreach($root->select('(!pre + !xmp + !style + !script + !"?php" + !"~text~" + !"~comment~"):not-empty > "~text~"', false, $recursive, true) as $c) {
            $c->text = preg_replace('`\s+`', ' ', $c->text);
        }
    }

    /**
     * Minifies javascript using JSMin+
     *
     * Leading "<!--" / "<![CDATA[" and trailing "-->" / "]]>" markers are
     * stripped from the script text before it is handed to JSMin+.
     *
     * @param DomNode $root
     * @param string $indent_string
     * @param bool $wrap_comment Wrap javascript in HTML comments (<!-- ~text~ //-->)
     * @param bool $recursive
     * @return bool|array Array of errors (exception, location) on failure, true on success
     */
    public static function minify_javascript(&$root, $indent_string = ' ', $wrap_comment = true, $recursive = true) {
        #php4 JSMin+ doesn't support PHP4
        #return true;
        #php4e
        #php5
        include_once('third party/jsminplus.php');

        $errors = array();
        foreach($root->select('script:not-empty > "~text~"', false, $recursive, true) as $c) {
            try {
                $text = $c->text;
                while ($text) {
                    $text = trim($text);

                    // Remove comment/CDATA tags at begin and end.
                    // Strip exactly the marker length; one byte more would
                    // eat the first character of the script when no
                    // whitespace follows the marker.
                    if (substr($text, 0, 4) === '<!--') {
                        $text = substr($text, 4);
                        continue;
                    } elseif (strtolower(substr($text, 0, 9)) === '<![cdata[') {
                        $text = substr($text, 9);
                        continue;
                    }

                    if (($end = substr($text, -3)) && (($end === '-->') || ($end === ']]>'))) {
                        $text = substr($text, 0, -3);
                        continue;
                    }

                    break;
                }

                if (trim($text)) {
                    $text = \JSMinPlus::minify($text);
                    if ($wrap_comment) {
                        $text = "<!--\n".$text."\n//-->";
                    }
                    // Only multi-line output is put on its own indented lines
                    if ($indent_string && ($wrap_comment || (strpos($text, "\n") !== false))) {
                        $text = indent_text("\n".$text, $c->indent(), $indent_string);
                    }
                }
                $c->text = $text;
            } catch (\Exception $e) {
                $errors[] = array($e, $c->parent->dumpLocation());
            }
        }

        return (($errors) ? $errors : true);
        #php5e
    }

    /**
     * Formats HTML
     *
     * Normalizes the attributes and tag of $root according to
     * {@link $options} and inserts line breaks / indentation into the text
     * nodes around and inside its children, driven by
     * {@link $block_elements} and {@link $whitespace}.
     *
     * @param DomNode $root
     * @param bool|int|null $recursive Null (initial call) minifies $root first
     *        and recurses fully; an integer is decremented per level and
     *        recursion stops once it reaches 1; other values are passed on
     *        to the children unchanged.
     * @return bool Always true
     * @access private
     */
    public function format_html(&$root, $recursive = null) {
        if ($recursive === null) {
            $recursive = true;
            self::minify_html($root);
        } elseif (is_int($recursive)) {
            $recursive = (($recursive > 1) ? $recursive - 1 : false);
        }

        $root_tag = strtolower($root->tag);
        $in_block = isset($this->block_elements[$root_tag]) && $this->block_elements[$root_tag]['as_block'];
        $child_count = count($root->children);

        // NOTE(review): CASE_LOWER is the integer 0, so with the default
        // option value this truthiness check is false and no case change is
        // applied; only a truthy value such as CASE_UPPER takes effect.
        // Left unchanged to keep the existing output - confirm the intent.
        if (isset($this->options['attributes_case']) && $this->options['attributes_case']) {
            $root->attributes = array_change_key_case($root->attributes, $this->options['attributes_case']);
            $root->attributes_ns = null;
        }

        if (isset($this->options['sort_attributes']) && $this->options['sort_attributes']) {
            if ($this->options['sort_attributes'] === 'reverse') {
                krsort($root->attributes);
            } else {
                ksort($root->attributes);
            }
        }

        if ($root->select(':element', true, false, true)) {
            $root->setTag(strtolower($root->tag), true);
            if (($this->options['img_alt'] !== null) && ($root_tag === 'img') && (!isset($root->alt))) {
                $root->setAttribute('alt', $this->options['img_alt']);
            }
        }
        if ($this->options['self_close_str'] !== null) {
            $root->self_close_str = $this->options['self_close_str'];
        }
        if ($this->options['attribute_shorttag'] !== null) {
            $root->attribute_shorttag = $this->options['attribute_shorttag'];
        }

        $prev = null;
        $n_tag = '';
        // $as_block is only updated for non-text children, so after a text
        // node $prev_asblock still reflects the last non-text child.
        $as_block = false;
        $prev_asblock = false;
        for($i = 0; $i < $child_count; $i++) {
            $n =& $root->children[$i];
            $indent = $n->indent();

            if (!$n->isText()) {
                $n_tag = strtolower($n->tag);
                $new_line = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['new_line'];
                $as_block = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['as_block'];
                $format_inside = ((!isset($this->block_elements[$n_tag])) || $this->block_elements[$n_tag]['format_inside']);

                // Whitespace before the opening tag
                if ($prev && ($prev->isText()) && $prev->text && ($char = $prev->text[strlen($prev->text) - 1]) && isset($this->whitespace[$char])) {
                    if ($this->whitespace[$char]) {
                        // Previous text already ends in a line break: only indent
                        $prev->text .= str_repeat($this->indent_string, $indent);
                    } else {
                        // Replace the trailing whitespace by line break + indent
                        $prev->text = substr_replace($prev->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1);
                    }
                } elseif (($new_line || $prev_asblock || ($in_block && ($i === 0)))){
                    if ($prev && ($prev->isText())) {
                        $prev->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent);
                    } else {
                        // A text node is inserted at index $i, so the child
                        // list grows by one.
                        $root->addText($this->linebreak_string.str_repeat($this->indent_string, $indent), $i);
                        ++$child_count;
                    }
                }

                if ($format_inside && count($n->children)) {
                    //$last = end($n->children);
                    $last = $n->children[count($n->children) - 1];
                    $last_tag = ($last) ? strtolower($last->tag) : '';
                    $last_asblock = ($last_tag && isset($this->block_elements[$last_tag]) && $this->block_elements[$last_tag]['as_block']);

                    // Whitespace before the closing tag
                    if (($n->childCount(true) > 0) || (trim($n->getPlainText()))) {
                        if ($last && ($last->isText()) && $last->text && ($char = $last->text[strlen($last->text) - 1]) && isset($this->whitespace[$char])) {
                            if ($as_block || ($last->index() > 0) || isset($this->whitespace[$last->text[0]])) {
                                if ($this->whitespace[$char]) {
                                    $last->text .= str_repeat($this->indent_string, $indent);
                                } else {
                                    $last->text = substr_replace($last->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1);
                                }
                            }
                        } elseif (($as_block || $last_asblock || ($in_block && ($i === 0))) && $last) {
                            if ($last && ($last->isText())) {
                                $last->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent);
                            } else {
                                $n->addText($this->linebreak_string.str_repeat($this->indent_string, $indent));
                            }
                        }
                    } elseif (!trim($n->getInnerText())) {
                        // Only whitespace inside: empty the element
                        $n->clear();
                    }

                    if ($recursive) {
                        $this->format_html($n, $recursive);
                    }
                }

            // NOTE(review): ($i - 1 < $child_count) is always true inside this
            // loop, so $char is always assigned here before it is read below.
            // Possibly ($i + 1) was intended; left unchanged.
            } elseif (trim($n->text) && ((($i - 1 < $child_count) && ($char = $n->text[0]) && isset($this->whitespace[$char])) || ($in_block && ($i === 0)))) {
                if (isset($this->whitespace[$char])) {
                    if ($this->whitespace[$char]) {
                        $n->text = str_repeat($this->indent_string, $indent).$n->text;
                    } else {
                        $n->text = substr_replace($n->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), 0, 1);
                    }
                } else {
                    $n->text = $this->linebreak_string.str_repeat($this->indent_string, $indent).$n->text;
                }
            }

            $prev = $n;
            $prev_asblock = $as_block;
        }

        return true;
    }

    /**
     * Formats HTML/Javascript
     *
     * Resets {@link $errors}, minifies scripts when the 'minify_script'
     * option is set (collecting any errors as "message >>> location"
     * strings in {@link $errors}) and then formats the HTML.
     *
     * @param DomNode $node
     * @return bool Result of {@link format_html()}
     * @see format_html()
     */
    public function format(&$node) {
        $this->errors = array();
        if ($this->options['minify_script']) {
            $a = self::minify_javascript($node, $this->indent_string, true, true);
            if (is_array($a)) {
                foreach($a as $error) {
                    $this->errors[] = $error[0]->getMessage().' >>> '.$error[1];
                }
            }
        }
        return $this->format_html($node);
    }
}
||
380 | |||
381 | ?> |