Code

< 40 %
40-60 %
> 60 %
1
<?php
2
/**
 * @author Niels A.D.
 * @author Todd Burry <[email protected]>
 * @copyright 2010 Niels A.D., 2014 Todd Burry
 * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1
 * @package pQuery
 */
9
10
namespace pQuery;
11
12
/**
 * Indents text
 *
 * Inserts $indent copies of $indent_string after every "\n" in $text.
 * The first line is not indented (only characters following a newline are),
 * so callers that want the first line indented must prepend a newline.
 *
 * The text is returned unchanged when $indent is smaller than 1 or when
 * $indent_string is empty/falsy.
 *
 * @param string $text Text to indent
 * @param int $indent Number of times $indent_string is repeated per line
 * @param string $indent_string String used for one level of indentation
 * @return string
 */
function indent_text($text, $indent, $indent_string = '  ') {
	// Guard against zero/negative indents: str_repeat() rejects a negative
	// repeat count (ValueError on PHP 8), and a zero count is a no-op anyway.
	if (!$indent_string || ($indent < 1)) {
		return $text;
	}

	return str_replace("\n", "\n".str_repeat($indent_string, $indent), $text);
}
26
27
/**
 * Class used to format/minify HTML nodes
 *
 * Used like:
 * <code>
 * <?php
 *   $formatter = new HtmlFormatter();
 *   $formatter->format($root);
 * ?>
 * </code>
 */
class HtmlFormatter {

	/**
	 * Determines which elements start on a new line and which function as block
	 *
	 * Keys are lowercase tag names. Per element:
	 *  - 'new_line':      a line break is inserted before the element
	 *  - 'as_block':      the element is treated as a block (line breaks around its content)
	 *  - 'format_inside': false leaves the element's children untouched
	 *
	 * @var array('element' => array('new_line' => true, 'as_block' => true, 'format_inside' => true))
	 */
	public $block_elements = array(
		'p' =>			array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'h1' => 		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'h2' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'h3' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'h4' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'h5' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'h6' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),

		'form' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'fieldset' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'legend' =>  	array('new_line' => true,  'as_block' => false, 'format_inside' => true),
		'dl' =>  		array('new_line' => true,  'as_block' => false, 'format_inside' => true),
		'dt' =>  		array('new_line' => true,  'as_block' => false, 'format_inside' => true),
		'dd' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'ol' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'ul' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'li' =>  		array('new_line' => true,  'as_block' => false, 'format_inside' => true),

		'table' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'tr' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),

		'dir' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'menu' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'address' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'blockquote' => array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'center' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'del' =>  		array('new_line' => true,  'as_block' => false, 'format_inside' => true),
		//'div' =>  	array('new_line' => false, 'as_block' => true,  'format_inside' => true),
		'hr' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'ins' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'noscript' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'pre' =>  		array('new_line' => true,  'as_block' => true,  'format_inside' => false),
		'script' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'style' =>  	array('new_line' => true,  'as_block' => true,  'format_inside' => true),

		'html' => 		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'head' => 		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'body' => 		array('new_line' => true,  'as_block' => true,  'format_inside' => true),
		'title' => 		array('new_line' => true,  'as_block' => false, 'format_inside' => false)
	);

	/**
	 * Determines which characters are considered whitespace
	 * @var array("\t" => true) True to recognize as new line
	 */
	public $whitespace = array(
		' ' => false,
		"\t" => false,
		"\x0B" => false,
		"\0" => false,
		"\n" => true,
		"\r" => true
	);

	/**
	 * String that is used to generate correct indenting
	 * @var string
	 */
	public $indent_string = ' ';

	/**
	 * String that is used to break lines
	 * @var string
	 */
	public $linebreak_string = "\n";

	/**
	 * Other formatting options
	 *
	 * Keys read by this class: 'img_alt', 'self_close_str',
	 * 'attribute_shorttag', 'sort_attributes' (truthy, or the string
	 * 'reverse'), 'attributes_case' and 'minify_script'.
	 *
	 * @var array
	 */
	public $options = array(
		'img_alt' => '',
		'self_close_str' => null,
		'attribute_shorttag' => false,
		'sort_attributes' => false,
		'attributes_case' => CASE_LOWER,
		'minify_script' => true
	);

	/**
	 * Errors found during formatting
	 * @var array
	 */
	public $errors = array();


	/**
	 * Class constructor
	 *
	 * The given options are merged over the defaults in {@link $options}.
	 * Two extra keys are recognized and copied to their own properties:
	 * 'indent_str' sets {@link $indent_string} and 'linebreak_str' sets
	 * {@link $linebreak_string}.
	 *
	 * @param array $options {@link $options}
	 */
	public function __construct($options = array()) {
		$this->options = array_merge($this->options, $options);

		if (isset($options['indent_str'])) {
			$this->indent_string = $options['indent_str'];
		}

		if (isset($options['linebreak_str'])) {
			$this->linebreak_string = $options['linebreak_str'];
		}
	}

	#php4 PHP4 class constructor compatibility
	#function HtmlFormatter($options = array()) {return $this->__construct($options);}
	#php4e

	/**
	 * Class magic invoke method, performs {@link format()}
	 * @param DomNode $node
	 * @return bool Result of {@link format()}
	 * @access private
	 */
	public function __invoke(&$node) {
		return $this->format($node);
	}

	/**
	 * Minifies HTML / removes unneeded whitespace
	 *
	 * When $strip_comments is set, every comment node selected below $root is
	 * deleted; if the comment sat between two text nodes, those are merged
	 * into the first one. Afterwards every run of whitespace in the selected
	 * text nodes is collapsed into a single space. The selector, as written,
	 * excludes text inside pre, xmp, style, script and "?php" nodes.
	 *
	 * @param DomNode $root
	 * @param bool $strip_comments
	 * @param bool $recursive
	 */
	public static function minify_html(&$root, $strip_comments = true, $recursive = true) {
		if ($strip_comments) {
			foreach($root->select(':comment', false, $recursive, true) as $c) {
				// Fetch the neighbours before deleting, the comment is gone afterwards
				$prev = $c->getSibling(-1);
				$next = $c->getSibling(1);
				$c->delete();
				if ($prev && $next && ($prev->isText()) && ($next->isText())) {
					$prev->text .= $next->text;
					$next->delete();
				}
			}
		}
		foreach($root->select('(!pre + !xmp + !style + !script + !"?php" + !"~text~" + !"~comment~"):not-empty > "~text~"', false, $recursive, true) as $c) {
			$c->text = preg_replace('`\s+`', ' ', $c->text);
		}
	}

	/**
	 * Minifies javascript using JSMin+
	 *
	 * For every non-empty script text node: strips leading "<!--" /
	 * "<![CDATA[" markers and trailing "-->" / "]]>" markers, minifies the
	 * remainder, optionally wraps it in an HTML comment again and re-indents it.
	 * Exceptions raised while processing a node are collected instead of thrown.
	 *
	 * @param DomNode $root
	 * @param string $indent_string
	 * @param bool $wrap_comment Wrap javascript in HTML comments (<!-- ~text~ //-->)
	 * @param bool $recursive
	 * @return bool|array Array of errors on failure, true on success. Each
	 *                    error is array(Exception, location of the parent node)
	 */
	public static function minify_javascript(&$root, $indent_string = ' ', $wrap_comment = true, $recursive = true) {
	#php4 JSMin+ doesn't support PHP4
	#return true;
	#php4e
	#php5
		include_once('third party/jsminplus.php');

		$errors = array();
		foreach($root->select('script:not-empty > "~text~"', false, $recursive, true) as $c) {
			try {
				$text = $c->text;
				while ($text) {
					$text = trim($text);
					//Remove comment/CDATA tags at begin and end
					// Strip exactly the marker length (4 and 9 bytes). Removing
					// one byte more would eat the first character of the script
					// whenever it directly follows the marker ("<!--alert(1)").
					if (substr($text, 0, 4) === '<!--') {
						$text = substr($text, 4);
						continue;
					} elseif (strtolower(substr($text, 0, 9)) === '<![cdata[') {
						$text = substr($text, 9);
						continue;
					}

					if (($end = substr($text, -3)) && (($end === '-->') || ($end === ']]>'))) {
						$text = substr($text, 0, -3);
						continue;
					}

					// Nothing left to strip
					break;
				}

				if (trim($text)) {
					$text = \JSMinPlus::minify($text);
					if ($wrap_comment) {
						$text = "<!--\n".$text."\n//-->";
					}
					// Only re-indent multi-line output (wrapped output always is)
					if ($indent_string && ($wrap_comment || (strpos($text, "\n") !== false))) {
						$text = indent_text("\n".$text, $c->indent(), $indent_string);
					}
				}
				$c->text = $text;
			} catch (\Exception $e) {
				$errors[] = array($e, $c->parent->dumpLocation());
			}
		}

		return (($errors) ? $errors : true);
	#php5e
	}

	/**
	 * Formats HTML
	 *
	 * Normalizes $root (attribute case/order, lowercase tag, default img alt,
	 * self-close and short-tag settings) and then walks its direct children,
	 * inserting line breaks and indentation into the surrounding text nodes
	 * according to {@link $block_elements} and {@link $whitespace}.
	 *
	 * @param DomNode $root
	 * @param bool|int $recursive Null (default) marks the top-level call: the
	 *                            tree is minified first and formatting recurses
	 *                            fully. An integer is decremented per level and
	 *                            stops recursion once it is no longer above 1.
	 * @return bool Always true
	 * @access private
	 */
	public function format_html(&$root, $recursive = null) {
		if ($recursive === null) {
			// Top-level call: start from minified markup so existing layout
			// whitespace does not interfere with the formatting below
			$recursive = true;
			self::minify_html($root);
		} elseif (is_int($recursive)) {
			$recursive = (($recursive > 1) ? $recursive - 1 : false);
		}

		$root_tag = strtolower($root->tag);
		$in_block = isset($this->block_elements[$root_tag]) && $this->block_elements[$root_tag]['as_block'];
		$child_count = count($root->children);

		// NOTE(review): CASE_LOWER is 0 in PHP, so with the default option value
		// this truthiness check is false and keys are only changed for
		// CASE_UPPER. Confirm whether lowercasing by default was intended
		// before changing the condition, it would alter existing output.
		if (isset($this->options['attributes_case']) && $this->options['attributes_case']) {
			$root->attributes = array_change_key_case($root->attributes, $this->options['attributes_case']);
			$root->attributes_ns = null;
		}

		if (isset($this->options['sort_attributes']) && $this->options['sort_attributes']) {
			if ($this->options['sort_attributes'] === 'reverse') {
				krsort($root->attributes);
			} else {
				ksort($root->attributes);
			}
		}

		if ($root->select(':element', true, false, true)) {
			$root->setTag(strtolower($root->tag), true);
			if (($this->options['img_alt'] !== null) && ($root_tag === 'img') && (!isset($root->alt))) {
				$root->setAttribute('alt', $this->options['img_alt']);
			}
		}
		if ($this->options['self_close_str'] !== null) {
			$root->self_close_str = $this->options['self_close_str'];
		}
		if ($this->options['attribute_shorttag'] !== null) {
			$root->attribute_shorttag = $this->options['attribute_shorttag'];
		}

		$prev = null;
		$n_tag = '';
//		$prev_tag = '';
		$as_block = false;
		$prev_asblock = false;
		for($i = 0; $i < $child_count; $i++) {
			$n =& $root->children[$i];
			$indent = $n->indent();

			if (!$n->isText()) {
				$n_tag = strtolower($n->tag);
				$new_line = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['new_line'];
				$as_block = isset($this->block_elements[$n_tag]) && $this->block_elements[$n_tag]['as_block'];
				// Elements that are not listed are formatted inside by default
				$format_inside = ((!isset($this->block_elements[$n_tag])) || $this->block_elements[$n_tag]['format_inside']);

				// Whitespace before the element: the preceding text node ends in a whitespace character
				if ($prev && ($prev->isText()) && $prev->text && ($char = $prev->text[strlen($prev->text) - 1]) && isset($this->whitespace[$char])) {
					if ($this->whitespace[$char]) {
						// Already ends with a line break, only add the indentation
						$prev->text .= str_repeat($this->indent_string, $indent);
					} else {
						// Replace the trailing whitespace character by line break + indentation
						$prev->text = substr_replace($prev->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1);
					}
				} elseif (($new_line || $prev_asblock || ($in_block && ($i === 0)))){
					if ($prev && ($prev->isText())) {
						$prev->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent);
					} else {
						// No text node to extend: insert one at this position and
						// account for the extra child in the loop bound
						$root->addText($this->linebreak_string.str_repeat($this->indent_string, $indent), $i);
						++$child_count;
					}
				}

				if ($format_inside && count($n->children)) {
					//$last = end($n->children);
					$last = $n->children[count($n->children) - 1];
					$last_tag = ($last) ? strtolower($last->tag) : '';
					$last_asblock = ($last_tag && isset($this->block_elements[$last_tag]) && $this->block_elements[$last_tag]['as_block']);

					if (($n->childCount(true) > 0) || (trim($n->getPlainText()))) {
						// Whitespace before the closing tag, handled like the whitespace before the element above
						if ($last && ($last->isText()) && $last->text && ($char = $last->text[strlen($last->text) - 1]) && isset($this->whitespace[$char])) {
							if ($as_block || ($last->index() > 0) || isset($this->whitespace[$last->text[0]])) {
								if ($this->whitespace[$char]) {
									$last->text .= str_repeat($this->indent_string, $indent);
								} else {
									$last->text = substr_replace($last->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), -1, 1);
								}
							}
						} elseif (($as_block || $last_asblock || ($in_block && ($i === 0))) && $last) {
							if ($last && ($last->isText())) {
								$last->text .= $this->linebreak_string.str_repeat($this->indent_string, $indent);
							} else {
								$n->addText($this->linebreak_string.str_repeat($this->indent_string, $indent));
							}
						}
					} elseif (!trim($n->getInnerText())) {
						// Element holds nothing but whitespace, empty it
						$n->clear();
					}

					if ($recursive) {
						$this->format_html($n, $recursive);
					}
				}

			// NOTE(review): ($i - 1 < $child_count) is always true inside this
			// loop, so it does not restrict anything; original intent unclear.
			} elseif (trim($n->text) && ((($i - 1 < $child_count) && ($char = $n->text[0]) && isset($this->whitespace[$char])) || ($in_block && ($i === 0)))) {
				// Text node: fix up the whitespace at the start of the text
				if (isset($this->whitespace[$char])) {
					if ($this->whitespace[$char]) {
						$n->text = str_repeat($this->indent_string, $indent).$n->text;
					} else {
						$n->text = substr_replace($n->text, $this->linebreak_string.str_repeat($this->indent_string, $indent), 0, 1);
					}
				} else {
					$n->text = $this->linebreak_string.str_repeat($this->indent_string, $indent).$n->text;
				}
			}

			$prev = $n;
//			$prev_tag = $n_tag;
			$prev_asblock = $as_block;
		}

		return true;
	}

	/**
	 * Formats HTML/Javascript
	 *
	 * Resets {@link $errors}, minifies inline javascript first when the
	 * 'minify_script' option is set (collecting any errors as
	 * "message >>> location" strings in {@link $errors}) and then formats
	 * the HTML.
	 *
	 * @param DomNode $node
	 * @return bool Result of {@link format_html()}
	 * @see format_html()
	 */
	public function format(&$node) {
		$this->errors = array();
		if ($this->options['minify_script']) {
			$a = self::minify_javascript($node, $this->indent_string, true, true);
			if (is_array($a)) {
				foreach($a as $error) {
					$this->errors[] = $error[0]->getMessage().' >>> '.$error[1];
				}
			}
		}
		return $this->format_html($node);
	}
}
380
381
?>