|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace voku\helper; |
|
4
|
|
|
|
|
5
|
|
|
/**
 * Class HtmlMin
 *
 * DOM based HTML minifier: protects inline scripts / styles and conditional
 * comments, strips comments and redundant whitespace, optimizes attributes
 * and finally restores the protected content.
 *
 * Inspired by:
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
 * - PHP: https://github.com/zaininnari/html-minifier
 * - Java: https://code.google.com/archive/p/htmlcompressor/
 *
 * @package voku\helper
 */
class HtmlMin
{
    /**
     * Mime-types that mark a script as executable JavaScript (used as a lookup-map, the values are unused).
     *
     * // https://mathiasbynens.be/demo/javascript-mime-type
     * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
     *
     * @var array
     */
    private static $executableScriptsMimeTypes = array(
        'text/javascript'          => '',
        'text/ecmascript'          => '',
        'text/jscript'             => '',
        'application/javascript'   => '',
        'application/x-javascript' => '',
        'application/ecmascript'   => '',
    );

    /**
     * Tags that are collapsed from "<tag ...></tag>" into "<tag .../>" at the end of minify().
     *
     * @var array
     */
    private static $selfClosingTags = array(
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    );

    /**
     * Tags whose surrounding / inner text-nodes are trimmed (used as a lookup-map, the values are unused).
     *
     * @var array
     */
    private static $trimWhitespaceFromTags = array(
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    );

    /**
     * Boolean attributes, their value is dropped in the output (used as a lookup-map, the values are unused).
     *
     * @var array
     */
    private static $booleanAttributes = array(
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    );

    /**
     * Text inside these tags is never touched by sumUpWhitespace().
     *
     * @var array
     */
    private static $skipTagsForRemoveWhitespace = array(
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    );

    /**
     * Protected content of the current minify() run, indexed by placeholder-id.
     *
     * @var array
     */
    private $protectedChildNodes = array();

    /**
     * Tag-name of the placeholder that stands in for protected content.
     *
     * @var string
     */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /**
     * Marker value for boolean attributes, '="<marker>"' is stripped from the final html.
     *
     * @var string
     */
    private $booleanAttributesHelper = 'html-min--voku--delete-this';

    /**
     * @var bool
     */
    private $doOptimizeAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveComments = true;

    /**
     * @var bool
     */
    private $doRemoveWhitespaceAroundTags = true;

    /**
     * @var bool
     */
    private $doRemoveHttpPrefixFromAttributes = true;

    /**
     * @var bool
     */
    private $doSortCssClassNames = true;

    /**
     * @var bool
     */
    private $doSortHtmlAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /**
     * @var bool
     */
    private $doRemoveDefaultAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedAnchorName = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /**
     * @var bool
     */
    private $doRemoveValueFromEmptyInput = true;

    /**
     * @var bool
     */
    private $doRemoveEmptyAttributes = true;

    /**
     * @var bool
     */
    private $doSumUpWhitespace = true;

    /**
     * HtmlMin constructor.
     */
    public function __construct()
    {
    }

    /**
     * Check if the given comment-text is a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment the text of the comment, without the comment delimiters
     *
     * @return bool true if the text starts with "[if ...]" or ends with "[endif]"
     */
    private function isConditionalComment($comment)
    {
        if (preg_match('/^\[if [^\]]+\]/', $comment)) {
            return true;
        }

        if (preg_match('/\[endif\]$/', $comment)) {
            return true;
        }

        return false;
    }

    /**
     * Minify the given html.
     *
     * The (trimmed) input is returned unchanged if the processed result
     * turns out to be longer than the input.
     *
     * @param string $html
     *
     * @return string
     */
    public function minify($html)
    {
        $html = (string)$html;
        if (!isset($html[0])) {
            return '';
        }

        $html = trim($html);
        // compare against the empty string: a loose check would also discard the valid input "0"
        if ($html === '') {
            return '';
        }

        // init (the alternation of self-closing tags is built only once per request)
        static $cacheSelfClosingTags = null;
        if ($cacheSelfClosingTags === null) {
            $cacheSelfClosingTags = implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = array();

        // save old content
        $origHtml = $html;
        $origHtmlLength = UTF8::strlen($html);

        // init dom
        $dom = new HtmlDomParser();
        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not format the output with indentation

        // load dom
        $dom->loadHtml($html);

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments === true) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Remove whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace === true) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Optimize html attributes. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doOptimizeAttributes === true) {
                $this->optimizeAttributes($element);
            }

            if ($this->doRemoveWhitespaceAroundTags === true) {
                $this->removeWhitespaceAroundTags($element);
            }
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        $html = $dom->html();

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove a space that is followed by >
        $html = preg_replace('/ (>)/', '$1', $html);
        // Remove a space that is preceded by <
        $html = preg_replace('/(<) /', '$1', $html);
        // Remove a space that is between > and <
        $html = preg_replace('/(>) (<)/', '>$2', $html);

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $html = preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            array($this, 'restoreProtectedHtml'),
            $html
        );
        $html = $dom::putReplacedBackToPreserveHtmlEntities($html);

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = UTF8::cleanup($html);

        // drop the line-breaks around <html> / <head> and the marker value of boolean attributes
        $html = str_replace(
            array(
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
                '="' . $this->booleanAttributesHelper . '"',
            ),
            array(
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
                '',
            ),
            $html
        );

        // "<img ...></img>" => "<img .../>"
        $html = preg_replace('#<\b(' . $cacheSelfClosingTags . ')([^>]+)><\/\b\1>#', '<\\1\\2/>', $html);

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < UTF8::strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }

    /**
     * Sort HTML-Attributes, so that gzip can do better work
     * and remove some default attributes.
     *
     * @param SimpleHtmlDom $element
     *
     * @return bool false if the element reports no attributes (null), otherwise true
     */
    private function optimizeAttributes(SimpleHtmlDom $element)
    {
        $attributes = $element->getAllAttributes();
        if ($attributes === null) {
            return false;
        }

        $attributes = (array)$attributes;
        $sortedAttributes = array();
        foreach ($attributes as $attrName => $attrValue) {

            if (isset(self::$booleanAttributes[$attrName])) {

                if ($this->doSortHtmlAttributes === true) {
                    // the marker value is stripped again at the end of minify()
                    $sortedAttributes[$attrName] = $this->booleanAttributesHelper;
                    $element->{$attrName} = null;
                }

                continue;
            }

            $originalValue = $attrValue;

            if ($this->doRemoveHttpPrefixFromAttributes === true) {
                if (
                    ($attrName === 'href' || $attrName === 'src' || $attrName === 'action')
                    &&
                    !(isset($attributes['rel']) && $attributes['rel'] === 'external')
                    &&
                    !(isset($attributes['target']) && $attributes['target'] === '_blank')
                ) {
                    // only a leading scheme is made protocol-relative, urls that are
                    // embedded in a path or query-string must stay untouched
                    if (strpos($attrValue, 'http://') === 0) {
                        $attrValue = substr($attrValue, 5);
                    }
                }
            }

            if ($this->removeAttributeHelper($element->tag, $attrName, $attrValue, $attributes)) {
                $element->{$attrName} = null;
                continue;
            }

            if ($this->doSortCssClassNames === true) {
                $attrValue = $this->sortCssClassNames($attrName, $attrValue);
            }

            if ($this->doSortHtmlAttributes === true) {
                // remove the attribute for now, it is re-added in sorted order below
                $sortedAttributes[$attrName] = $attrValue;
                $element->{$attrName} = null;
            } elseif ($attrValue !== $originalValue) {
                // without sorting, the attribute stays in place: write the optimized value back,
                // otherwise the optimizations above would be lost
                $element->setAttribute($attrName, HtmlDomParser::replaceToPreserveHtmlEntities($attrValue), true);
            }
        }

        if ($this->doSortHtmlAttributes === true) {
            ksort($sortedAttributes);
            foreach ($sortedAttributes as $attrName => $attrValue) {
                $attrValue = HtmlDomParser::replaceToPreserveHtmlEntities($attrValue);
                $element->setAttribute($attrName, $attrValue, true);
            }
        }

        return true;
    }

    /**
     * Prevent changes of inline "styles" and "scripts" and of conditional comments.
     *
     * The content is saved in $this->protectedChildNodes and replaced by a
     * placeholder-tag (as text) that carries the index of the saved content.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom)
    {
        // init
        $counter = 0;

        foreach ($dom->find('script, style') as $element) {

            // skip external links
            if ($element->tag === 'script' || $element->tag === 'style') {
                $attributes = $element->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$counter] = $element->text();
            $element->getNode()->nodeValue = '<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $counter . '"></' . $this->protectedChildNodesHelper . '>';

            ++$counter;
        }

        $dom->getDocument()->normalizeDocument();

        foreach ($dom->find('//comment()') as $element) {
            $text = $element->text();

            // skip normal comments
            if ($this->isConditionalComment($text) === false) {
                continue;
            }

            $this->protectedChildNodes[$counter] = '<!--' . $text . '-->';

            /* @var $node \DOMComment */
            $node = $element->getNode();
            $child = new \DOMText('<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $counter . '"></' . $this->protectedChildNodesHelper . '>');
            $node->parentNode->replaceChild($child, $node);

            ++$counter;
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }

    /**
     * Check if the attribute can be removed.
     *
     * @param string $tag       name of the tag that owns the attribute
     * @param string $attrName
     * @param string $attrValue
     * @param array  $allAttr   all attributes of the tag (name => value)
     *
     * @return bool true if the attribute can be dropped
     */
    private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr)
    {
        // remove defaults
        if ($this->doRemoveDefaultAttributes === true) {

            if ($tag === 'script' && $attrName === 'language' && $attrValue === 'javascript') {
                return true;
            }

            if ($tag === 'form' && $attrName === 'method' && $attrValue === 'get') {
                return true;
            }

            if ($tag === 'input' && $attrName === 'type' && $attrValue === 'text') {
                return true;
            }

            if ($tag === 'area' && $attrName === 'shape' && $attrValue === 'rect') {
                return true;
            }
        }

        // remove deprecated charset-attribute (the Browser will use the charset from the HTTP-Header, anyway)
        if ($this->doRemoveDeprecatedScriptCharsetAttribute === true) {
            if ($tag === 'script' && $attrName === 'charset' && !isset($allAttr['src'])) {
                return true;
            }
        }

        // remove deprecated anchor-jump
        if ($this->doRemoveDeprecatedAnchorName === true) {
            if ($tag === 'a' && $attrName === 'name' && isset($allAttr['id']) && $allAttr['id'] === $attrValue) {
                return true;
            }
        }

        // remove "type=text/css" for css links
        if ($this->doRemoveDeprecatedTypeFromStylesheetLink === true) {
            if ($tag === 'link' && $attrName === 'type' && $attrValue === 'text/css' && isset($allAttr['rel']) && $allAttr['rel'] === 'stylesheet') {
                return true;
            }
        }

        // remove deprecated script-mime-types
        if ($this->doRemoveDeprecatedTypeFromScriptTag === true) {
            if ($tag === 'script' && $attrName === 'type' && isset($allAttr['src'], self::$executableScriptsMimeTypes[$attrValue])) {
                return true;
            }
        }

        // remove "value" from empty <input>
        if ($this->doRemoveValueFromEmptyInput === true) {
            if ($tag === 'input' && $attrName === 'value' && $attrValue === '') {
                return true;
            }
        }

        // remove some empty attributes
        if ($this->doRemoveEmptyAttributes === true) {
            if ($attrValue === '' && preg_match('/^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/', $attrName)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Remove comments in the dom.
     *
     * INFO: comments that contain a "[" are kept.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom)
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $comment = $commentWrapper->getNode();
            $val = $comment->nodeValue;
            if (strpos($val, '[') === false) {
                $comment->parentNode->removeChild($comment);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }

    /**
     * Trim tags in the dom.
     *
     * For tags listed in self::$trimWhitespaceFromTags that have child-nodes:
     * trim the first / last child and the previous / next sibling, if they are text-nodes.
     *
     * @param SimpleHtmlDom $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        // hasChildNodes() instead of count(): DOMNodeList is only "Countable" since PHP 7.2
        if (!$node->hasChildNodes()) {
            return;
        }

        $candidates = array(
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        );

        // the nodes are objects, so no by-reference iteration is needed to modify them
        foreach ($candidates as $candidate) {
            if ($candidate === null) {
                continue;
            }

            if ($candidate->nodeType === XML_TEXT_NODE) {
                $candidate->nodeValue = trim($candidate->nodeValue);
            }
        }
    }

    /**
     * Callback function for preg_replace_callback use.
     *
     * Replaces a placeholder-tag with the content that was saved by protectTags().
     *
     * @param array $matches PREG matches
     *
     * @return string the protected content, or an empty string if the placeholder is unknown
     */
    private function restoreProtectedHtml($matches)
    {
        $attributes = isset($matches['attributes']) ? $matches['attributes'] : '';

        // a placeholder without a quoted id cannot be resolved (and must not trigger an "undefined index")
        if (!preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner)) {
            return '';
        }

        $id = $matchesInner['id'];
        if (!isset($this->protectedChildNodes[$id])) {
            return '';
        }

        return (string)$this->protectedChildNodes[$id];
    }

    /**
     * @param bool $doOptimizeAttributes
     */
    public function setDoOptimizeAttributes($doOptimizeAttributes)
    {
        // cast: the flags are checked via "=== true", so a truthy non-bool would silently disable the feature
        $this->doOptimizeAttributes = (bool)$doOptimizeAttributes;
    }

    /**
     * @param bool $doRemoveComments
     */
    public function setDoRemoveComments($doRemoveComments)
    {
        $this->doRemoveComments = (bool)$doRemoveComments;
    }

    /**
     * @param bool $doRemoveDefaultAttributes
     */
    public function setDoRemoveDefaultAttributes($doRemoveDefaultAttributes)
    {
        $this->doRemoveDefaultAttributes = (bool)$doRemoveDefaultAttributes;
    }

    /**
     * @param bool $doRemoveDeprecatedAnchorName
     */
    public function setDoRemoveDeprecatedAnchorName($doRemoveDeprecatedAnchorName)
    {
        $this->doRemoveDeprecatedAnchorName = (bool)$doRemoveDeprecatedAnchorName;
    }

    /**
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute
     */
    public function setDoRemoveDeprecatedScriptCharsetAttribute($doRemoveDeprecatedScriptCharsetAttribute)
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = (bool)$doRemoveDeprecatedScriptCharsetAttribute;
    }

    /**
     * @param bool $doRemoveDeprecatedTypeFromScriptTag
     */
    public function setDoRemoveDeprecatedTypeFromScriptTag($doRemoveDeprecatedTypeFromScriptTag)
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = (bool)$doRemoveDeprecatedTypeFromScriptTag;
    }

    /**
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink
     */
    public function setDoRemoveDeprecatedTypeFromStylesheetLink($doRemoveDeprecatedTypeFromStylesheetLink)
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = (bool)$doRemoveDeprecatedTypeFromStylesheetLink;
    }

    /**
     * @param bool $doRemoveEmptyAttributes
     */
    public function setDoRemoveEmptyAttributes($doRemoveEmptyAttributes)
    {
        $this->doRemoveEmptyAttributes = (bool)$doRemoveEmptyAttributes;
    }

    /**
     * @param bool $doRemoveHttpPrefixFromAttributes
     */
    public function setDoRemoveHttpPrefixFromAttributes($doRemoveHttpPrefixFromAttributes)
    {
        $this->doRemoveHttpPrefixFromAttributes = (bool)$doRemoveHttpPrefixFromAttributes;
    }

    /**
     * @param bool $doRemoveValueFromEmptyInput
     */
    public function setDoRemoveValueFromEmptyInput($doRemoveValueFromEmptyInput)
    {
        $this->doRemoveValueFromEmptyInput = (bool)$doRemoveValueFromEmptyInput;
    }

    /**
     * @param bool $doRemoveWhitespaceAroundTags
     */
    public function setDoRemoveWhitespaceAroundTags($doRemoveWhitespaceAroundTags)
    {
        $this->doRemoveWhitespaceAroundTags = (bool)$doRemoveWhitespaceAroundTags;
    }

    /**
     * @param bool $doSortCssClassNames
     */
    public function setDoSortCssClassNames($doSortCssClassNames)
    {
        $this->doSortCssClassNames = (bool)$doSortCssClassNames;
    }

    /**
     * @param bool $doSortHtmlAttributes
     */
    public function setDoSortHtmlAttributes($doSortHtmlAttributes)
    {
        $this->doSortHtmlAttributes = (bool)$doSortHtmlAttributes;
    }

    /**
     * @param bool $doSumUpWhitespace
     */
    public function setDoSumUpWhitespace($doSumUpWhitespace)
    {
        $this->doSumUpWhitespace = (bool)$doSumUpWhitespace;
    }

    /**
     * Sort the class-names of a "class"-attribute, values of other attributes are returned unchanged.
     *
     * @param string $attrName
     * @param string $attrValue
     *
     * @return string the class-names in sorted order, separated by a single space
     */
    private function sortCssClassNames($attrName, $attrValue)
    {
        if ($attrName !== 'class' || !$attrValue) {
            return $attrValue;
        }

        // class-names are separated by ASCII whitespace (not only by spaces); PREG_SPLIT_NO_EMPTY
        // drops the empty tokens without losing a class that is literally named "0"
        $classes = preg_split('/[ \t\n\r\f]+/', $attrValue, -1, PREG_SPLIT_NO_EMPTY);
        if ($classes === false) {
            // regex failure: better keep the value than lose the classes
            return $attrValue;
        }

        if (!$classes) {
            return '';
        }

        sort($classes);

        return implode(' ', $classes);
    }

    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Runs of two or more whitespace characters are collapsed into one space,
     * text inside of self::$skipTagsForRemoveWhitespace is skipped.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom)
    {
        // match whole path segments only (optionally with a position like "pre[2]"),
        // so that e.g. "/preview" is not mistaken for "/pre"
        $skipPattern = '#/(?:' . implode('|', self::$skipTagsForRemoveWhitespace) . ')(?:\[\d+\])?(?:/|$)#';

        foreach ($dom->find('//text()') as $textnodeWrapper) {
            $textnode = $textnodeWrapper->getNode();

            if (preg_match($skipPattern, (string)$textnode->getNodePath())) {
                continue;
            }

            $textnode->nodeValue = preg_replace("/\s{2,}/", ' ', $textnode->nodeValue);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
}
|
834
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.