<?php

namespace voku\helper;

/**
 * Class HtmlMin
 *
 * Minifies an HTML string by parsing it into a DOM, removing comments,
 * collapsing whitespace and normalising attributes, then serialising it again.
 *
 * Inspired by:
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
 * - PHP: https://github.com/zaininnari/html-minifier
 * - Java: https://code.google.com/archive/p/htmlcompressor/
 *
 * @package voku\helper
 */
class HtmlMin
{
    /**
     * MIME types of executable scripts (used as a lookup set, the values are unused).
     *
     * // https://mathiasbynens.be/demo/javascript-mime-type
     * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
     *
     * @var array
     */
    private static $executableScriptsMimeTypes = array(
        'text/javascript'          => '',
        'text/ecmascript'          => '',
        'text/jscript'             => '',
        'application/javascript'   => '',
        'application/x-javascript' => '',
        'application/ecmascript'   => '',
    );

    /**
     * Tag names that minify() rewrites from "<tag ...></tag>" to "<tag .../>".
     *
     * @var array
     */
    private static $selfClosingTags = array(
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    );

    /**
     * Tag names whose surrounding text nodes get trimmed
     * (used as a lookup set, the values are unused).
     *
     * @var array
     */
    private static $trimWhitespaceFromTags = array(
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    );

    /**
     * Attribute names that are emitted without a value
     * (used as a lookup set, the values are unused).
     *
     * @var array
     */
    private static $booleanAttributes = array(
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    );

    /**
     * Tag names whose text content must keep its whitespace untouched.
     *
     * @var array
     */
    private static $skipTagsForRemoveWhitespace = array(
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    );

    /**
     * Content that was swapped out for a placeholder, indexed by placeholder id.
     *
     * @var array
     */
    private $protectedChildNodes = array();

    /**
     * Element / attribute name used for the placeholders of protected content.
     *
     * @var string
     */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /**
     * Marker value given to boolean attributes; the final clean-up in minify()
     * strips `="<marker>"` from the output again.
     *
     * @var string
     */
    private $booleanAttributesHelper = 'html-min--voku--delete-this';

    /**
     * @var bool
     */
    private $doOptimizeAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveComments = true;

    /**
     * @var bool
     */
    private $doRemoveWhitespaceAroundTags = true;

    /**
     * @var bool
     */
    private $doRemoveHttpPrefixFromAttributes = true;

    /**
     * @var bool
     */
    private $doSortCssClassNames = true;

    /**
     * @var bool
     */
    private $doSortHtmlAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /**
     * @var bool
     */
    private $doRemoveDefaultAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedAnchorName = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /**
     * @var bool
     */
    private $doRemoveValueFromEmptyInput = true;

    /**
     * @var bool
     */
    private $doRemoveEmptyAttributes = true;

    /**
     * @var bool
     */
    private $doSumUpWhitespace = true;

    /**
     * HtmlMin constructor.
     */
    public function __construct()
    {
    }

    /**
     * Check if the given comment text belongs to a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment the comment text
     *
     * @return bool true if the text starts with "[if ...]" or ends with "[endif]"
     */
    private function isConditionalComment($comment)
    {
        if (preg_match('/^\[if [^\]]+\]/', $comment)) {
            return true;
        }

        if (preg_match('/\[endif\]$/', $comment)) {
            return true;
        }

        return false;
    }

    /**
     * Minify the given HTML.
     *
     * Inline scripts, inline styles and conditional comments are replaced by
     * placeholders before the DOM is modified and restored afterwards. If the
     * result is longer than the (trimmed) input, the trimmed input is returned.
     *
     * @param string $html
     *
     * @return string the minified HTML, or an empty string for empty / whitespace-only input
     */
    public function minify($html)
    {
        $html = trim((string)$html);

        // Compare against '' explicitly: a loose "!$html" check would also
        // throw away the valid input "0".
        if ($html === '') {
            return '';
        }

        // init
        static $cacheSelfClosingTags = null;
        if ($cacheSelfClosingTags === null) {
            $cacheSelfClosingTags = implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = array();

        // save old content
        $origHtml = $html;
        $origHtmlLength = UTF8::strlen($html);

        // init dom
        $dom = new HtmlDomParser();
        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not format the output with indentation

        // load dom
        $dom->loadHtml($html);

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments === true) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace === true) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Optimize html attributes. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doOptimizeAttributes === true) {
                $this->optimizeAttributes($element);
            }

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags === true) {
                $this->removeWhitespaceAroundTags($element);
            }
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        $html = $dom->html();

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove a space that is directly followed by ">"
        $html = preg_replace('/ (>)/', '$1', $html);
        // Remove a space that directly follows "<"
        $html = preg_replace('/(<) /', '$1', $html);
        // Remove a single space between ">" and "<"
        $html = preg_replace('/(>) (<)/', '>$2', $html);

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $placeholderTag = preg_quote($this->protectedChildNodesHelper, '/');
        $html = preg_replace_callback(
            '/<(?<element>' . $placeholderTag . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $placeholderTag . '>/',
            array($this, 'restoreProtectedHtml'),
            $html
        );
        $html = $dom::putReplacedBackToPreserveHtmlEntities($html);

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = UTF8::cleanup($html);

        $html = str_replace(
            array(
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
                '="' . $this->booleanAttributesHelper . '"',
            ),
            array(
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
                '',
            ),
            $html
        );

        // "<tag attr></tag>" => "<tag attr/>" for the self-closing tags
        $html = preg_replace('#<\b(' . $cacheSelfClosingTags . ')([^>]+)><\/\b\1>#', '<\\1\\2/>', $html);

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < UTF8::strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }

    /**
     * Sort HTML-Attributes, so that gzip can do better work and remove some default attributes...
     *
     * Value optimisations (stripped "http:" prefix, sorted class names) are
     * written back to the element even when attribute sorting is disabled.
     *
     * @param SimpleHtmlDom $element
     *
     * @return bool false if the element reports no attributes (null), true otherwise
     */
    private function optimizeAttributes(SimpleHtmlDom $element)
    {
        $attributes = $element->getAllAttributes();
        if ($attributes === null) {
            return false;
        }

        $attributes = (array)$attributes;
        $doSort = $this->doSortHtmlAttributes === true;

        // Keep "http://" links untouched when they are marked as external or open in a new window.
        $mayStripHttpPrefix = $this->doRemoveHttpPrefixFromAttributes === true
                              && !(isset($attributes['rel']) && $attributes['rel'] === 'external')
                              && !(isset($attributes['target']) && $attributes['target'] === '_blank');

        $attrs = array();
        foreach ($attributes as $attrName => $attrValue) {

            // Boolean attributes get a marker value that minify() strips from the final output.
            if (isset(self::$booleanAttributes[$attrName])) {
                if ($doSort) {
                    $attrs[$attrName] = $this->booleanAttributesHelper;
                    $element->{$attrName} = null;
                }

                continue;
            }

            $originalValue = $attrValue;

            // -------------------------------------------------------------------------
            // Remove optional "http:"-prefix from attributes.
            // -------------------------------------------------------------------------

            if (
                $mayStripHttpPrefix
                &&
                ($attrName === 'href' || $attrName === 'src' || $attrName === 'action')
                &&
                strpos($attrValue, 'http://') === 0
            ) {
                // Only the leading scheme is dropped; URLs embedded further
                // inside the value (e.g. in a query string) must stay intact.
                $attrValue = substr($attrValue, 5);
            }

            if ($this->removeAttributeHelper($element->tag, $attrName, $attrValue, $attributes)) {
                $element->{$attrName} = null;
                continue;
            }

            // -------------------------------------------------------------------------
            // Sort css-class-names, for better gzip results.
            // -------------------------------------------------------------------------

            if ($this->doSortCssClassNames === true) {
                $attrValue = $this->sortCssClassNames($attrName, $attrValue);
            }

            if ($doSort) {
                // removed here and re-added in sorted order below
                $attrs[$attrName] = $attrValue;
                $element->{$attrName} = null;
            } elseif ($attrValue !== $originalValue) {
                // No re-ordering requested: update the changed value in place.
                $element->setAttribute($attrName, HtmlDomParser::replaceToPreserveHtmlEntities($attrValue), true);
            }
        }

        // -------------------------------------------------------------------------
        // Sort html-attributes, for better gzip results.
        // -------------------------------------------------------------------------

        if ($doSort) {
            ksort($attrs);
            foreach ($attrs as $attrName => $attrValue) {
                $attrValue = HtmlDomParser::replaceToPreserveHtmlEntities($attrValue);
                $element->setAttribute($attrName, $attrValue, true);
            }
        }

        return true;
    }

    /**
     * Build the placeholder markup that stands in for protected content.
     *
     * @param int $id index of the content in $this->protectedChildNodes
     *
     * @return string
     */
    private function buildPlaceholder($id)
    {
        $helper = $this->protectedChildNodesHelper;

        return '<' . $helper . ' data-' . $helper . '="' . $id . '"></' . $helper . '>';
    }

    /**
     * Prevent changes of inline "styles" and "scripts".
     *
     * The content of inline <script> / <style> elements and all conditional
     * comments are stored in $this->protectedChildNodes and replaced by
     * placeholders, which restoreProtectedHtml() resolves again.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom)
    {
        // init
        $counter = 0;

        foreach ($dom->find('script, style') as $element) {

            // skip external links
            $attributes = $element->getAllAttributes();
            if (isset($attributes['src'])) {
                continue;
            }

            $this->protectedChildNodes[$counter] = $element->text();
            $element->getNode()->nodeValue = $this->buildPlaceholder($counter);

            ++$counter;
        }

        $dom->getDocument()->normalizeDocument();

        foreach ($dom->find('//comment()') as $element) {
            $text = $element->text();

            // skip normal comments
            if ($this->isConditionalComment($text) === false) {
                continue;
            }

            $this->protectedChildNodes[$counter] = '<!--' . $text . '-->';

            /* @var $node \DOMComment */
            $node = $element->getNode();
            $child = new \DOMText($this->buildPlaceholder($counter));
            $node->parentNode->replaceChild($child, $node);

            ++$counter;
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }

    /**
     * Check if the attribute can be removed.
     *
     * @param string $tag       tag name of the element
     * @param string $attrName  name of the attribute
     * @param string $attrValue value of the attribute
     * @param array  $allAttr   all attributes of the element (name => value)
     *
     * @return bool true if the attribute is redundant and can be dropped
     */
    private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr)
    {
        // remove defaults
        if ($this->doRemoveDefaultAttributes === true) {
            if ($tag === 'script' && $attrName === 'language' && $attrValue === 'javascript') {
                return true;
            }

            if ($tag === 'form' && $attrName === 'method' && $attrValue === 'get') {
                return true;
            }

            if ($tag === 'input' && $attrName === 'type' && $attrValue === 'text') {
                return true;
            }

            if ($tag === 'area' && $attrName === 'shape' && $attrValue === 'rect') {
                return true;
            }
        }

        // remove deprecated charset-attribute from inline scripts
        // (the browser will use the charset from the HTTP-Header, anyway)
        if (
            $this->doRemoveDeprecatedScriptCharsetAttribute === true
            && $tag === 'script' && $attrName === 'charset' && !isset($allAttr['src'])
        ) {
            return true;
        }

        // remove deprecated anchor-jump: the "name" duplicates the "id"
        if (
            $this->doRemoveDeprecatedAnchorName === true
            && $tag === 'a' && $attrName === 'name' && isset($allAttr['id']) && $allAttr['id'] === $attrValue
        ) {
            return true;
        }

        // remove "type=text/css" for css links
        if (
            $this->doRemoveDeprecatedTypeFromStylesheetLink === true
            && $tag === 'link' && $attrName === 'type' && $attrValue === 'text/css'
            && isset($allAttr['rel']) && $allAttr['rel'] === 'stylesheet'
        ) {
            return true;
        }

        // remove deprecated script-mime-types (only for scripts with a "src")
        if (
            $this->doRemoveDeprecatedTypeFromScriptTag === true
            && $tag === 'script' && $attrName === 'type'
            && isset($allAttr['src'], self::$executableScriptsMimeTypes[$attrValue])
        ) {
            return true;
        }

        // remove 'value=""' from empty <input>
        if (
            $this->doRemoveValueFromEmptyInput === true
            && $tag === 'input' && $attrName === 'value' && $attrValue === ''
        ) {
            return true;
        }

        // remove some empty attributes
        if (
            $this->doRemoveEmptyAttributes === true
            && $attrValue === ''
            && preg_match('/^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/', $attrName)
        ) {
            return true;
        }

        return false;
    }

    /**
     * Remove comments in the dom.
     *
     * Comments whose text contains a "[" are kept.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom)
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $comment = $commentWrapper->getNode();
            if (strpos($comment->nodeValue, '[') !== false) {
                continue;
            }

            $comment->parentNode->removeChild($comment);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }

    /**
     * Trim tags in the dom.
     *
     * For the tags listed in self::$trimWhitespaceFromTags that have at least
     * one child node, the first / last child and the previous / next sibling
     * are trimmed when they are text nodes.
     *
     * @param SimpleHtmlDom $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        // Use "length" instead of count(): DOMNodeList is only Countable since
        // PHP 7.2, so count() does not report the real number of children before that.
        if ($node->childNodes === null || $node->childNodes->length === 0) {
            return;
        }

        $candidates = array(
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        );

        // Nodes are object handles, so no by-reference iteration is needed to modify them.
        foreach ($candidates as $candidate) {
            if ($candidate !== null && $candidate->nodeType === XML_TEXT_NODE) {
                $candidate->nodeValue = trim($candidate->nodeValue);
            }
        }
    }

    /**
     * Callback function for preg_replace_callback use.
     *
     * Resolves a placeholder created by protectTags() to the content it stands for.
     *
     * @param array $matches PREG matches
     *
     * @return string the protected content, or an empty string for an unknown placeholder
     */
    private function restoreProtectedHtml($matches)
    {
        // A placeholder without a (numeric) id cannot be resolved; checking the
        // match result avoids reading an undefined "id" index.
        if (
            !isset($matches['attributes'])
            ||
            !preg_match('/.*"(?<id>\d+)"/', $matches['attributes'], $matchesInner)
        ) {
            return '';
        }

        $id = $matchesInner['id'];
        if (!isset($this->protectedChildNodes[$id])) {
            return '';
        }

        return (string)$this->protectedChildNodes[$id];
    }

    /**
     * @param bool $doOptimizeAttributes any truthy value enables the option
     *
     * @return $this
     */
    public function doOptimizeAttributes($doOptimizeAttributes = true)
    {
        // The option is checked with "=== true", so it must be stored as a real bool.
        $this->doOptimizeAttributes = (bool)$doOptimizeAttributes;

        return $this;
    }

    /**
     * @param bool $doRemoveComments any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveComments($doRemoveComments = true)
    {
        $this->doRemoveComments = (bool)$doRemoveComments;

        return $this;
    }

    /**
     * @param bool $doRemoveDefaultAttributes any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes($doRemoveDefaultAttributes = true)
    {
        $this->doRemoveDefaultAttributes = (bool)$doRemoveDefaultAttributes;

        return $this;
    }

    /**
     * @param bool $doRemoveDeprecatedAnchorName any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName($doRemoveDeprecatedAnchorName = true)
    {
        $this->doRemoveDeprecatedAnchorName = (bool)$doRemoveDeprecatedAnchorName;

        return $this;
    }

    /**
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute($doRemoveDeprecatedScriptCharsetAttribute = true)
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = (bool)$doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }

    /**
     * @param bool $doRemoveDeprecatedTypeFromScriptTag any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag($doRemoveDeprecatedTypeFromScriptTag = true)
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = (bool)$doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }

    /**
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink($doRemoveDeprecatedTypeFromStylesheetLink = true)
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = (bool)$doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }

    /**
     * @param bool $doRemoveEmptyAttributes any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes($doRemoveEmptyAttributes = true)
    {
        $this->doRemoveEmptyAttributes = (bool)$doRemoveEmptyAttributes;

        return $this;
    }

    /**
     * @param bool $doRemoveHttpPrefixFromAttributes any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes($doRemoveHttpPrefixFromAttributes = true)
    {
        $this->doRemoveHttpPrefixFromAttributes = (bool)$doRemoveHttpPrefixFromAttributes;

        return $this;
    }

    /**
     * @param bool $doRemoveValueFromEmptyInput any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput($doRemoveValueFromEmptyInput = true)
    {
        $this->doRemoveValueFromEmptyInput = (bool)$doRemoveValueFromEmptyInput;

        return $this;
    }

    /**
     * @param bool $doRemoveWhitespaceAroundTags any truthy value enables the option
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags($doRemoveWhitespaceAroundTags = true)
    {
        $this->doRemoveWhitespaceAroundTags = (bool)$doRemoveWhitespaceAroundTags;

        return $this;
    }

    /**
     * @param bool $doSortCssClassNames any truthy value enables the option
     *
     * @return $this
     */
    public function doSortCssClassNames($doSortCssClassNames = true)
    {
        $this->doSortCssClassNames = (bool)$doSortCssClassNames;

        return $this;
    }

    /**
     * @param bool $doSortHtmlAttributes any truthy value enables the option
     *
     * @return $this
     */
    public function doSortHtmlAttributes($doSortHtmlAttributes = true)
    {
        $this->doSortHtmlAttributes = (bool)$doSortHtmlAttributes;

        return $this;
    }

    /**
     * @param bool $doSumUpWhitespace any truthy value enables the option
     *
     * @return $this
     */
    public function doSumUpWhitespace($doSumUpWhitespace = true)
    {
        $this->doSumUpWhitespace = (bool)$doSumUpWhitespace;

        return $this;
    }

    /**
     * Sort the class names of a "class" attribute; other attributes are returned unchanged.
     *
     * @param string $attrName
     * @param string $attrValue
     *
     * @return string the sorted class names, separated by single spaces
     */
    private function sortCssClassNames($attrName, $attrValue)
    {
        if ($attrName !== 'class' || !$attrValue) {
            return $attrValue;
        }

        // Split on any run of ASCII whitespace (space, tab, LF, CR, FF) and
        // drop empty tokens only; a class literally named "0" is a regular token.
        $classes = preg_split('/[ \t\n\r\f]+/', $attrValue, -1, PREG_SPLIT_NO_EMPTY);
        if (!$classes) {
            return '';
        }

        sort($classes);

        return implode(' ', $classes);
    }

    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Runs of two or more whitespace characters in text nodes are replaced by
     * a single space, except for text below the tags listed in
     * self::$skipTagsForRemoveWhitespace.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom)
    {
        // Match complete path segments only ("/pre/" or "/pre[2]/"), so that an
        // element whose name merely starts with a protected tag name is not skipped.
        $skipPattern = '#/(?:' . implode('|', self::$skipTagsForRemoveWhitespace) . ')(?=[/\[])#';

        foreach ($dom->find('//text()') as $textnodeWrapper) {
            $textnode = $textnodeWrapper->getNode();

            if (preg_match($skipPattern, (string)$textnode->getNodePath())) {
                continue;
            }

            $textnode->nodeValue = preg_replace("/\s{2,}/", ' ', $textnode->nodeValue);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
}
// NOTE (code-quality report): Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to extract it into a single class or operation.
// You can also find more detailed suggestions in the "Code" section of your repository.