1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* Class HtmlMin |
9
|
|
|
* |
10
|
|
|
* Inspired by: |
11
|
|
|
* - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js |
12
|
|
|
* - PHP: https://github.com/searchturbine/phpwee-php-minifier |
13
|
|
|
* - PHP: https://github.com/WyriHaximus/HtmlCompress |
14
|
|
|
* - PHP: https://github.com/zaininnari/html-minifier |
15
|
|
|
* - PHP: https://github.com/ampaze/PHP-HTML-Minifier |
16
|
|
|
* - Java: https://code.google.com/archive/p/htmlcompressor/ |
17
|
|
|
* |
18
|
|
|
* Ideas: |
19
|
|
|
* - http://perfectionkills.com/optimizing-html/ |
20
|
|
|
*/ |
21
|
|
|
class HtmlMin implements HtmlMinInterface |
22
|
|
|
{ |
23
|
|
|
/**
 * PCRE pattern matching either a run of two or more whitespace characters
 * or a single carriage-return / line-feed (UTF-8 mode).
 *
 * @var string
 */
private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";

/**
 * Tags for which domNodeClosingTagOptional() always reports the end tag as optional.
 *
 * @var string[]
 *
 * @psalm-var list<string>
 */
private static $optional_end_tags = ['html', 'head', 'body'];

/**
 * Tag names treated as self-closing (consumers are outside this part of the class).
 *
 * @var string[]
 *
 * @psalm-var list<string>
 */
private static $selfClosingTags = [
    'area', 'base', 'basefont', 'br', 'col', 'command', 'embed',
    'frame', 'hr', 'img', 'input', 'isindex', 'keygen', 'link',
    'meta', 'param', 'source', 'track', 'wbr',
];

/**
 * Tag names used as a lookup set (tag name => empty string) for whitespace trimming.
 *
 * @var string[]
 *
 * @psalm-var array<string, string>
 */
private static $trimWhitespaceFromTags = [
    'article' => '',
    'br'      => '',
    'div'     => '',
    'footer'  => '',
    'hr'      => '',
    'nav'     => '',
    'p'       => '',
    'script'  => '',
];

/**
 * Lookup set (attribute name => empty string) of boolean attributes.
 *
 * domNodeAttributesToString() prints these by name only when attribute
 * optimisation is enabled.
 *
 * @var array
 */
private static $booleanAttributes = [
    'allowfullscreen' => '',
    'async'           => '',
    'autofocus'       => '',
    'autoplay'        => '',
    'checked'         => '',
    'compact'         => '',
    'controls'        => '',
    'declare'         => '',
    'default'         => '',
    'defaultchecked'  => '',
    'defaultmuted'    => '',
    'defaultselected' => '',
    'defer'           => '',
    'disabled'        => '',
    'enabled'         => '',
    'formnovalidate'  => '',
    'hidden'          => '',
    'indeterminate'   => '',
    'inert'           => '',
    'ismap'           => '',
    'itemscope'       => '',
    'loop'            => '',
    'multiple'        => '',
    'muted'           => '',
    'nohref'          => '',
    'noresize'        => '',
    'noshade'         => '',
    'novalidate'      => '',
    'nowrap'          => '',
    'open'            => '',
    'pauseonexit'     => '',
    'readonly'        => '',
    'required'        => '',
    'reversed'        => '',
    'scoped'          => '',
    'seamless'        => '',
    'selected'        => '',
    'sortable'        => '',
    'truespeed'       => '',
    'typemustmatch'   => '',
    'visible'         => '',
];

/**
 * Tag names excluded from whitespace removal (consumers are outside this part of the class).
 *
 * @var array
 */
private static $skipTagsForRemoveWhitespace = ['code', 'pre', 'script', 'style', 'textarea'];
139
|
|
|
|
140
|
|
|
// --- protected content bookkeeping ---------------------------------------

/** @var array */
private $protectedChildNodes = [];

/** @var string */
private $protectedChildNodesHelper = 'html-min--voku--saved-content';

// --- feature flags (each one has a fluent setter of the same name) -------

/** @var bool */
private $doOptimizeViaHtmlDomParser = true;

/** @var bool read by domNodeAttributesToString() */
private $doOptimizeAttributes = true;

/** @var bool */
private $doRemoveComments = true;

/** @var bool read by domNodeToString() */
private $doRemoveWhitespaceAroundTags = false;

/** @var bool read by domNodeAttributesToString() */
private $doRemoveOmittedQuotes = true;

/** @var bool read by domNodeToString() */
private $doRemoveOmittedHtmlTags = true;

/** @var bool */
private $doRemoveHttpPrefixFromAttributes = false;

/** @var bool */
private $doRemoveHttpsPrefixFromAttributes = false;

/** @var bool */
private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;

/** @var bool set to true by doMakeSameDomainsLinksRelative() when at least one domain is given */
private $doMakeSameDomainsLinksRelative = false;

/** @var string[] normalised domains stored by doMakeSameDomainsLinksRelative() */
private $localDomains = [];

/** @var string[] */
private $domainsToRemoveHttpPrefixFromAttributes = [
    'google.com',
    'google.de',
];

/** @var string[] NOTE(review): "Staring" looks like a typo for "Starting"; renaming needs a check of all usages */
private $specialHtmlCommentsStaringWith = [];

/** @var string[] */
private $specialHtmlCommentsEndingWith = [];

/** @var bool */
private $doSortCssClassNames = true;

/** @var bool */
private $doSortHtmlAttributes = true;

/** @var bool */
private $doRemoveDeprecatedScriptCharsetAttribute = true;

/** @var bool */
private $doRemoveDefaultAttributes = false;

/** @var bool */
private $doRemoveDeprecatedAnchorName = true;

/** @var bool */
private $doRemoveDeprecatedTypeFromStylesheetLink = true;

/** @var bool */
private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;

/** @var bool */
private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;

/** @var bool */
private $doRemoveDefaultTypeFromButton = false;

/** @var bool */
private $doRemoveDeprecatedTypeFromScriptTag = true;

/** @var bool */
private $doRemoveValueFromEmptyInput = true;

/** @var bool */
private $doRemoveEmptyAttributes = true;

/** @var bool */
private $doSumUpWhitespace = true;

/** @var bool */
private $doRemoveSpacesBetweenTags = false;

/** @var bool */
private $keepBrokenHtml = false;

/** @var bool */
private $withDocType = false;

// --- runtime state ----------------------------------------------------------

/**
 * Observers registered through attachObserverToTheDomLoop(); created in the constructor.
 *
 * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
 *
 * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
 */
private $domLoopObservers;

/** @var int */
private $protected_tags_counter = 0;

/** @var bool read by domNodeToString(): optional end tags are kept when true */
private $isHTML4 = false;

/** @var bool read by domNodeToString(): optional end tags are kept when true */
private $isXHTML = false;

/** @var string[]|null */
private $templateLogicSyntaxInSpecialScriptTags;
329
|
|
|
|
330
|
|
|
/**
 * Create the minifier with an empty observer storage and register the
 * default attribute-optimizing DOM observer.
 */
public function __construct()
{
    $this->domLoopObservers = new \SplObjectStorage();

    $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
    $this->attachObserverToTheDomLoop($defaultObserver);
}
339
|
|
|
|
340
|
|
|
/**
 * Register an observer in the internal observer storage.
 *
 * @param HtmlMinDomObserverInterface $observer observer to add
 *
 * @return void
 */
public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
{
    $this->domLoopObservers->attach($observer);
}
349
|
|
|
|
350
|
|
|
/**
 * Enable or disable the "optimize attributes" option.
 *
 * When enabled, domNodeAttributesToString() prints boolean attributes by name
 * only and collapses whitespace inside "srcset" / "sizes" values.
 *
 * @param bool $doOptimizeAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
{
    $this->doOptimizeAttributes = $doOptimizeAttributes;

    return $this;
}
361
|
|
|
|
362
|
|
|
/**
 * Enable or disable the "optimize via HTML DOM parser" option.
 *
 * @param bool $doOptimizeViaHtmlDomParser new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
{
    $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

    return $this;
}
373
|
|
|
|
374
|
|
|
/**
 * Enable or disable the "remove comments" option.
 *
 * @param bool $doRemoveComments new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveComments(bool $doRemoveComments = true): self
{
    $this->doRemoveComments = $doRemoveComments;

    return $this;
}
385
|
|
|
|
386
|
|
|
/**
 * Enable or disable the "remove default attributes" option.
 *
 * @param bool $doRemoveDefaultAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
{
    $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

    return $this;
}
397
|
|
|
|
398
|
|
|
/**
 * Enable or disable the "remove deprecated anchor name" option.
 *
 * @param bool $doRemoveDeprecatedAnchorName new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
{
    $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

    return $this;
}
409
|
|
|
|
410
|
|
|
/**
 * Enable or disable the "remove deprecated script charset attribute" option.
 *
 * @param bool $doRemoveDeprecatedScriptCharsetAttribute new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
{
    $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

    return $this;
}
421
|
|
|
|
422
|
|
|
/**
 * Enable or disable the "remove deprecated type from script tag" option.
 *
 * @param bool $doRemoveDeprecatedTypeFromScriptTag new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
{
    $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

    return $this;
}
433
|
|
|
|
434
|
|
|
/**
 * Enable or disable the "remove deprecated type from stylesheet link" option.
 *
 * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
{
    $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

    return $this;
}
445
|
|
|
|
446
|
|
|
/**
 * Enable or disable the "remove deprecated type from style and link tag" option.
 *
 * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
{
    $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

    return $this;
}
457
|
|
|
|
458
|
|
|
/**
 * Enable or disable the "remove default media type from style and link tag" option.
 *
 * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
{
    $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

    return $this;
}
469
|
|
|
|
470
|
|
|
/**
 * Enable or disable the "remove default type from button" option.
 *
 * @param bool $doRemoveDefaultTypeFromButton new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
{
    $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

    return $this;
}
481
|
|
|
|
482
|
|
|
/**
 * Enable or disable the "remove empty attributes" option.
 *
 * @param bool $doRemoveEmptyAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
{
    $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

    return $this;
}
493
|
|
|
|
494
|
|
|
/**
 * Enable or disable the "remove http prefix from attributes" option.
 *
 * @param bool $doRemoveHttpPrefixFromAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
{
    $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

    return $this;
}
505
|
|
|
|
506
|
|
|
/**
 * Enable or disable the "remove https prefix from attributes" option.
 *
 * @param bool $doRemoveHttpsPrefixFromAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
{
    $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

    return $this;
}
517
|
|
|
|
518
|
|
|
/**
 * Enable or disable the "keep http and https prefix on external attributes" option.
 *
 * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
{
    $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

    return $this;
}
529
|
|
|
|
530
|
|
|
/**
 * Store the list of local domains and switch the "make same-domain links
 * relative" option on when that list is not empty (off otherwise).
 *
 * Each entry is normalised before it is stored: every occurrence of "//",
 * optionally preceded by "http:" or "https:" (case-insensitive), is removed
 * and trailing slashes are trimmed, e.g. "https://example.com/" becomes
 * "example.com". Array keys are preserved.
 *
 * @param string[] $localDomains domains (with or without scheme) that count as local
 *
 * @return $this
 */
public function doMakeSameDomainsLinksRelative(array $localDomains): self
{
    // array_map() instead of a by-reference foreach: no reference variable is
    // left bound to the last element of the array that gets stored.
    $this->localDomains = \array_map(
        static function ($localDomain): string {
            return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
        },
        $localDomains
    );
    $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

    return $this;
}
547
|
|
|
|
548
|
|
|
/**
 * Return the local domains as stored by doMakeSameDomainsLinksRelative().
 *
 * @return string[]
 */
public function getLocalDomains(): array
{
    return $this->localDomains;
}
555
|
|
|
|
556
|
|
|
/**
 * Enable or disable the "remove omitted html tags" option.
 *
 * domNodeToString() only skips optional closing tags while this option is on.
 *
 * @param bool $doRemoveOmittedHtmlTags new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
{
    $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

    return $this;
}
567
|
|
|
|
568
|
|
|
/**
 * Enable or disable the "remove omitted quotes" option.
 *
 * domNodeAttributesToString() only drops quotes around attribute values
 * while this option is on.
 *
 * @param bool $doRemoveOmittedQuotes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
{
    $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

    return $this;
}
579
|
|
|
|
580
|
|
|
/**
 * Enable or disable the "remove spaces between tags" option.
 *
 * @param bool $doRemoveSpacesBetweenTags new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
{
    $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

    return $this;
}
591
|
|
|
|
592
|
|
|
/**
 * Enable or disable the "remove value from empty input" option.
 *
 * @param bool $doRemoveValueFromEmptyInput new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
{
    $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

    return $this;
}
603
|
|
|
|
604
|
|
|
/**
 * Enable or disable the "remove whitespace around tags" option.
 *
 * @param bool $doRemoveWhitespaceAroundTags new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
{
    $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

    return $this;
}
615
|
|
|
|
616
|
|
|
/**
 * Enable or disable the "sort css class names" option.
 *
 * @param bool $doSortCssClassNames new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doSortCssClassNames(bool $doSortCssClassNames = true): self
{
    $this->doSortCssClassNames = $doSortCssClassNames;

    return $this;
}
627
|
|
|
|
628
|
|
|
/**
 * Enable or disable the "sort html attributes" option.
 *
 * @param bool $doSortHtmlAttributes new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
{
    $this->doSortHtmlAttributes = $doSortHtmlAttributes;

    return $this;
}
639
|
|
|
|
640
|
|
|
/**
 * Enable or disable the "sum up whitespace" option.
 *
 * @param bool $doSumUpWhitespace new state of the option (enabled when omitted)
 *
 * @return $this
 */
public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
{
    $this->doSumUpWhitespace = $doSumUpWhitespace;

    return $this;
}
651
|
|
|
|
652
|
57 |
|
/**
 * Serialize the attributes of a DOM node into an HTML attribute string.
 *
 * - With attribute optimisation enabled, attributes listed in
 *   self::$booleanAttributes are printed by name only, and whitespace inside
 *   "srcset" / "sizes" values is collapsed via self::$regExSpace.
 * - Quotes are dropped (<p class="foo"> → <p class=foo>) when the
 *   "remove omitted quotes" option is on, the value is not empty, contains no
 *   quote, "=", "<", ">", backtick or whitespace, and the attribute name
 *   neither contains a space nor starts with "____SIMPLE_HTML_DOM__VOKU".
 * - Otherwise the value is double-quoted; single quotes are used when the
 *   value contains a double quote but no single quote. If it contains both,
 *   double quotes are kept and every '"' is written as "&quot;" so the
 *   attribute stays well-formed.
 *
 * @param \DOMNode $node node whose attributes are serialized
 *
 * @return string space-separated attributes without leading/trailing whitespace,
 *                or an empty string when the node has no attributes
 */
private function domNodeAttributesToString(\DOMNode $node): string
{
    if ($node->attributes === null) {
        return '';
    }

    $attr_str = '';
    foreach ($node->attributes as $attribute) {
        $attr_str .= $attribute->name;

        if (
            $this->doOptimizeAttributes
            &&
            isset(self::$booleanAttributes[$attribute->name])
        ) {
            // boolean attribute: the name alone is enough
            $attr_str .= ' ';

            continue;
        }

        $attr_str .= '=';

        // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
        $omit_quotes = $this->doRemoveOmittedQuotes
                       &&
                       $attribute->value !== ''
                       &&
                       \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                       &&
                       \strpos($attribute->name, ' ') === false
                       &&
                       \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

        if (
            $this->doOptimizeAttributes
            &&
            (
                $attribute->name === 'srcset'
                ||
                $attribute->name === 'sizes'
            )
        ) {
            // preg_replace() returns null on a PCRE error; keep the untouched
            // value in that case instead of silently emptying the attribute.
            $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
        } else {
            $attr_val = $attribute->value;
        }

        if ($omit_quotes) {
            $attr_str .= $attr_val . ' ';

            continue;
        }

        $quoteTmp = '"';
        if (\strpos($attr_val, '"') !== false) {
            if (\strpos($attr_val, "'") === false) {
                // only double quotes inside: switch the delimiter
                $quoteTmp = "'";
            } else {
                // both quote kinds inside: no delimiter is safe, so encode '"'
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }
        }

        $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
    }

    return \trim($attr_str);
}
713
|
|
|
|
714
|
|
|
/**
 * Decide whether the closing tag of the given element may be left out.
 *
 * Rules: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
 *
 * "Next element" below is whatever getNextSiblingOfTypeDOMElement() returns
 * for the node; "last" means that helper returned null.
 *
 * Implemented:
 * - <html>, <head>, <body>: always (self::$optional_end_tags).
 * - <p>: next element is one of address, article, aside, blockquote, details,
 *   dir, div, dl, fieldset, figcaption, figure, footer, form, h1-h6, header,
 *   hgroup, hr, main, menu, nav, ol, p, pre, section, table, ul; or it is last
 *   and the parent is not a, audio, del, ins, map, noscript or video.
 * - <li>: next is li, or last.
 * - <td> / <th>: next is td or th, or last.
 * - <option>: next is option or optgroup, or last.
 * - <optgroup>: next is optgroup, or last.
 * - <tr>: next is tr, or last.
 * - <dt>: next is dt or dd (never when last — the spec has no such rule).
 * - <dd>: next is dd or dt, or last.
 * - <rp>: next is rt or rp, or last.
 * - <source> inside audio, video, picture or source: next is source, or last.
 *
 * TODO: Not Implemented
 * - <html> may be omitted if first thing inside is not comment
 * - <head> may be omitted if first thing inside is an element
 * - <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
 * - <colgroup> start tag: first thing inside is <col> and not preceded by a colgroup whose end tag was omitted
 * - <colgroup> / <caption> end tag: not immediately followed by ASCII whitespace or a comment
 * - <thead> end tag: immediately followed by tbody or tfoot
 * - <tbody> start tag: first thing inside is <tr> and not preceded by tbody/thead/tfoot whose end tag was omitted
 * - <tbody> end tag: immediately followed by tbody or tfoot, or no more content in the parent
 * - <tfoot> end tag: no more content in the parent
 * - (a start tag must never be omitted if it has any attributes)
 *
 * @param \DOMNode $node
 *
 * @return bool true when the end tag can be dropped
 */
private function domNodeClosingTagOptional(\DOMNode $node): bool
{
    $tag_name = $node->nodeName;

    /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
    $parent_node = $node->parentNode;
    $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

    $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
    $isLastElement = $nextSibling === null;
    $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

    if (\in_array($tag_name, self::$optional_end_tags, true)) {
        return true;
    }

    if ($tag_name === 'p') {
        if ($isLastElement) {
            return $parent_tag_name !== null
                   &&
                   !\in_array(
                       $parent_tag_name,
                       ['a', 'audio', 'del', 'ins', 'map', 'noscript', 'video'],
                       true
                   );
        }

        return \in_array(
            $nextTagName,
            [
                'address', 'article', 'aside', 'blockquote', 'details', 'dir', 'div', 'dl',
                'fieldset', 'figcaption', 'figure', 'footer', 'form',
                'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol', 'p', 'pre',
                'section', 'table', 'ul',
            ],
            true
        );
    }

    // tag => next-element tags that make the end tag optional
    $allowedFollowers = [
        'li'       => ['li'],
        'optgroup' => ['optgroup'],
        'rp'       => ['rp', 'rt'],
        'tr'       => ['tr'],
        'source'   => ['source'],
        'td'       => ['td', 'th'],
        'th'       => ['td', 'th'],
        'dd'       => ['dd', 'dt'],
        'dt'       => ['dd', 'dt'],
        'option'   => ['option', 'optgroup'],
    ];

    if (!isset($allowedFollowers[$tag_name])) {
        return false;
    }

    if (
        $tag_name === 'source'
        &&
        !\in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
    ) {
        return false;
    }

    if ($isLastElement) {
        // every table entry except <dt> may also drop its end tag at the end of the parent
        return $tag_name !== 'dt';
    }

    // $nextTagName is null when the sibling is not a \DOMElement -> strict in_array() yields false
    return \in_array($nextTagName, $allowedFollowers[$tag_name], true);
}
983
|
|
|
|
984
|
57 |
|
/**
 * Serialize the child nodes of a DOM node into an HTML string.
 *
 * - Elements are written as "<tag attributes>", followed by their recursively
 *   serialized children and, unless the closing tag may be omitted, "</tag>".
 * - Text that is not element-content-whitespace is appended verbatim.
 * - Whitespace-only text that has both a previous and a next sibling is
 *   reduced to a single space (no space is added directly inside <head>).
 * - Comments are re-wrapped in "<!--" and "-->".
 *
 * @param \DOMNode $node the node whose children are serialized
 *
 * @return string
 */
protected function domNodeToString(\DOMNode $node): string
{
    // Strip trailing whitespace and re-append one space, except for children of <head>.
    $endWithSingleSpace = static function (string $buffer, \DOMNode $current): string {
        $buffer = \rtrim($buffer);

        if ($current->parentNode && $current->parentNode->nodeName !== 'head') {
            $buffer .= ' ';
        }

        return $buffer;
    };

    $output = '';

    // '' -> 'is_empty' (whitespace handled in this iteration) -> 'last_was_empty' (one iteration later)
    $whitespaceState = '';

    foreach ($node->childNodes as $child) {
        $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

        if ($child instanceof \DOMElement) {
            $openingTag = \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
            $output .= $openingTag;
            $output .= '>' . $this->domNodeToString($child);

            $closingTagIsOmitted = $this->doRemoveOmittedHtmlTags
                && !$this->isHTML4
                && !$this->isXHTML
                && $this->domNodeClosingTagOptional($child);
            if (!$closingTagIsOmitted) {
                $output .= '</' . $child->tagName . '>';
            }

            if ($this->doRemoveWhitespaceAroundTags) {
                continue;
            }

            // keep one space after the element if a single-space text node follows it
            $following = $child->nextSibling;
            if (!($following instanceof \DOMText) || $following->wholeText !== ' ') {
                continue;
            }

            if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                $output = $endWithSingleSpace($output, $child);
            }
            $whitespaceState = 'is_empty';

            continue;
        }

        if ($child instanceof \DOMText) {
            if (!$child->isElementContentWhitespace()) {
                $output .= $child->wholeText;

                continue;
            }

            // whitespace at the very start or end of the parent is dropped
            if ($child->previousSibling === null || $child->nextSibling === null) {
                continue;
            }

            $containsSpace = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
            if (
                $containsSpace
                ||
                ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ')
            ) {
                $output = $endWithSingleSpace($output, $child);
            }
            $whitespaceState = 'is_empty';

            continue;
        }

        if ($child instanceof \DOMComment) {
            $output .= '<!--' . $child->textContent . '-->';
        }
    }

    return $output;
}
1083
|
|
|
|
1084
|
|
|
/**
 * Build the "<!DOCTYPE ...>" declaration for the given document node.
 *
 * Returns an empty string when the input had no doctype of its own
 * ($this->withDocType is false) or when no named DOMDocumentType child exists.
 *
 * Output forms:
 * - <!DOCTYPE name>
 * - <!DOCTYPE name PUBLIC "publicId">
 * - <!DOCTYPE name PUBLIC "publicId" "systemId">
 * - <!DOCTYPE name SYSTEM "systemId">
 *
 * @param \DOMNode $node the node whose direct children are searched for the doctype
 *
 * @return string
 */
private function getDoctype(\DOMNode $node): string
{
    // check the doc-type only if it wasn't generated by DomDocument itself
    if (!$this->withDocType) {
        return '';
    }

    foreach ($node->childNodes as $child) {
        if (!($child instanceof \DOMDocumentType) || !$child->name) {
            continue;
        }

        $doctype = '<!DOCTYPE ' . $child->name;

        if ($child->publicId) {
            $doctype .= ' PUBLIC "' . $child->publicId . '"';

            // The system id follows the public id without a keyword; emit exactly
            // one separating space (the empty keyword used to leave two).
            if ($child->systemId) {
                $doctype .= ' "' . $child->systemId . '"';
            }
        } elseif ($child->systemId) {
            $doctype .= ' SYSTEM "' . $child->systemId . '"';
        }

        return $doctype . '>';
    }

    return '';
}
1119
|
|
|
|
1120
|
|
|
/**
 * Get the configured list stored in "domainsToRemoveHttpPrefixFromAttributes".
 *
 * @return array
 */
public function getDomainsToRemoveHttpPrefixFromAttributes(): array
{
    return $this->domainsToRemoveHttpPrefixFromAttributes;
}
1127
|
|
|
|
1128
|
|
|
/**
 * Get the current value of the "doOptimizeAttributes" option.
 *
 * @return bool
 */
public function isDoOptimizeAttributes(): bool
{
    return $this->doOptimizeAttributes;
}
1135
|
|
|
|
1136
|
|
|
/**
 * Get the current value of the "doOptimizeViaHtmlDomParser" option.
 *
 * @return bool
 */
public function isDoOptimizeViaHtmlDomParser(): bool
{
    return $this->doOptimizeViaHtmlDomParser;
}
1143
|
|
|
|
1144
|
|
|
/**
 * Get the current value of the "doRemoveComments" option.
 *
 * @return bool
 */
public function isDoRemoveComments(): bool
{
    return $this->doRemoveComments;
}
1151
|
|
|
|
1152
|
|
|
/**
 * Get the current value of the "doRemoveDefaultAttributes" option.
 *
 * @return bool
 */
public function isDoRemoveDefaultAttributes(): bool
{
    return $this->doRemoveDefaultAttributes;
}
1159
|
|
|
|
1160
|
|
|
/**
 * Get the current value of the "doRemoveDeprecatedAnchorName" option.
 *
 * @return bool
 */
public function isDoRemoveDeprecatedAnchorName(): bool
{
    return $this->doRemoveDeprecatedAnchorName;
}
1167
|
|
|
|
1168
|
|
|
/**
 * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
 *
 * @return bool
 */
public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
{
    return $this->doRemoveDeprecatedScriptCharsetAttribute;
}
1175
|
|
|
|
1176
|
|
|
/**
 * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
 *
 * @return bool
 */
public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
{
    return $this->doRemoveDeprecatedTypeFromScriptTag;
}
1183
|
|
|
|
1184
|
|
|
/**
 * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
 *
 * @return bool
 */
public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
{
    return $this->doRemoveDeprecatedTypeFromStylesheetLink;
}
1191
|
|
|
|
1192
|
|
|
/**
 * Get the current value of the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option.
 *
 * @return bool
 */
public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
{
    return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
}
1199
|
|
|
|
1200
|
|
|
/**
 * Get the current value of the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option.
 *
 * @return bool
 */
public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
{
    return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
}
1207
|
|
|
|
1208
|
|
|
/**
 * Get the current value of the "doRemoveDefaultTypeFromButton" option.
 *
 * @return bool
 */
public function isDoRemoveDefaultTypeFromButton(): bool
{
    return $this->doRemoveDefaultTypeFromButton;
}
1215
|
|
|
|
1216
|
|
|
/**
 * Get the current value of the "doRemoveEmptyAttributes" option.
 *
 * @return bool
 */
public function isDoRemoveEmptyAttributes(): bool
{
    return $this->doRemoveEmptyAttributes;
}
1223
|
|
|
|
1224
|
|
|
/**
 * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
 *
 * @return bool
 */
public function isDoRemoveHttpPrefixFromAttributes(): bool
{
    return $this->doRemoveHttpPrefixFromAttributes;
}
1231
|
|
|
|
1232
|
|
|
/**
 * Get the current value of the "doRemoveHttpsPrefixFromAttributes" option.
 *
 * @return bool
 */
public function isDoRemoveHttpsPrefixFromAttributes(): bool
{
    return $this->doRemoveHttpsPrefixFromAttributes;
}
1239
|
|
|
|
1240
|
|
|
/**
 * Get the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option.
 *
 * @return bool
 */
public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
{
    return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
}
1247
|
|
|
|
1248
|
|
|
/**
 * Get the current value of the "doMakeSameDomainsLinksRelative" option.
 *
 * @return bool
 */
public function isDoMakeSameDomainsLinksRelative(): bool
{
    return $this->doMakeSameDomainsLinksRelative;
}
1255
|
|
|
|
1256
|
|
|
/**
 * Get the current value of the "doRemoveOmittedHtmlTags" option.
 *
 * @return bool
 */
public function isDoRemoveOmittedHtmlTags(): bool
{
    return $this->doRemoveOmittedHtmlTags;
}
1263
|
|
|
|
1264
|
|
|
/**
 * Get the current value of the "doRemoveOmittedQuotes" option.
 *
 * @return bool
 */
public function isDoRemoveOmittedQuotes(): bool
{
    return $this->doRemoveOmittedQuotes;
}
1271
|
|
|
|
1272
|
|
|
/**
 * Get the current value of the "doRemoveSpacesBetweenTags" option.
 *
 * @return bool
 */
public function isDoRemoveSpacesBetweenTags(): bool
{
    return $this->doRemoveSpacesBetweenTags;
}
1279
|
|
|
|
1280
|
|
|
/**
 * Get the current value of the "doRemoveValueFromEmptyInput" option.
 *
 * @return bool
 */
public function isDoRemoveValueFromEmptyInput(): bool
{
    return $this->doRemoveValueFromEmptyInput;
}
1287
|
|
|
|
1288
|
|
|
/**
 * Get the current value of the "doRemoveWhitespaceAroundTags" option.
 *
 * @return bool
 */
public function isDoRemoveWhitespaceAroundTags(): bool
{
    return $this->doRemoveWhitespaceAroundTags;
}
1295
|
|
|
|
1296
|
|
|
/**
 * Get the current value of the "doSortCssClassNames" option.
 *
 * @return bool
 */
public function isDoSortCssClassNames(): bool
{
    return $this->doSortCssClassNames;
}
1303
|
|
|
|
1304
|
|
|
/**
 * Get the current value of the "doSortHtmlAttributes" option.
 *
 * @return bool
 */
public function isDoSortHtmlAttributes(): bool
{
    return $this->doSortHtmlAttributes;
}
1311
|
|
|
|
1312
|
|
|
/**
 * Get the current value of the "doSumUpWhitespace" option.
 *
 * @return bool
 */
public function isDoSumUpWhitespace(): bool
{
    return $this->doSumUpWhitespace;
}
1319
|
|
|
|
1320
|
|
|
/**
 * Get the current value of the "isHTML4" flag.
 *
 * @return bool
 */
public function isHTML4(): bool
{
    return $this->isHTML4;
}
1327
|
|
|
|
1328
|
|
|
/**
 * Get the current value of the "isXHTML" flag.
 *
 * @return bool
 */
public function isXHTML(): bool
{
    return $this->isXHTML;
}
1335
|
|
|
|
1336
|
|
|
/**
 * Minify the given HTML.
 *
 * Steps: optional DOM based minification, whitespace clean-up between
 * attributes (and optionally between tags), restoring of protected HTML and
 * HTML entities, and a final clean-up of line breaks around <html> / <head>
 * and of trailing slashes on self-closing tags. If the result is longer than
 * the (trimmed) input, the trimmed input is returned instead.
 *
 * @param string $html                     the HTML to minify (cast to string)
 * @param bool   $multiDecodeNewHtmlEntity forwarded unchanged to the DOM based minification
 *
 * @return string the minified HTML, or an empty string for empty / whitespace-only input
 */
public function minify($html, $multiDecodeNewHtmlEntity = false): string
{
    // Compare against '' explicitly: a truthiness test would treat the valid input "0" as empty.
    $html = \trim((string) $html);
    if ($html === '') {
        return '';
    }

    // reset
    $this->protectedChildNodes = [];

    // save old content
    $origHtml = $html;
    $origHtmlLength = \strlen($html);

    // -------------------------------------------------------------------------
    // Minify the HTML via "HtmlDomParser"
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser) {
        $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
    }

    // -------------------------------------------------------------------------
    // Trim whitespace from html-string. [protected html is still protected]
    // -------------------------------------------------------------------------

    // Remove extra white-space(s) between HTML attribute(s)
    if (\strpos($html, ' ') !== false) {
        // On a PCRE error (null) keep the current string instead of casting null to ''.
        $html = \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
            static function (array $matches): string {
                $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                if ($attributes === null) {
                    // e.g. invalid UTF-8 with the "u" modifier: leave the tag untouched
                    // rather than silently dropping all of its attributes
                    return $matches[0];
                }

                return '<' . $matches[1] . $attributes . $matches[3] . '>';
            },
            $html
        ) ?? $html;
    }

    if ($this->doRemoveSpacesBetweenTags && \strpos($html, ' ') !== false) {
        // Remove spaces that are between > and <
        $html = \preg_replace('#(>)\s(<)#', '>$2', $html) ?? $html;
    }

    // -------------------------------------------------------------------------
    // Restore protected HTML-code.
    // -------------------------------------------------------------------------

    if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
        $html = \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        ) ?? $html;
    }

    // -------------------------------------------------------------------------
    // Restore protected HTML-entities.
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser) {
        $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
    }

    // ------------------------------------
    // Final clean-up
    // ------------------------------------

    $html = \str_replace(
        [
            'html>' . "\n",
            "\n" . '<html',
            'html/>' . "\n",
            "\n" . '</html',
            'head>' . "\n",
            "\n" . '<head',
            'head/>' . "\n",
            "\n" . '</head',
        ],
        [
            'html>',
            '<html',
            'html/>',
            '</html',
            'head>',
            '<head',
            'head/>',
            '</head',
        ],
        $html
    );

    // self closing tags, don't need a trailing slash ...
    $replace = [];
    $replacement = [];
    foreach (self::$selfClosingTags as $selfClosingTag) {
        $replace[] = '<' . $selfClosingTag . '/>';
        $replacement[] = '<' . $selfClosingTag . '>';
        $replace[] = '<' . $selfClosingTag . ' />';
        $replacement[] = '<' . $selfClosingTag . '>';
        $replace[] = '></' . $selfClosingTag . '>';
        $replacement[] = '>';
    }
    $html = \str_replace($replace, $replacement, $html);

    // ------------------------------------
    // check if compression worked
    // ------------------------------------

    if ($origHtmlLength < \strlen($html)) {
        $html = $origHtml;
    }

    return $html;
}
1467
|
|
|
|
1468
|
|
|
/**
 * Find the next relevant sibling of a node.
 *
 * Walks forward through the siblings and returns the first DOMElement, or the
 * first DOMText reached by the walk. A text sibling that is whitespace-only or
 * contains "<" is stepped over. Other node types (e.g. comments) are skipped.
 *
 * @param \DOMNode $node
 *
 * @return \DOMNode|null null if there is no such sibling
 */
protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
{
    while (true) {
        /** @var \DOMElement|\DOMText|null $sibling - false-positive error from phpstan */
        $sibling = $node->nextSibling;

        if ($sibling instanceof \DOMText) {
            $isRelevantText = \trim($sibling->textContent) !== ''
                && \strpos($sibling->textContent, '<') === false;

            // irrelevant text is stepped over by continuing with its own next sibling
            $node = $isRelevantText ? $sibling : $sibling->nextSibling;
        } else {
            $node = $sibling;
        }

        if ($node === null || $node instanceof \DOMElement || $node instanceof \DOMText) {
            return $node;
        }
    }
}
1496
|
|
|
|
1497
|
|
|
/**
 * Check whether the given comment text is a conditional comment.
 *
 * INFO: conditional comments no longer work since IE 10.
 *
 * <!--[if expression]> HTML <![endif]-->
 * <![if expression]> HTML <![endif]>
 *
 * The text counts as conditional if it starts with "[if ...]" or ends with
 * "[endif]".
 *
 * @param string $comment the comment text
 *
 * @return bool
 */
private function isConditionalComment($comment): bool
{
    // the cheap strpos() checks avoid running the regex on ordinary comments
    /** @noinspection RegExpRedundantEscape */
    return (
        \strpos($comment, '[if ') !== false
        &&
        \preg_match('/^\[if [^\]]+\]/', $comment)
    ) || (
        \strpos($comment, '[endif]') !== false
        &&
        \preg_match('/\[endif\]$/', $comment)
    );
}
1529
|
|
|
|
1530
|
|
|
/**
 * Check whether the given comment text is a special comment, i.e. it starts
 * with one of the configured prefixes or ends with one of the configured
 * suffixes.
 *
 * @param string $comment the comment text
 *
 * @return bool
 */
private function isSpecialComment($comment): bool
{
    foreach ($this->specialHtmlCommentsStaringWith as $prefix) {
        if (\strpos($comment, $prefix) === 0) {
            return true;
        }
    }

    foreach ($this->specialHtmlCommentsEndingWith as $suffix) {
        $tail = \substr($comment, -\strlen($suffix));
        if ($tail === $suffix) {
            return true;
        }
    }

    return false;
}
1553
|
|
|
|
1554
|
|
|
/**
 * Minify the HTML via the DOM parser.
 *
 * Loads the HTML into a HtmlDomParser, detects the doctype, protects
 * <nocompress>, <code>, inline <script> / <style> and conditional / special
 * comments, optionally removes comments and sums up whitespace, notifies the
 * registered observers before and after the minification and finally
 * serializes the DOM back into a string.
 *
 * @param string $html
 * @param bool   $multiDecodeNewHtmlEntity forwarded unchanged to HtmlDomParser::fixHtmlOutput()
 *
 * @return string
 */
private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
{
    // init dom
    $dom = new HtmlDomParser();
    $dom->useKeepBrokenHtml($this->keepBrokenHtml);

    if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
        $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
    }

    $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
    $dom->getDocument()->formatOutput = false; // do not formats output with indentation

    // load dom
    /** @noinspection UnusedFunctionResultInspection */
    $dom->loadHtml($html);

    $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

    $doctypeStr = $this->getDoctype($dom->getDocument());

    // Always recompute both flags: when this instance is re-used for a document
    // without a doctype, the flags of the previous document must not leak into
    // this run. (strpos() on an empty doctype string simply yields false.)
    $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
    $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;

    // -------------------------------------------------------------------------
    // Protect <nocompress> HTML tags first.
    // -------------------------------------------------------------------------

    $dom = $this->protectTagHelper($dom, 'nocompress');

    // -------------------------------------------------------------------------
    // Notify the Observer before the minification.
    // -------------------------------------------------------------------------

    foreach ($dom->find('*') as $element) {
        $this->notifyObserversAboutDomElementBeforeMinification($element);
    }

    // -------------------------------------------------------------------------
    // Protect HTML tags and conditional comments.
    // -------------------------------------------------------------------------

    $dom = $this->protectTags($dom);

    // -------------------------------------------------------------------------
    // Remove default HTML comments. [protected html is still protected]
    // -------------------------------------------------------------------------

    if ($this->doRemoveComments) {
        $dom = $this->removeComments($dom);
    }

    // -------------------------------------------------------------------------
    // Sum-Up extra whitespace from the Dom. [protected html is still protected]
    // -------------------------------------------------------------------------

    if ($this->doSumUpWhitespace) {
        $dom = $this->sumUpWhitespace($dom);
    }

    foreach ($dom->find('*') as $element) {

        // -------------------------------------------------------------------------
        // Remove whitespace around tags. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveWhitespaceAroundTags) {
            $this->removeWhitespaceAroundTags($element);
        }

        // -------------------------------------------------------------------------
        // Notify the Observer after the minification.
        // -------------------------------------------------------------------------

        $this->notifyObserversAboutDomElementAfterMinification($element);
    }

    // -------------------------------------------------------------------------
    // Convert the Dom into a string.
    // -------------------------------------------------------------------------

    return $dom->fixHtmlOutput(
        $doctypeStr . $this->domNodeToString($dom->getDocument()),
        $multiDecodeNewHtmlEntity
    );
}
1648
|
|
|
|
1649
|
|
|
/**
 * Call domElementAfterMinification() on every registered DOM loop observer.
 *
 * @param SimpleHtmlDomInterface $domElement the element that was processed
 *
 * @return void
 */
private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
{
    foreach ($this->domLoopObservers as $domLoopObserver) {
        $domLoopObserver->domElementAfterMinification($domElement, $this);
    }
}
1660
|
|
|
|
1661
|
|
|
/**
 * Call domElementBeforeMinification() on every registered DOM loop observer.
 *
 * @param SimpleHtmlDomInterface $domElement the element that is about to be processed
 *
 * @return void
 */
private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
{
    foreach ($this->domLoopObservers as $domLoopObserver) {
        $domLoopObserver->domElementBeforeMinification($domElement, $this);
    }
}
1672
|
|
|
|
1673
|
|
|
/**
 * Protect the content around every element that matches the selector.
 *
 * For each match the inner HTML of the element's parent is stored in
 * $this->protectedChildNodes, and the parent's node value is replaced by a
 * numbered placeholder tag, so that later steps do not touch the content.
 *
 * @param HtmlDomParser $dom
 * @param string        $selector
 *
 * @return HtmlDomParser the same parser instance
 */
private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
{
    $placeholderTag = $this->protectedChildNodesHelper;

    foreach ($dom->find($selector) as $match) {
        if ($match->isRemoved()) {
            continue;
        }

        $this->protectedChildNodes[$this->protected_tags_counter] = $match->parentNode()->innerHtml();

        $parent = $match->getNode()->parentNode;
        if ($parent !== null) {
            $parent->nodeValue = '<' . $placeholderTag . ' data-' . $placeholderTag . '="' . $this->protected_tags_counter . '"></' . $placeholderTag . '>';
        }

        ++$this->protected_tags_counter;
    }

    return $dom;
}
1697
|
|
|
|
1698
|
|
|
/**
 * Prevent changes of <code> content, inline "styles" and "scripts" and of
 * conditional / special comments.
 *
 * The protected content is stored in $this->protectedChildNodes and replaced
 * in the DOM by a numbered placeholder tag.
 *
 * @param HtmlDomParser $dom
 *
 * @return HtmlDomParser the same parser instance
 */
private function protectTags(HtmlDomParser $dom): HtmlDomParser
{
    $placeholderTag = $this->protectedChildNodesHelper;

    $this->protectTagHelper($dom, 'code');

    foreach ($dom->find('script, style') as $inlineElement) {
        if ($inlineElement->isRemoved()) {
            continue;
        }

        if ($inlineElement->tag === 'script' || $inlineElement->tag === 'style') {
            $attributes = $inlineElement->getAllAttributes();
            // skip external links
            if (isset($attributes['src'])) {
                continue;
            }
        }

        $this->protectedChildNodes[$this->protected_tags_counter] = $inlineElement->innerhtml;
        $inlineElement->getNode()->nodeValue = '<' . $placeholderTag . ' data-' . $placeholderTag . '="' . $this->protected_tags_counter . '"></' . $placeholderTag . '>';

        ++$this->protected_tags_counter;
    }

    foreach ($dom->find('//comment()') as $commentElement) {
        if ($commentElement->isRemoved()) {
            continue;
        }

        $text = $commentElement->text();

        // only conditional or special comments are protected
        if (!($this->isConditionalComment($text) || $this->isSpecialComment($text))) {
            continue;
        }

        $this->protectedChildNodes[$this->protected_tags_counter] = '<!--' . $text . '-->';

        /* @var $commentNode \DOMComment */
        $commentNode = $commentElement->getNode();
        $placeholder = new \DOMText('<' . $placeholderTag . ' data-' . $placeholderTag . '="' . $this->protected_tags_counter . '"></' . $placeholderTag . '>');
        $parent = $commentNode->parentNode;
        if ($parent !== null) {
            $parent->replaceChild($placeholder, $commentNode);
        }

        ++$this->protected_tags_counter;
    }

    return $dom;
}
1758
|
|
|
|
1759
|
|
|
/**
 * Remove comments from the DOM.
 *
 * Comments whose value contains "[" are kept; all other comments are removed
 * from their parent. The document is normalized afterwards.
 *
 * @param HtmlDomParser $dom
 *
 * @return HtmlDomParser the same parser instance
 */
private function removeComments(HtmlDomParser $dom): HtmlDomParser
{
    foreach ($dom->find('//comment()') as $commentWrapper) {
        $commentNode = $commentWrapper->getNode();

        if (\strpos($commentNode->nodeValue, '[') !== false) {
            continue;
        }

        $parent = $commentNode->parentNode;
        if ($parent !== null) {
            $parent->removeChild($commentNode);
        }
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
}
1783
|
|
|
|
1784
|
|
|
/**
 * Collapse whitespace in the text nodes in and around an element.
 *
 * Only applies to tags listed in self::$trimWhitespaceFromTags that have at
 * least one child node. The first child, last child, previous sibling and
 * next sibling are inspected; in each one that is a text node, every match
 * of self::$regExSpace is replaced by a single space.
 *
 * @param SimpleHtmlDomInterface $element
 *
 * @return void
 */
private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
{
    if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
        return;
    }

    $node = $element->getNode();
    if ($node->childNodes->length <= 0) {
        return;
    }

    $candidates = [
        $node->firstChild,
        $node->lastChild,
        $node->previousSibling,
        $node->nextSibling,
    ];

    /** @var mixed $candidate - false-positive error from phpstan */
    foreach ($candidates as $candidate) {
        if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
            continue;
        }

        $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
        if ($collapsed !== null) {
            $candidate->nodeValue = $collapsed;
        }
    }
}
1820
|
|
|
|
1821
|
|
|
/**
 * Callback for preg_replace_callback(): resolve a placeholder tag back to the
 * protected HTML it stands for.
 *
 * The numeric id is read from the quoted value in the "attributes" match and
 * used as key into $this->protectedChildNodes.
 *
 * @param array $matches PREG matches
 *
 * @return string the protected HTML, or an empty string if nothing is stored for the id
 */
private function restoreProtectedHtml($matches): string
{
    \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatches);

    return $this->protectedChildNodes[$idMatches['id']] ?? '';
}
1834
|
|
|
|
1835
|
|
|
/**
 * Replace the list stored in "domainsToRemoveHttpPrefixFromAttributes".
 *
 * @param string[] $domainsToRemoveHttpPrefixFromAttributes
 *
 * @return $this
 */
public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
{
    $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

    return $this;
}
1846
|
|
|
|
1847
|
|
|
/**
 * Configure which comments count as "special": comments starting with one of
 * the given prefixes or ending with one of the given suffixes.
 *
 * @param string[] $startingWith
 * @param string[] $endingWith
 *
 * @return $this
 */
public function setSpecialHtmlComments(array $startingWith, array $endingWith = []): self
{
    $this->specialHtmlCommentsStaringWith = $startingWith;
    $this->specialHtmlCommentsEndingWith = $endingWith;

    return $this;
}
1860
|
|
|
|
1861
|
|
|
/**
 * Sum-up extra whitespace from dom-nodes.
 *
 * In every text node every match of self::$regExSpace is replaced by a single
 * space. Text nodes whose node path contains "/<tag>" for one of the tags in
 * self::$skipTagsForRemoveWhitespace are left untouched. The document is
 * normalized afterwards.
 *
 * @param HtmlDomParser $dom
 *
 * @return HtmlDomParser the same parser instance
 */
private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
{
    foreach ($dom->find('//text()') as $textNodeWrapper) {
        /* @var $textNode \DOMNode */
        $textNode = $textNodeWrapper->getNode();

        $nodePath = $textNode->getNodePath();
        if ($nodePath === null) {
            continue;
        }

        foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
            // plain concatenation: the "${var}" interpolation form is deprecated as of PHP 8.2
            if (\strpos($nodePath, '/' . $pattern) !== false) {
                // the text lives inside a tag whose whitespace must be preserved
                continue 2;
            }
        }

        $collapsed = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
        if ($collapsed !== null) {
            $textNode->nodeValue = $collapsed;
        }
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
}
1901
|
|
|
|
1902
|
|
|
/**
 * Switch the "keep broken html" option on or off.
 *
 * WARNING: maybe bad for performance ...
 *
 * The flag is only stored here; its effect is implemented by the code that
 * reads $this->keepBrokenHtml.
 *
 * @param bool $keepBrokenHtml new value of the option
 *
 * @return HtmlMin this instance, for method chaining
 */
public function useKeepBrokenHtml(bool $keepBrokenHtml): self
{
    $this->keepBrokenHtml = $keepBrokenHtml;

    return $this;
}
1915
|
|
|
|
1916
|
|
|
/**
 * Replace the list of template-logic syntax markers used for special
 * script tags.
 *
 * Every element is checked to be a string before anything is stored, so an
 * invalid list leaves the current configuration untouched.
 *
 * @param string[] $templateLogicSyntaxInSpecialScriptTags
 *
 * @throws \InvalidArgumentException if any element is not a string
 *
 * @return HtmlMin this instance, for method chaining
 */
public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
{
    foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
        if (!\is_string($tmp)) {
            // __FUNCTION__ keeps the message in sync with the real method
            // name (it used to name a differently called method).
            throw new \InvalidArgumentException(__FUNCTION__ . ' only allows string[]');
        }
    }

    $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

    return $this;
}
1933
|
|
|
} |
1934
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.