Completed
Push — master ( 1b6d9c...62a1f2 )
by Lars
03:05
created

HtmlMin::isDoRemoveDefaultAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    private static $selfClosingTags = [
38
        'area',
39
        'base',
40
        'basefont',
41
        'br',
42
        'col',
43
        'command',
44
        'embed',
45
        'frame',
46
        'hr',
47
        'img',
48
        'input',
49
        'isindex',
50
        'keygen',
51
        'link',
52
        'meta',
53
        'param',
54
        'source',
55
        'track',
56
        'wbr',
57
    ];
58
59
    private static $trimWhitespaceFromTags = [
60
        'article' => '',
61
        'br'      => '',
62
        'div'     => '',
63
        'footer'  => '',
64
        'hr'      => '',
65
        'nav'     => '',
66
        'p'       => '',
67
        'script'  => '',
68
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var \SplObjectStorage|HtmlMinDomObserverInterface[]
248
     */
249
    private $domLoopObservers;
250
251
    /**
     * Create a minifier with an empty observer storage and register the
     * built-in HtmlMinDomObserverOptimizeAttributes observer on it.
     */
    public function __construct()
    {
        $observerStorage = new \SplObjectStorage();
        $this->domLoopObservers = $observerStorage;

        // the storage must exist before the first observer gets attached
        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
260
261
    /**
     * Register an observer in the internal observer storage.
     *
     * Every stored observer is called by the notifyObserversAboutDomElement*()
     * methods of this class.
     *
     * @param HtmlMinDomObserverInterface $observer
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $storage = $this->domLoopObservers;
        $storage->attach($observer);
    }
270
271
    /**
     * Call domElementBeforeMinification() on every attached observer,
     * handing over the DOM element and this minifier instance.
     *
     * @param SimpleHtmlDom $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDom $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
282
283 42
    /**
     * Call domElementAfterMinification() on every attached observer,
     * handing over the DOM element and this minifier instance.
     *
     * @param SimpleHtmlDom $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDom $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
289
290
    /**
     * Set the "doOptimizeAttributes" option (fluent setter).
     *
     * The option is read by domNodeAttributesToString(): when it is on, attributes
     * listed in self::$booleanAttributes are written without a value and the
     * whitespace inside "srcset" / "sizes" values is collapsed.
     *
     * @param bool $doOptimizeAttributes new value of the option, true when omitted
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
301
302
    /**
     * Set the "doOptimizeViaHtmlDomParser" option (fluent setter).
     *
     * The stored value can be read back via isDoOptimizeViaHtmlDomParser().
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option, true when omitted
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
313
314
    /**
     * Set the "doRemoveComments" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveComments().
     *
     * @param bool $doRemoveComments new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
325
326
    /**
     * Set the "doRemoveDefaultAttributes" option (fluent setter).
     *
     * The property itself defaults to false; calling this method without an
     * argument switches it to true. Read it back via isDoRemoveDefaultAttributes().
     *
     * @param bool $doRemoveDefaultAttributes new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
337
338
    /**
     * Set the "doRemoveDeprecatedAnchorName" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveDeprecatedAnchorName().
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
349
350
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveDeprecatedScriptCharsetAttribute().
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
361
362
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveDeprecatedTypeFromScriptTag().
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
373
374
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveDeprecatedTypeFromStylesheetLink().
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
385
386
    /**
     * Set the "doRemoveEmptyAttributes" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveEmptyAttributes().
     *
     * @param bool $doRemoveEmptyAttributes new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
397
398
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option (fluent setter).
     *
     * The property itself defaults to false; calling this method without an
     * argument switches it to true. Read it back via isDoRemoveHttpPrefixFromAttributes().
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
409
410
    /**
     * Current value of the "doSortCssClassNames" option
     * (true unless changed via doSortCssClassNames()).
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
417
418
    /**
     * Current value of the "doSortHtmlAttributes" option
     * (true unless changed via doSortHtmlAttributes()).
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
425
426
    /**
     * Current value of the "doRemoveDeprecatedScriptCharsetAttribute" option
     * (true unless changed via doRemoveDeprecatedScriptCharsetAttribute()).
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
433
434
    /**
     * Current value of the "doRemoveDefaultAttributes" option
     * (false unless changed via doRemoveDefaultAttributes()).
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
441
442
    /**
     * Current value of the "doRemoveDeprecatedAnchorName" option
     * (true unless changed via doRemoveDeprecatedAnchorName()).
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
449
450
    /**
     * Current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option
     * (true unless changed via doRemoveDeprecatedTypeFromStylesheetLink()).
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
457
458
    /**
     * Current value of the "doRemoveDeprecatedTypeFromScriptTag" option
     * (true unless changed via doRemoveDeprecatedTypeFromScriptTag()).
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
465
466
    /**
     * Current value of the "doRemoveValueFromEmptyInput" option
     * (true unless changed via doRemoveValueFromEmptyInput()).
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
473
474
    /**
     * Current value of the "doRemoveEmptyAttributes" option
     * (true unless changed via doRemoveEmptyAttributes()).
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
481
482
    /**
     * Current value of the "doSumUpWhitespace" option
     * (true unless changed via doSumUpWhitespace()).
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
489
490
    /**
     * Current value of the "doRemoveSpacesBetweenTags" option
     * (false unless changed via doRemoveSpacesBetweenTags()).
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
497
498
    /**
     * Current value of the "doOptimizeViaHtmlDomParser" option
     * (true unless changed via doOptimizeViaHtmlDomParser()).
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
505
506
    /**
     * Current value of the "doOptimizeAttributes" option
     * (true unless changed via doOptimizeAttributes()).
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
513
514
    /**
     * Current value of the "doRemoveComments" option
     * (true unless changed via doRemoveComments()).
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
521
522
    /**
     * Current value of the "doRemoveWhitespaceAroundTags" option
     * (false unless changed via doRemoveWhitespaceAroundTags()).
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
529
530
    /**
     * Current value of the "doRemoveOmittedQuotes" option
     * (true unless changed via doRemoveOmittedQuotes()).
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
537
538
    /**
     * Current value of the "doRemoveOmittedHtmlTags" option
     * (true unless changed via doRemoveOmittedHtmlTags()).
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
545
546
    /**
     * Current value of the "doRemoveHttpPrefixFromAttributes" option
     * (false unless changed via doRemoveHttpPrefixFromAttributes()).
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
553
554
    /**
     * Return the configured domain list stored in
     * $domainsToRemoveHttpPrefixFromAttributes.
     *
     * The property is initialised with 'google.com' and 'google.de'.
     *
     * @return array list of domain names
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
561
562
    /**
     * Set the "doRemoveOmittedHtmlTags" option (fluent setter).
     *
     * domNodeToString() reads this option: only when it is on does it ask
     * domNodeClosingTagOptional() whether an end tag may be left out.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
573
574
    /**
     * Set the "doRemoveOmittedQuotes" option (fluent setter).
     *
     * domNodeAttributesToString() reads this option when deciding whether an
     * attribute value may be written without surrounding quotes.
     *
     * @param bool $doRemoveOmittedQuotes new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
585
586
    /**
     * Set the "doRemoveSpacesBetweenTags" option (fluent setter).
     *
     * The property itself defaults to false; calling this method without an
     * argument switches it to true. Read it back via isDoRemoveSpacesBetweenTags().
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
597
598
    /**
     * Set the "doRemoveValueFromEmptyInput" option (fluent setter).
     *
     * The stored value can be read back via isDoRemoveValueFromEmptyInput().
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
609
610
    /**
     * Set the "doRemoveWhitespaceAroundTags" option (fluent setter).
     *
     * The property itself defaults to false; calling this method without an
     * argument switches it to true. Read it back via isDoRemoveWhitespaceAroundTags().
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option, true when omitted
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
621
622
    /**
     * Set the "doSortCssClassNames" option (fluent setter).
     *
     * The stored value can be read back via isDoSortCssClassNames().
     *
     * @param bool $doSortCssClassNames new value of the option, true when omitted
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
633
634
    /**
     * Set the "doSortHtmlAttributes" option (fluent setter).
     *
     * The stored value can be read back via isDoSortHtmlAttributes().
     *
     * @param bool $doSortHtmlAttributes new value of the option, true when omitted
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
645
646
    /**
     * Set the "doSumUpWhitespace" option (fluent setter).
     *
     * The stored value can be read back via isDoSumUpWhitespace().
     *
     * @param bool $doSumUpWhitespace new value of the option, true when omitted
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
657
658 42
    /**
     * Serialize the attributes of a DOM node into the text that goes inside its start tag.
     *
     * Attributes are written in iteration order and separated by a single space:
     * - with "doOptimizeAttributes" on, attributes listed in self::$booleanAttributes
     *   are written as the bare name (their value is dropped);
     * - with "doOptimizeAttributes" on, whitespace inside "srcset" / "sizes" values
     *   is collapsed via self::$regExSpace;
     * - with "doRemoveOmittedQuotes" on, quotes are left out when the value is
     *   non-empty and contains none of the characters that require quoting;
     * - otherwise the value is wrapped in double quotes, or in single quotes when it
     *   contains a double quote; a value containing both kinds of quotes is wrapped
     *   in double quotes with the inner double quotes written as "&quot;".
     *
     * @param \DOMNode $node
     *
     * @return string the attribute string without leading / trailing whitespace, '' when there are no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the name alone is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // Remove quotes around attribute values, when allowed (<p class="foo"> → <p class=foo>)
            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]+/', $attribute->value) === 0;

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                $collapsed = \preg_replace(self::$regExSpace, ' ', $attr_val);
                // preg_replace() returns null on a PCRE error: keep the untouched value
                // instead of silently emitting an empty attribute
                if ($collapsed !== null) {
                    $attr_val = $collapsed;
                }
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quoteTmp = "'";
            } else {
                // both quote characters occur in the value: neither delimiter is safe
                // as-is, so escape the double quotes and delimit with them
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
719
720
    /**
     * Decide whether the end tag of the given node may be left out of the output.
     *
     * The answer is based on the node name, on the next sibling element as reported by
     * getNextSiblingOfTypeDOMElement() and, for <p>, on the parent node. Tags listed in
     * self::$optional_end_tags are always reported as optional.
     *
     * @see https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * @param \DOMNode $node
     *
     * @return bool true when the end tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;
        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

        // TODO:
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // "no more content in the parent element" is approximated by "no next sibling element"
        $noMoreContent = $nextSibling === null;
        // null when there is no following element, so the comparisons below are simply false
        $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        switch ($tag_name) {
            case 'li':
                return $noMoreContent || $nextTagName === 'li';

            case 'rp':
                return $noMoreContent || $nextTagName === 'rp' || $nextTagName === 'rt';

            case 'tr':
                return $noMoreContent || $nextTagName === 'tr';

            case 'td':
            case 'th':
                return $noMoreContent || $nextTagName === 'td' || $nextTagName === 'th';

            case 'dd':
                return $noMoreContent || $nextTagName === 'dd' || $nextTagName === 'dt';

            case 'dt':
                // unlike <dd>, a trailing <dt> must keep its end tag
                return $nextTagName === 'dd' || $nextTagName === 'dt';

            case 'option':
                return $noMoreContent || $nextTagName === 'option' || $nextTagName === 'optgroup';

            case 'p':
                if ($noMoreContent) {
                    $parent = $node->parentNode;
                    if ($parent === null) {
                        return false;
                    }

                    // Autonomous custom elements always carry a hyphen in their name; inside
                    // them (and inside the elements listed below) a missing </p> would make
                    // the parser ignore the parent's end tag.
                    $parentIsCustomElement = $parent instanceof \DOMElement
                                             &&
                                             \strpos($parent->nodeName, '-') !== false;

                    return !$parentIsCustomElement
                           &&
                           !\in_array(
                               $parent->nodeName,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                return $nextTagName !== null
                       &&
                       \in_array(
                           $nextTagName,
                           [
                               'address',
                               'article',
                               'aside',
                               'blockquote',
                               'details',
                               'dir',
                               'div',
                               'dl',
                               'fieldset',
                               'figcaption',
                               'figure',
                               'footer',
                               'form',
                               'h1',
                               'h2',
                               'h3',
                               'h4',
                               'h5',
                               'h6',
                               'header',
                               'hgroup',
                               'hr',
                               'main',
                               'menu',
                               'nav',
                               'ol',
                               'p',
                               'pre',
                               'section',
                               'table',
                               'ul',
                           ],
                           true
                       );

            default:
                return false;
        }
    }
944
945 42
    /**
     * Serialize the child nodes of a DOM node into an HTML string.
     *
     * Elements are written recursively, the doctype is only written when the
     * original input contained one, closing tags that may be omitted are dropped
     * when "doRemoveOmittedHtmlTags" is enabled and whitespace-only text between
     * two siblings is reduced to at most one space.
     *
     * @param \DOMNode $node the node whose child nodes are serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';

        // Whitespace marker: 'is_empty' is set by the child that handled a
        // whitespace gap, 'last_was_empty' means the previous child did so.
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            // age the marker by one iteration
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $doctype = '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $doctype .= ' PUBLIC "' . $child->publicId . '"';

                        // the system id follows the public id without a keyword,
                        // separated by exactly one space
                        if ($child->systemId) {
                            $doctype .= ' "' . $child->systemId . '"';
                        }
                    } elseif ($child->systemId) {
                        $doctype .= ' SYSTEM "' . $child->systemId . '"';
                    }

                    $html .= $doctype . '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() removes the separator space when there are no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (!$this->doRemoveWhitespaceAroundTags) {
                    if (
                        $child->nextSibling instanceof \DOMText
                        &&
                        $child->nextSibling->wholeText === ' '
                    ) {
                        // keep a single space after the element, but never two in a row
                        if (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        ) {
                            $html .= ' ';
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                }
            } elseif ($child instanceof \DOMText) {
                if ($child->isElementContentWhitespace()) {
                    // whitespace-only text is only kept between two siblings
                    if (
                        $child->previousSibling !== null
                        &&
                        $child->nextSibling !== null
                    ) {
                        if (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        ) {
                            $html .= ' ';
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                } else {
                    $html .= $child->wholeText;
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1032
1033
    /**
     * Find the closest following sibling that is an element.
     *
     * Siblings of any other node type are skipped.
     *
     * @param \DOMNode $node the node whose following siblings are inspected
     *
     * @return \DOMNode|null the next sibling element, or null when there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;

        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1046
1047
    /**
     * Tell whether the given comment text belongs to a conditional comment.
     *
     * INFO: conditional comments are no longer supported since IE 10
     *
     * Recognized forms:
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment the text to check; it must start with "[if ...]" or end with "[endif]"
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        $startsWithCondition = \preg_match('/^\[if [^\]]+\]/', $comment) === 1;

        return $startsWithCondition || \preg_match('/\[endif\]$/', $comment) === 1;
    }
1071
1072
    /**
     * Minify an HTML string.
     *
     * The input is trimmed, optionally minified via the DOM parser, cleaned up
     * with a few string / regex replacements and finally compared with the
     * trimmed input: if the result is longer, the trimmed input is returned.
     *
     * @param string $html               the HTML to minify
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
     *
     * @return string the minified HTML, or an empty string for empty / whitespace-only input
     */
    public function minify($html, $decodeUtf8Specials = false): string
    {
        // Compare against '' explicitly: a loose "!$html" check would also
        // discard the valid, non-empty input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // init
        static $CACHE_SELF_CLOSING_TAGS = null;
        if ($CACHE_SELF_CLOSING_TAGS === null) {
            $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        $html = (string) \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
            static function ($matches) {
                return '<' . $matches[1] . (string) \preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]) . $matches[3] . '>';
            },
            $html
        );

        if ($this->doRemoveSpacesBetweenTags) {
            // Remove spaces that are between > and <
            $html = (string) \preg_replace('/(>) (<)/', '>$2', $html);
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $html = (string) \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        );

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly around the "html" and "head" tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ... and no explicit closing tag either, e.g. "<br></br>" => "<br>"
        $html = (string) \preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html);

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1202
1203
    /**
     * Run all DOM based minification steps and convert the DOM back into a string.
     *
     * @param string $html               the HTML that is loaded into the DOM parser
     * @param bool   $decodeUtf8Specials passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $document = $dom->getDocument();
        // remove redundant white space
        $document->preserveWhiteSpace = false;
        // do not format the output with indentation
        $document->formatOutput = false;

        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember whether the input itself started with a doctype
        $this->withDocType = \stripos(\ltrim($html), '<!DOCTYPE') === 0;

        foreach ($dom->find('*') as $elementBefore) {
            $this->notifyObserversAboutDomElementBeforeMinification($elementBefore);
        }

        // Protect HTML tags and conditional comments.
        $dom = $this->protectTags($dom);

        // Remove default HTML comments. [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $elementAfter) {
            // Remove whitespace around tags. [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($elementAfter);
            }

            $this->notifyObserversAboutDomElementAfterMinification($elementAfter);
        }

        // Convert the Dom into a string.
        $serialized = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serialized, $decodeUtf8Specials);
    }
1273
1274
    /**
     * Swap content that must not be minified for placeholder tags.
     *
     * The original content is stored in "protectedChildNodes" under the number
     * that is written into the placeholder. This is done for inline "script" /
     * "style" content, for the parents of "code" / "nocompress" elements and for
     * conditional comments.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        // builds the placeholder markup that carries the storage index
        $placeholderFor = static function (int $index) use ($helperTag): string {
            return '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';
        };

        $index = 0;

        foreach ($dom->find('script, style') as $inlineElement) {
            // skip external links
            if (\in_array($inlineElement->tag, ['script', 'style'], true)) {
                $attributes = $inlineElement->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$index] = $inlineElement->text();
            $inlineElement->getNode()->nodeValue = $placeholderFor($index);

            ++$index;
        }

        foreach ($dom->find('code, nocompress') as $codeElement) {
            if ($codeElement->isRemoved()) {
                continue;
            }

            // the whole parent content is stored and replaced
            $this->protectedChildNodes[$index] = $codeElement->parentNode()->innerHtml();
            $codeElement->getNode()->parentNode->nodeValue = $placeholderFor($index);

            ++$index;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            $commentText = $commentElement->text();

            // only conditional comments are protected
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText($placeholderFor($index));
            /** @noinspection UnusedFunctionResultInspection */
            $commentElement->getNode()->parentNode->replaceChild($placeholderNode, $commentNode);

            ++$index;
        }

        return $dom;
    }
1334
1335
    /**
     * Delete comment nodes from the dom.
     *
     * Comments that contain a "[" are kept. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $wrapper) {
            $commentNode = $wrapper->getNode();

            // keep every comment with a "[" in its text
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1357
1358
    /**
     * Collapse whitespace in the text nodes at the edges of an element.
     *
     * Only elements listed in "$trimWhitespaceFromTags" are handled. For those,
     * the first child, the last child, the previous sibling and the next sibling
     * are inspected and, if they are text nodes, runs of whitespace and line
     * breaks in them are replaced by a single space.
     *
     * @param SimpleHtmlDom $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        // NOTE(review): elements without child nodes (e.g. "br" / "hr", both listed
        // in $trimWhitespaceFromTags) are skipped entirely, so their siblings are
        // never touched - confirm that this is intended.
        if ($node->childNodes->length === 0) {
            return;
        }

        /** @var \DOMNode[]|null[] $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // DOM nodes are objects, so no by-reference iteration is needed to modify them
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // preg_replace() returns null on a PCRE error; keep the text untouched
            // in that case instead of wiping the node
            $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($collapsed !== null) {
                $candidate->nodeValue = $collapsed;
            }
        }
    }
1390
1391
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the number from the quoted attribute value of a placeholder tag and
     * returns the protected content that was stored under that number.
     *
     * @param array $matches PREG matches, the "attributes" entry holds the placeholder attributes
     *
     * @return string the stored content, or an empty string when no matching entry exists
     */
    private function restoreProtectedHtml($matches): string
    {
        $attributes = $matches['attributes'] ?? '';

        // without a quoted id there is nothing to look up; returning early avoids
        // reading an undefined "id" index
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return (string) ($this->protectedChildNodes[$matchesInner['id']] ?? '');
    }
1409
1410
    /**
     * Store the list that is kept in "domainsToRemoveHttpPrefixFromAttributes".
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $instance = $this;
        $instance->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $instance;
    }
1421
1422
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node, runs of whitespace and line breaks are replaced by a
     * single space. Text nodes whose node path contains "/" followed by one of
     * the entries from "$skipTagsForRemoveWhitespace" are left untouched. The
     * document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            // getNodePath() may return null; the cast keeps strpos() safe under strict types
            $xp = (string) $text_node->getNodePath();

            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: "${var}" interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            // preg_replace() returns null on a PCRE error; keep the text untouched
            // in that case instead of wiping the node
            $collapsed = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            if ($collapsed !== null) {
                $text_node->nodeValue = $collapsed;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1456
1457
    /**
     * Set the "keepBrokenHtml" flag, which is handed to the dom parser before the html is loaded.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $instance = $this;
        $instance->keepBrokenHtml = $keepBrokenHtml;

        return $instance;
    }
1470
}
1471