Completed
Push — master ( e0c79b...030b2b )
by Lars
01:37
created

HtmlMin::isHTML4()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
     * PCRE pattern (UTF-8 mode) that matches a run of two or more whitespace
     * characters, or a single carriage return / line feed.
     *
     * @var string
     */
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
     * Tag names whose end tag is always reported as optional by
     * domNodeClosingTagOptional().
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $optional_end_tags = [
        'html',
        'head',
        'body',
    ];
38
39
    /**
     * Names of elements treated as self-closing.
     *
     * NOTE(review): the consumer of this list is not visible in this part of
     * the file - confirm how it is used before relying on this description.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];
65
66
    /**
     * Tag names used as keys of a lookup map (the values are unused empty
     * strings).
     *
     * NOTE(review): presumably the tags around which whitespace is trimmed;
     * the consumer is not visible in this part of the file - confirm.
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];
81
82
    /**
     * Boolean attribute names, stored as array keys so that membership can be
     * tested with isset(); the values are unused empty strings.
     *
     * domNodeAttributesToString() writes these attributes as a bare name
     * (without "=value") when attribute optimization is enabled.
     *
     * @var array
     *
     * @psalm-var array<string, string>
     */
    private static $booleanAttributes = [
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    ];
128
129
    /**
     * Tag names excluded from whitespace removal.
     *
     * NOTE(review): meaning inferred from the property name; the consumer is
     * not visible in this part of the file - confirm.
     *
     * @var array
     *
     * @psalm-var list<string>
     */
    private static $skipTagsForRemoveWhitespace = [
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    ];
139
140
    /** @var array */
    private $protectedChildNodes = [];

    /** @var string */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /** @var bool set via doOptimizeViaHtmlDomParser() */
    private $doOptimizeViaHtmlDomParser = true;

    /** @var bool set via doOptimizeAttributes() */
    private $doOptimizeAttributes = true;

    /** @var bool set via doRemoveComments() */
    private $doRemoveComments = true;

    /** @var bool set via doRemoveWhitespaceAroundTags() */
    private $doRemoveWhitespaceAroundTags = false;

    /** @var bool set via doRemoveOmittedQuotes() */
    private $doRemoveOmittedQuotes = true;

    /** @var bool set via doRemoveOmittedHtmlTags() */
    private $doRemoveOmittedHtmlTags = true;

    /** @var bool set via doRemoveHttpPrefixFromAttributes() */
    private $doRemoveHttpPrefixFromAttributes = false;

    /** @var bool set via doRemoveHttpsPrefixFromAttributes() */
    private $doRemoveHttpsPrefixFromAttributes = false;

    /** @var bool set via doKeepHttpAndHttpsPrefixOnExternalAttributes() */
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;

    /** @var bool true once doMakeSameDomainsLinksRelative() received at least one domain */
    private $doMakeSameDomainsLinksRelative = false;

    /** @var string[] normalized domains stored by doMakeSameDomainsLinksRelative() */
    private $localDomains = [];

    /** @var array */
    private $domainsToRemoveHttpPrefixFromAttributes = [
        'google.com',
        'google.de',
    ];

    /** @var bool set via doSortCssClassNames() */
    private $doSortCssClassNames = true;

    /** @var bool set via doSortHtmlAttributes() */
    private $doSortHtmlAttributes = true;

    /** @var bool set via doRemoveDeprecatedScriptCharsetAttribute() */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /** @var bool set via doRemoveDefaultAttributes() */
    private $doRemoveDefaultAttributes = false;

    /** @var bool set via doRemoveDeprecatedAnchorName() */
    private $doRemoveDeprecatedAnchorName = true;

    /** @var bool set via doRemoveDeprecatedTypeFromStylesheetLink() */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /** @var bool set via doRemoveDeprecatedTypeFromStyleAndLinkTag() */
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;

    /** @var bool set via doRemoveDefaultMediaTypeFromStyleAndLinkTag() */
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;

    /** @var bool set via doRemoveDefaultTypeFromButton() */
    private $doRemoveDefaultTypeFromButton = false;

    /** @var bool set via doRemoveDeprecatedTypeFromScriptTag() */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /** @var bool set via doRemoveValueFromEmptyInput() */
    private $doRemoveValueFromEmptyInput = true;

    /** @var bool set via doRemoveEmptyAttributes() */
    private $doRemoveEmptyAttributes = true;

    /** @var bool set via doSumUpWhitespace() */
    private $doSumUpWhitespace = true;

    /** @var bool set via doRemoveSpacesBetweenTags() */
    private $doRemoveSpacesBetweenTags = false;

    /** @var bool */
    private $keepBrokenHtml = false;

    /** @var bool */
    private $withDocType = false;

    /**
     * Observers registered through attachObserverToTheDomLoop().
     *
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
     *
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
     */
    private $domLoopObservers;

    /** @var int */
    private $protected_tags_counter = 0;

    /** @var bool */
    private $isHTML4 = false;

    /** @var bool */
    private $isXHTML = false;

    /** @var string[]|null */
    private $templateLogicSyntaxInSpecialScriptTags;
319
320
    /**
     * Create the minifier: set up an empty observer storage and register the
     * attribute-optimizing DOM observer.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
329
330
    /**
     * Register an observer in the internal observer storage.
     *
     * @param HtmlMinDomObserverInterface $observer observer to attach
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $observers = $this->domLoopObservers;
        $observers->attach($observer);
    }
339
340
    /**
     * Set the "doOptimizeAttributes" option (fluent setter).
     *
     * When enabled, domNodeAttributesToString() writes known boolean
     * attributes without a value and collapses whitespace inside "srcset" and
     * "sizes" values.
     *
     * @param bool $doOptimizeAttributes new value of the option
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
351
352
    /**
     * Set the "doOptimizeViaHtmlDomParser" option (fluent setter).
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
363
364
    /**
     * Set the "doRemoveComments" option (fluent setter).
     *
     * @param bool $doRemoveComments new value of the option
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
375
376
    /**
     * Set the "doRemoveDefaultAttributes" option (fluent setter).
     *
     * @param bool $doRemoveDefaultAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
387
388
    /**
     * Set the "doRemoveDeprecatedAnchorName" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
399
400
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
411
412
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
423
424
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
435
436
    /**
     * Set the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
447
448
    /**
     * Set the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option (fluent setter).
     *
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new value of the option
     *
     * @return $this
     */
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
459
460
    /**
     * Set the "doRemoveDefaultTypeFromButton" option (fluent setter).
     *
     * @param bool $doRemoveDefaultTypeFromButton new value of the option
     *
     * @return $this
     */
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
471
472
    /**
     * Set the "doRemoveEmptyAttributes" option (fluent setter).
     *
     * @param bool $doRemoveEmptyAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
483
484
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option (fluent setter).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
495
496
    /**
     * Set the "doRemoveHttpsPrefixFromAttributes" option (fluent setter).
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
507
508
    /**
     * Set the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option (fluent setter).
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new value of the option
     *
     * @return $this
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
519
520
    /**
     * Store the list of local domains and switch the
     * "doMakeSameDomainsLinksRelative" option on when the list is not empty
     * (and off when it is empty).
     *
     * Every entry is normalized before it is stored: each "http://",
     * "https://" or "//" occurrence is removed (case-insensitive) and trailing
     * slashes are stripped. Array keys of the input are preserved.
     *
     * @param string[] $localDomains domains (optionally with scheme / trailing slash)
     *
     * @return $this
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // Build a fresh array instead of iterating by reference: a by-reference
        // foreach without a following unset() leaves the last element bound to
        // the loop variable.
        $normalizedDomains = [];
        foreach ($localDomains as $key => $localDomain) {
            $withoutScheme = (string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain);
            $normalizedDomains[$key] = \rtrim($withoutScheme, '/');
        }

        $this->localDomains = $normalizedDomains;
        $this->doMakeSameDomainsLinksRelative = \count($normalizedDomains) > 0;

        return $this;
    }
537
538
    /**
     * Return the local domains as stored by doMakeSameDomainsLinksRelative().
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        $domains = $this->localDomains;

        return $domains;
    }
545
546
    /**
     * Set the "doRemoveOmittedHtmlTags" option (fluent setter).
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
557
558
    /**
     * Set the "doRemoveOmittedQuotes" option (fluent setter).
     *
     * When enabled, domNodeAttributesToString() writes attribute values
     * without surrounding quotes where the value allows it.
     *
     * @param bool $doRemoveOmittedQuotes new value of the option
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
569
570
    /**
     * Set the "doRemoveSpacesBetweenTags" option (fluent setter).
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
581
582
    /**
     * Set the "doRemoveValueFromEmptyInput" option (fluent setter).
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
593
594
    /**
     * Set the "doRemoveWhitespaceAroundTags" option (fluent setter).
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
605
606
    /**
     * Set the "doSortCssClassNames" option (fluent setter).
     *
     * @param bool $doSortCssClassNames new value of the option
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
617
618
    /**
     * Set the "doSortHtmlAttributes" option (fluent setter).
     *
     * @param bool $doSortHtmlAttributes new value of the option
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
629
630
    /**
     * Set the "doSumUpWhitespace" option (fluent setter).
     *
     * @param bool $doSumUpWhitespace new value of the option
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
641
642 55
    /**
     * Serialize the attributes of a DOM node into an HTML attribute string.
     *
     * - With attribute optimization enabled, known boolean attributes are
     *   written as a bare name and whitespace inside "srcset" / "sizes" values
     *   is collapsed.
     * - With quote removal enabled, quotes around a value are dropped when
     *   allowed (<p class="foo"> → <p class=foo>).
     * - Otherwise the value is wrapped in double quotes, or in single quotes
     *   when it contains a double quote; single quotes inside such a value are
     *   written as "&#39;" so that they cannot terminate the attribute early.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string space-separated attributes without surrounding whitespace,
     *                or an empty string when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            // Boolean attributes are written without a value.
            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8);
                // keep the untouched value instead of silently dropping it.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            } else {
                $attr_val = $attribute->value;
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } else {
                // The value contains a double quote, so it is delimited with single
                // quotes; escape any single quote so it cannot end the attribute.
                $quoteTmp = "'";
                $attr_val = \str_replace("'", '&#39;', $attr_val);
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
703
704
    /**
     * Decide whether the end tag of the given node may be left out, following
     * the tag-omission rules of the HTML standard.
     *
     * The decision is based on the node's tag name, the tag name of its parent
     * and the sibling returned by getNextSiblingOfTypeDOMElement().
     *
     * @param \DOMNode $node
     *
     * @return bool true when the end tag may be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $no_next_sibling = $nextSibling === null;
        // null when there is no next sibling or when it is not an element
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                return $no_next_sibling || $next_tag_name === 'li';

            case 'optgroup':
                return $no_next_sibling || $next_tag_name === 'optgroup';

            case 'rp':
                return $no_next_sibling || $next_tag_name === 'rp' || $next_tag_name === 'rt';

            case 'tr':
                return $no_next_sibling || $next_tag_name === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($no_next_sibling || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $no_next_sibling || $next_tag_name === 'td' || $next_tag_name === 'th';

            case 'dd':
                return $no_next_sibling || $next_tag_name === 'dd' || $next_tag_name === 'dt';

            case 'dt':
                // unlike <dd>, a trailing <dt> keeps its end tag
                return $next_tag_name === 'dd' || $next_tag_name === 'dt';

            case 'option':
                return $no_next_sibling || $next_tag_name === 'option' || $next_tag_name === 'optgroup';

            case 'p':
                if ($next_tag_name !== null) {
                    return \in_array(
                        $next_tag_name,
                        [
                            'address',
                            'article',
                            'aside',
                            'blockquote',
                            'details',
                            'dir',
                            'div',
                            'dl',
                            'fieldset',
                            'figcaption',
                            'figure',
                            'footer',
                            'form',
                            'h1',
                            'h2',
                            'h3',
                            'h4',
                            'h5',
                            'h6',
                            'header',
                            'hgroup',
                            'hr',
                            'main',
                            'menu',
                            'nav',
                            'ol',
                            'p',
                            'pre',
                            'section',
                            'table',
                            'ul',
                        ],
                        true
                    );
                }

                // A next sibling that is not an element, or a detached node: keep the end tag.
                if (!$no_next_sibling || $parent_node === null) {
                    return false;
                }

                $last_child = $parent_node->lastChild;
                if ($last_child === null) {
                    return false;
                }

                // "No more content in the parent": the <p> is the last child, or the
                // last child has only whitespace as text content.
                if ($last_child !== $node && \trim($last_child->textContent) !== '') {
                    return false;
                }

                // The end tag must stay inside these parents ...
                if (\in_array($parent_tag_name, ['a', 'audio', 'del', 'ins', 'map', 'noscript', 'video'], true)) {
                    return false;
                }

                // ... and inside autonomous custom elements, whose names always contain a hyphen.
                return \strpos((string) $parent_tag_name, '-') === false;

            default:
                return false;
        }
    }
989
990 55
    /**
     * Serialize the child nodes of the given node into an HTML string.
     *
     * - Elements: start tag (with attributes), recursively serialized content and, unless the
     *   end tag may be left out, the end tag.
     * - Text nodes: appended as-is; nodes flagged by isElementContentWhitespace() are reduced
     *   to at most one blank, and only when they sit between two siblings.
     * - Comments: written back wrapped in "<!--" and "-->".
     * - Any other node type is ignored.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';

        // Tracks whether a blank was handled for the current ('is_empty') or the previous
        // ('last_was_empty') child; '' otherwise.
        $blankState = '';

        // Strip trailing whitespace and re-add exactly one blank, except directly inside <head>.
        $withSingleTrailingBlank = static function (string $markup, \DOMNode $current): string {
            $markup = \rtrim($markup);

            if ($current->parentNode && $current->parentNode->nodeName !== 'head') {
                $markup .= ' ';
            }

            return $markup;
        };

        foreach ($node->childNodes as $child) {
            $blankState = $blankState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMElement) {
                $startTag = '<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child);
                $output .= \rtrim($startTag) . '>' . $this->domNodeToString($child);

                // The end tag is only dropped for non-HTML4 / non-XHTML documents.
                $omitEndTag = $this->doRemoveOmittedHtmlTags
                              && !$this->isHTML4
                              && !$this->isXHTML
                              && $this->domNodeClosingTagOptional($child);
                if (!$omitEndTag) {
                    $output .= '</' . $child->tagName . '>';
                }

                $followingNode = $child->nextSibling;
                if (
                    !$this->doRemoveWhitespaceAroundTags
                    && $followingNode instanceof \DOMText
                    && $followingNode->wholeText === ' '
                ) {
                    if ($blankState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output = $withSingleTrailingBlank($output, $child);
                    }

                    $blankState = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // Leading / trailing whitespace nodes of the parent are dropped entirely.
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsBlank = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                if (
                    $containsBlank
                    || ($blankState !== 'last_was_empty' && \substr($output, -1) !== ' ')
                ) {
                    $output = $withSingleTrailingBlank($output, $child);
                }

                $blankState = 'is_empty';

                continue;
            }

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';
            }
        }

        return $output;
    }
1088
1089
    /**
1090
     * @param \DOMNode $node
1091
     *
1092
     * @return string
1093
     */
1094 55
    private function getDoctype(\DOMNode $node): string
    {
        // Check the doc-type only if it wasn't generated by DomDocument itself.
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            // Only a named document-type node can be turned into a declaration.
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' PUBLIC "' . $child->publicId . '"';

                if ($child->systemId) {
                    // After a public identifier the system identifier follows without a keyword
                    // of its own, separated by exactly one blank.
                    $doctype .= ' "' . $child->systemId . '"';
                }
            } elseif ($child->systemId) {
                $doctype .= ' SYSTEM "' . $child->systemId . '"';
            }

            // The first matching document-type node wins.
            return $doctype . '>';
        }

        return '';
    }
1124
1125
    /**
1126
     * @return array
1127
     */
1128
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        // Plain accessor: hands back the stored array unchanged.
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1132
1133
    /**
1134
     * @return bool
1135
     */
1136
    public function isDoOptimizeAttributes(): bool
    {
        // Plain accessor for the stored "doOptimizeAttributes" value.
        return $this->doOptimizeAttributes;
    }
1140
1141
    /**
1142
     * @return bool
1143
     */
1144
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        // Plain accessor for the stored "doOptimizeViaHtmlDomParser" value.
        return $this->doOptimizeViaHtmlDomParser;
    }
1148
1149
    /**
1150
     * @return bool
1151
     */
1152
    public function isDoRemoveComments(): bool
    {
        // Plain accessor for the stored "doRemoveComments" value.
        return $this->doRemoveComments;
    }
1156
1157
    /**
1158
     * @return bool
1159
     */
1160 38
    public function isDoRemoveDefaultAttributes(): bool
    {
        // Plain accessor for the stored "doRemoveDefaultAttributes" value.
        return $this->doRemoveDefaultAttributes;
    }
1164
1165
    /**
1166
     * @return bool
1167
     */
1168 38
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        // Plain accessor for the stored "doRemoveDeprecatedAnchorName" value.
        return $this->doRemoveDeprecatedAnchorName;
    }
1172
1173
    /**
1174
     * @return bool
1175
     */
1176 38
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        // Plain accessor for the stored "doRemoveDeprecatedScriptCharsetAttribute" value.
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1180
1181
    /**
1182
     * @return bool
1183
     */
1184 38
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        // Plain accessor for the stored "doRemoveDeprecatedTypeFromScriptTag" value.
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1188
1189
    /**
1190
     * @return bool
1191
     */
1192 38
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        // Plain accessor for the stored "doRemoveDeprecatedTypeFromStylesheetLink" value.
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1196
1197
    /**
1198
     * @return bool
1199
     */
1200 38
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        // Plain accessor for the stored "doRemoveDeprecatedTypeFromStyleAndLinkTag" value.
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1204
1205
    /**
1206
     * @return bool
1207
     */
1208 38
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        // Plain accessor for the stored "doRemoveDefaultMediaTypeFromStyleAndLinkTag" value.
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1212
1213
    /**
1214
     * @return bool
1215
     */
1216 37
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        // Plain accessor for the stored "doRemoveDefaultTypeFromButton" value.
        return $this->doRemoveDefaultTypeFromButton;
    }
1220
1221
    /**
1222
     * @return bool
1223
     */
1224 37
    public function isDoRemoveEmptyAttributes(): bool
    {
        // Plain accessor for the stored "doRemoveEmptyAttributes" value.
        return $this->doRemoveEmptyAttributes;
    }
1228
1229
    /**
1230
     * @return bool
1231
     */
1232 38
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        // Plain accessor for the stored "doRemoveHttpPrefixFromAttributes" value.
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1236
1237
    /**
1238
     * @return bool
1239
     */
1240 38
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        // Plain accessor for the stored "doRemoveHttpsPrefixFromAttributes" value.
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1244
1245
    /**
1246
     * @return bool
1247
     */
1248 4
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        // Plain accessor for the stored "doKeepHttpAndHttpsPrefixOnExternalAttributes" value.
        // NOTE(review): the method name starts with "isdo" (lower-case "d") while the other
        // accessors use "isDo"; PHP resolves method names case-insensitively, so callers using
        // either spelling reach this method.
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1252
1253
    /**
1254
     * @return bool
1255
     */
1256 38
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        // Plain accessor for the stored "doMakeSameDomainsLinksRelative" value.
        return $this->doMakeSameDomainsLinksRelative;
    }
1260
1261
    /**
1262
     * @return bool
1263
     */
1264
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        // Plain accessor for the stored "doRemoveOmittedHtmlTags" value.
        return $this->doRemoveOmittedHtmlTags;
    }
1268
1269
    /**
1270
     * @return bool
1271
     */
1272
    public function isDoRemoveOmittedQuotes(): bool
    {
        // Plain accessor for the stored "doRemoveOmittedQuotes" value.
        return $this->doRemoveOmittedQuotes;
    }
1276
1277
    /**
1278
     * @return bool
1279
     */
1280
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        // Plain accessor for the stored "doRemoveSpacesBetweenTags" value.
        return $this->doRemoveSpacesBetweenTags;
    }
1284
1285
    /**
1286
     * @return bool
1287
     */
1288 37
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        // Plain accessor for the stored "doRemoveValueFromEmptyInput" value.
        return $this->doRemoveValueFromEmptyInput;
    }
1292
1293
    /**
1294
     * @return bool
1295
     */
1296
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        // Plain accessor for the stored "doRemoveWhitespaceAroundTags" value.
        return $this->doRemoveWhitespaceAroundTags;
    }
1300
1301
    /**
1302
     * @return bool
1303
     */
1304 37
    public function isDoSortCssClassNames(): bool
    {
        // Plain accessor for the stored "doSortCssClassNames" value.
        return $this->doSortCssClassNames;
    }
1308
1309
    /**
1310
     * @return bool
1311
     */
1312 38
    public function isDoSortHtmlAttributes(): bool
    {
        // Plain accessor for the stored "doSortHtmlAttributes" value.
        return $this->doSortHtmlAttributes;
    }
1316
1317
    /**
1318
     * @return bool
1319
     */
1320
    public function isDoSumUpWhitespace(): bool
    {
        // Plain accessor for the stored "doSumUpWhitespace" value.
        return $this->doSumUpWhitespace;
    }
1324
1325
    /**
1326
     * @return bool
1327
     */
1328 5
    public function isHTML4(): bool
    {
        // Plain accessor for the stored "isHTML4" value.
        return $this->isHTML4;
    }
1332
1333
    /**
1334
     * @return bool
1335
     */
1336 5
    public function isXHTML(): bool
    {
        // Plain accessor for the stored "isXHTML" value.
        return $this->isXHTML;
    }
1340
1341
    /**
1342
     * @param string $html
1343
     * @param bool   $multiDecodeNewHtmlEntity
1344
     *
1345
     * @return string
1346
     */
1347 59
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Strict comparison on purpose: a document consisting only of "0" is valid input and
        // must not be treated as empty (a loose "!$html" check would discard it).
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content (used as fallback when minification fails or does not shrink the input)
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // PCRE error (e.g. invalid UTF-8 with the "u" modifier):
                        // keep the tag untouched instead of silently dropping its attributes.
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            // null signals a PCRE error; keep the previous markup in that case
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags && \strpos($html, ' ') !== false) {
            // Remove spaces that are between > and <
            $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp === null) {
                // The placeholders could not be replaced; returning markup that still contains
                // them (or an empty string) would corrupt the page, so hand back the input.
                return $origHtml;
            }
            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly before / after the <html> and <head> tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            // ... and no explicit end tag either
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        // never return something longer than the (trimmed) input
        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1472
1473
    /**
1474
     * @param \DOMNode $node
1475
     *
1476
     * @return \DOMNode|null
1477
     */
1478 54
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        // Walk the sibling chain until an element shows up or the chain ends (null).
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1487
1488
    /**
1489
     * Check if the current string is an conditional comment.
1490
     *
1491
     * INFO: since IE >= 10 conditional comment are not working anymore
1492
     *
1493
     * <!--[if expression]> HTML <![endif]-->
1494
     * <![if expression]> HTML <![endif]>
1495
     *
1496
     * @param string $comment
1497
     *
1498
     * @return bool
1499
     */
1500 4
    private function isConditionalComment($comment): bool
    {
        // Opening part: the text has to start with "[if <expression>]".
        // The cheap strpos() probe keeps the regex away from ordinary comments.
        if (
            \strpos($comment, '[if ') !== false
            &&
            \preg_match('/^\[if [^\]]+\]/', $comment)
        ) {
            return true;
        }

        // Closing part: the text has to end with "[endif]".
        return \strpos($comment, '[endif]') !== false
               &&
               (bool) \preg_match('/\[endif\]$/', $comment);
    }
1520
1521
    /**
1522
     * @param string $html
1523
     * @param bool   $multiDecodeNewHtmlEntity
1524
     *
1525
     * @return string
1526
     */
1527 55
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // --- set up the parser ---------------------------------------------------
        $parser = new HtmlDomParser();
        $parser->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $parser->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $parser->getDocument()->preserveWhiteSpace = false; // drop redundant white space
        $parser->getDocument()->formatOutput = false; // no indentation in the output

        /** @noinspection UnusedFunctionResultInspection */
        $parser->loadHtml($html);

        // --- doctype detection ---------------------------------------------------
        // Only a doctype that is literally at the start of the input counts.
        $this->withDocType = \stripos(\ltrim($html), '<!DOCTYPE') === 0;

        $doctype = $this->getDoctype($parser->getDocument());

        // NOTE(review): both flags are only assigned when a doctype string exists, so values
        // from a previous call stay in place for doctype-less input - confirm this is intended.
        if ($doctype) {
            $this->isHTML4 = \strpos($doctype, 'html4') !== false;
            $this->isXHTML = \strpos($doctype, 'xhtml1') !== false;
        }

        // --- protect <nocompress> first -------------------------------------------
        $parser = $this->protectTagHelper($parser, 'nocompress');

        // --- observers: before minification ---------------------------------------
        foreach ($parser->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // --- protect HTML tags and conditional comments ---------------------------
        $parser = $this->protectTags($parser);

        // --- remove default HTML comments [protected html is still protected] -----
        if ($this->doRemoveComments) {
            $parser = $this->removeComments($parser);
        }

        // --- sum-up extra whitespace [protected html is still protected] ----------
        if ($this->doSumUpWhitespace) {
            $parser = $this->sumUpWhitespace($parser);
        }

        foreach ($parser->find('*') as $domElement) {
            // whitespace around tags [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            // observers: after minification
            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // --- convert the Dom into a string ----------------------------------------
        $serialized = $doctype . $this->domNodeToString($parser->getDocument());

        return $parser->fixHtmlOutput($serialized, $multiDecodeNewHtmlEntity);
    }
1615
1616
    /**
1617
     * @param SimpleHtmlDomInterface $domElement
1618
     *
1619
     * @return void
1620
     */
1621 55
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        // Hand the element (plus this minifier) to every registered observer, in order.
        foreach ($this->domLoopObservers as $registeredObserver) {
            $registeredObserver->domElementAfterMinification($domElement, $this);
        }
    }
1627
1628
    /**
1629
     * @param SimpleHtmlDomInterface $domElement
1630
     *
1631
     * @return void
1632
     */
1633 55
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        // Hand the element (plus this minifier) to every registered observer, in order.
        foreach ($this->domLoopObservers as $registeredObserver) {
            $registeredObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1639
1640
    /**
1641
     * @param HtmlDomParser $dom
1642
     * @param string        $selector
1643
     *
1644
     * @return HtmlDomParser
1645
     */
1646 55
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $marker = $this->protectedChildNodesHelper;

            // Remember the inner HTML of the match's parent under the current slot ...
            $this->protectedChildNodes[$slot] = $match->parentNode()->innerHtml();

            // ... and swap the parent's content for a placeholder pointing at that slot.
            $domParent = $match->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->nodeValue = '<' . $marker . ' data-' . $marker . '="' . $slot . '"></' . $marker . '>';
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1664
1665
    /**
1666
     * Prevent changes of inline "styles" and "scripts".
1667
     *
1668
     * @param HtmlDomParser $dom
1669
     *
1670
     * @return HtmlDomParser
1671
     */
1672 55
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        $marker = $this->protectedChildNodesHelper;

        // --- inline <script> / <style> content ------------------------------------
        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            // skip external links
            if (
                ($element->tag === 'script' || $element->tag === 'style')
                &&
                isset($element->getAllAttributes()['src'])
            ) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = $element->innerhtml;
            $element->getNode()->nodeValue = '<' . $marker . ' data-' . $marker . '="' . $slot . '"></' . $marker . '>';

            ++$this->protected_tags_counter;
        }

        // --- conditional comments --------------------------------------------------
        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $commentText = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = '<!--' . $commentText . '-->';

            // Replace the comment node with a text node that carries the placeholder.
            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            $placeholder = new \DOMText('<' . $marker . ' data-' . $marker . '="' . $slot . '"></' . $marker . '>');
            $owner = $commentNode->parentNode;
            if ($owner !== null) {
                $owner->replaceChild($placeholder, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1722
1723
    /**
1724
     * Remove comments in the dom.
1725
     *
1726
     * @param HtmlDomParser $dom
1727
     *
1728
     * @return HtmlDomParser
1729
     */
1730 53
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $wrappedComment) {
            $commentNode = $wrappedComment->getNode();

            // Comments containing "[" are left in place.
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner === null) {
                continue;
            }

            $owner->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1747
1748
    /**
1749
     * Trim tags in the dom.
1750
     *
1751
     * @param SimpleHtmlDomInterface $element
1752
     *
1753
     * @return void
1754
     */
1755 3
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        // Only tags listed in self::$trimWhitespaceFromTags are processed.
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        // Elements without children are left alone (including their neighbours).
        if ($node->childNodes->length <= 0) {
            return;
        }

        // Text nodes at the inner edges and directly around the element.
        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // Iterate by value: the entries are object handles, so assigning to ->nodeValue
        // changes the DOM node itself and no by-reference loop variable is left dangling.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);

            // null signals a PCRE error; keep the original text in that case
            if ($collapsed !== null) {
                $candidate->nodeValue = $collapsed;
            }
        }
    }
1784
1785
    /**
     * Callback function for preg_replace_callback use.
     *
     * Extracts the quoted numeric id from the matched attribute string and
     * returns the entry of $this->protectedChildNodes stored under that id.
     *
     * @param array $matches PREG matches; the named group "attributes" is read
     *
     * @return string the stored value, or an empty string when no id can be
     *                extracted or nothing is stored under it
     */
    private function restoreProtectedHtml($matches): string
    {
        $attributes = $matches['attributes'] ?? '';

        // Without this guard a non-matching attribute string leaves
        // $matchesInner empty and reading its 'id' key raises a notice.
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1798
1799
    /**
     * Fluent setter: stores the given value unchanged in
     * $this->domainsToRemoveHttpPrefixFromAttributes.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes value to store (not validated here)
     *
     * @return $this
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        // Return the instance itself so calls can be chained.
        return $this;
    }
1810
1811
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Every text node found via the XPath "//text()" gets the whitespace
     * matched by self::$regExSpace replaced with a single space, unless its
     * node path contains "/<entry>" for one of the entries in
     * self::$skipTagsForRemoveWhitespace. Afterwards the underlying document
     * is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $textNodeWrapper) {
            /** @var \DOMNode $textNode */
            $textNode = $textNodeWrapper->getNode();

            $nodePath = $textNode->getNodePath();
            if ($nodePath === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // Plain concatenation: the "${var}" interpolation form is
                // deprecated as of PHP 8.2.
                if (\strpos($nodePath, '/' . $pattern) !== false) {
                    // Text below a skipped tag stays untouched.
                    continue 2;
                }
            }

            // preg_replace() returns null on error; keep the original text then.
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
            if ($nodeValueTmp !== null) {
                $textNode->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1851
1852
    /**
     * Fluent setter for the "keep broken html" flag; the value is stored in
     * $this->keepBrokenHtml.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml new value of the flag
     *
     * @return $this
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        // Return the instance itself so calls can be chained.
        return $this;
    }
1865
1866
    /**
     * Replaces $this->templateLogicSyntaxInSpecialScriptTags with the given
     * list after checking that every entry is a string.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException if any entry is not a string; the
     *                                   stored list is left unchanged then
     *
     * @return HtmlMin
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                // __FUNCTION__ keeps the message in sync with the method that
                // actually throws (it previously named a different method).
                throw new \InvalidArgumentException(__FUNCTION__ . ' only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1883
}
1884