Completed
Push — master ( 030b2b...67d39f )
by Lars
01:26
created

HtmlMin::getNextSiblingOfTypeDOMElement()   B

Complexity

Conditions 8
Paths 4

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 8

Importance

Changes 0
Metric Value
dl 0
loc 24
ccs 10
cts 10
cp 1
rs 8.4444
c 0
b 0
f 0
cc 8
nc 4
nop 1
crap 8
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainsLinksRelative = false;
199
200
    /**
201
     * @var string[]
202
     */
203
    private $localDomains = [];
204
205
    /**
206
     * @var array
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveDefaultTypeFromButton = false;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doRemoveDeprecatedTypeFromScriptTag = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveValueFromEmptyInput = true;
267
268
    /**
269
     * @var bool
270
     */
271
    private $doRemoveEmptyAttributes = true;
272
273
    /**
274
     * @var bool
275
     */
276
    private $doSumUpWhitespace = true;
277
278
    /**
279
     * @var bool
280
     */
281
    private $doRemoveSpacesBetweenTags = false;
282
283
    /**
284
     * @var bool
285
     */
286
    private $keepBrokenHtml = false;
287
288
    /**
289
     * @var bool
290
     */
291
    private $withDocType = false;
292
293
    /**
294
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
295
     *
296
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
297
     */
298
    private $domLoopObservers;
299
300
    /**
301
     * @var int
302
     */
303
    private $protected_tags_counter = 0;
304
305
    /**
306
     * @var bool
307
     */
308
    private $isHTML4 = false;
309
310
    /**
311
     * @var bool
312
     */
313
    private $isXHTML = false;
314
315
    /**
316
     * @var string[]|null
317
     */
318
    private $templateLogicSyntaxInSpecialScriptTags;
319
320
    /**
     * Set up a new minifier instance.
     *
     * Creates the (initially empty) observer storage and then registers the
     * default HtmlMinDomObserverOptimizeAttributes observer through
     * attachObserverToTheDomLoop().
     */
    public function __construct()
    {
        // The storage must exist before the first observer is attached.
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
329
330
    /**
     * Register an observer in the internal observer storage.
     *
     * The storage is an \SplObjectStorage, so attaching the same object twice
     * keeps a single entry.
     *
     * @param HtmlMinDomObserverInterface $observer the observer to register
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
339
340
    /**
     * Enable or disable attribute optimisation (property default: true).
     *
     * Within this class the flag is read by domNodeAttributesToString(): when
     * it is on, known boolean attributes are written without a value and the
     * whitespace inside "srcset" / "sizes" values is collapsed.
     *
     * @param bool $doOptimizeAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
351
352
    /**
     * Store the "optimize via HTML DOM parser" flag (property default: true).
     *
     * NOTE(review): the code that consumes this flag is outside this part of
     * the file; by its name it toggles the DOM-parser based optimisation pass.
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the flag
     *
     * @return $this fluent interface
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
363
364
    /**
     * Store the "remove comments" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls stripping of HTML comments.
     *
     * @param bool $doRemoveComments new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
375
376
    /**
     * Store the "remove default attributes" flag (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls removal of attributes that only carry
     * their default value.
     *
     * @param bool $doRemoveDefaultAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
387
388
    /**
     * Store the "remove deprecated anchor name" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns the deprecated "name" attribute on
     * anchor elements.
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
399
400
    /**
     * Store the "remove deprecated script charset attribute" flag
     * (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns the "charset" attribute on script tags.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
411
412
    /**
     * Store the "remove deprecated type from script tag" flag
     * (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns the "type" attribute on script tags.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
423
424
    /**
     * Store the "remove deprecated type from stylesheet link" flag
     * (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns the "type" attribute on stylesheet
     * link tags.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
435
436
    /**
     * Store the "remove deprecated type from style and link tag" flag
     * (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns the "type" attribute on style and
     * link tags.
     *
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
447
448
    /**
     * Store the "remove default media type from style and link tag" flag
     * (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns a default "media" attribute on style
     * and link tags.
     *
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
459
460
    /**
     * Store the "remove default type from button" flag
     * (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns the default "type" attribute on
     * button elements.
     *
     * @param bool $doRemoveDefaultTypeFromButton new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
471
472
    /**
     * Store the "remove empty attributes" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls dropping attributes with empty values.
     *
     * @param bool $doRemoveEmptyAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
483
484
    /**
     * Store the "remove http prefix from attributes" flag
     * (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns "http:" prefixes in URL attributes.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
495
496
    /**
     * Store the "remove https prefix from attributes" flag
     * (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns "https:" prefixes in URL attributes.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
507
508
    /**
     * Store the "keep http and https prefix on external attributes" flag
     * (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it exempts some attributes from the http/https
     * prefix removal.
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
519
520
    /**
     * Set the list of local domains and switch the "make same-domain links
     * relative" feature on or off accordingly.
     *
     * Every entry is normalised before it is stored: each occurrence of "//"
     * (optionally preceded by "http:" or "https:", case-insensitive) is
     * removed and trailing slashes are trimmed, e.g.
     * "https://example.com/" becomes "example.com". Array keys are preserved.
     *
     * The feature flag is enabled only when at least one domain was given;
     * passing an empty array disables it.
     *
     * @param string[] $localDomains domains (with or without scheme) that count as local
     *
     * @return $this fluent interface
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() instead of a by-reference foreach: no dangling reference
        // is left behind and the stored array contains plain values only.
        $this->localDomains = \array_map(
            static function ($localDomain): string {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
537
538
    /**
     * Return the local domains currently stored on this instance
     * (an empty array until doMakeSameDomainsLinksRelative() has been called).
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        return $this->localDomains;
    }
545
546
    /**
     * Enable or disable omission of optional HTML tags (property default: true).
     *
     * Within this class the flag is read by domNodeToString(): a closing tag
     * is only skipped when this flag is on, the document is neither HTML4 nor
     * XHTML, and domNodeClosingTagOptional() reports the tag as optional.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
557
558
    /**
     * Enable or disable removal of optional attribute quotes
     * (property default: true).
     *
     * Within this class the flag is read by domNodeAttributesToString(): when
     * it is on, quotes around an attribute value are dropped if the value is
     * non-empty and contains none of the characters that require quoting.
     *
     * @param bool $doRemoveOmittedQuotes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
569
570
    /**
     * Store the "remove spaces between tags" flag (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls stripping whitespace between tags.
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
581
582
    /**
     * Store the "remove value from empty input" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it concerns empty "value" attributes on inputs.
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
593
594
    /**
     * Store the "remove whitespace around tags" flag (property default: false).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls trimming whitespace next to tags.
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the flag
     *
     * @return $this fluent interface
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
605
606
    /**
     * Store the "sort CSS class names" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls sorting of the names inside "class"
     * attributes.
     *
     * @param bool $doSortCssClassNames new value of the flag
     *
     * @return $this fluent interface
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
617
618
    /**
     * Store the "sort HTML attributes" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls ordering of an element's attributes.
     *
     * @param bool $doSortHtmlAttributes new value of the flag
     *
     * @return $this fluent interface
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
629
630
    /**
     * Store the "sum up whitespace" flag (property default: true).
     *
     * NOTE(review): the consumer of this flag is not visible in this part of
     * the file; presumably it controls collapsing of repeated whitespace.
     *
     * @param bool $doSumUpWhitespace new value of the flag
     *
     * @return $this fluent interface
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
641
642 56
    /**
     * Serialise the attributes of a DOM node into an HTML attribute string.
     *
     * Attributes are written in iteration order, separated by single spaces:
     * - With doOptimizeAttributes on, attributes listed in
     *   self::$booleanAttributes are written as the bare name (no value).
     * - With doOptimizeAttributes on, whitespace runs and line breaks inside
     *   "srcset" and "sizes" values are collapsed to a single space.
     * - With doRemoveOmittedQuotes on, the quotes are dropped when the value
     *   is non-empty and free of characters that require quoting
     *   (<p class="foo"> → <p class=foo>); attributes whose name starts with
     *   "____SIMPLE_HTML_DOM__VOKU" or contains a space always stay quoted.
     * - Quoted values use double quotes, or single quotes when the value
     *   itself contains a double quote.
     *
     * NOTE(review): a value containing both quote characters is still emitted
     * inside single quotes without escaping, exactly as before.
     *
     * @param \DOMNode $node node whose attributes are serialised
     *
     * @return string the attribute string without surrounding whitespace,
     *                or '' when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the name alone is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid
                // UTF-8 with the /u modifier); keep the untouched value then
                // instead of silently emitting an empty attribute.
                $collapsed = \preg_replace(self::$regExSpace, ' ', $attr_val);
                if ($collapsed !== null) {
                    $attr_val = $collapsed;
                }
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attribute->value, '"') !== false) {
                // the value contains a double quote, so wrap it in single quotes
                $quoteTmp = "'";
            } else {
                $quoteTmp = '"';
            }

            $attr_str .= '=' . $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
703
704
    /**
     * Decide whether the closing tag of the given node may be omitted.
     *
     * Rules are taken from
     * https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * "Next sibling" below is whatever getNextSiblingOfTypeDOMElement() returns
     * for the node; null is treated as "no more content in the parent element".
     *
     * Implemented:
     * - html, head, body (self::$optional_end_tags): always optional.
     * - li: followed by another li, or no more content in the parent.
     * - optgroup: followed by another optgroup, or no more content in the parent.
     * - rp: followed by rt or rp, or no more content in the parent.
     * - tr: followed by another tr, or no more content in the parent.
     * - source (inside audio, video, picture or source): followed by another
     *   source, or no more content in the parent.
     * - td / th: followed by td or th, or no more content in the parent.
     * - dd: followed by dd or dt, or no more content in the parent.
     * - dt: followed by dt or dd (NOT when it is the last content).
     * - option: followed by option or optgroup, or no more content in the parent.
     * - p: followed by address, article, aside, blockquote, details, dir, div,
     *   dl, fieldset, figcaption, figure, footer, form, h1-h6, header, hgroup,
     *   hr, main, menu, nav, ol, p, pre, section, table or ul; or no more
     *   content in the parent and the parent is not a, audio, del, ins, map,
     *   noscript or video.
     *
     * @param \DOMNode $node node whose closing tag is examined
     *
     * @return bool true when the closing tag may be left out
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $no_more_content = $nextSibling === null;
        // stays null when there is no sibling or the sibling is not an element
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                return $no_more_content || $next_tag_name === 'li';

            case 'optgroup':
                return $no_more_content || $next_tag_name === 'optgroup';

            case 'rp':
                return $no_more_content || \in_array($next_tag_name, ['rp', 'rt'], true);

            case 'tr':
                return $no_more_content || $next_tag_name === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($no_more_content || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $no_more_content || \in_array($next_tag_name, ['td', 'th'], true);

            case 'dd':
                return $no_more_content || \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'dt':
                // Unlike <dd>, a <dt> end tag may only be omitted when a
                // <dt> or <dd> follows, never because it is the last content.
                return \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'option':
                return $no_more_content || \in_array($next_tag_name, ['option', 'optgroup'], true);

            case 'p':
                if ($no_more_content) {
                    // Use the null-safe parent name; a node without a parent
                    // has no qualifying parent element, so the tag is kept.
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
971
972 56
    /**
     * Serialize the child nodes of a DOM node into an HTML string.
     *
     * Elements are written as "<tag attributes>" followed by their recursively
     * serialized children and, unless it may be omitted, the closing tag. Text
     * nodes contribute their text, comments are wrapped in "<!--" / "-->" again
     * and every other node type is ignored.
     *
     * @param \DOMNode $node the node whose children are serialized (the node itself is not written)
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';

        // Small state machine: 'is_empty' is set when a whitespace position was handled,
        // it becomes 'last_was_empty' for exactly the next child and is reset afterwards.
        // The state is consulted before a separating space is appended.
        $whitespaceState = '';

        foreach ($node->childNodes as $child) {
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // whitespace-only text is only considered when it sits between two siblings
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsSpace = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                $spaceIsMissing = $whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ';
                if ($containsSpace || $spaceIsMissing) {
                    $output = \rtrim($output);

                    // no separating space is written for direct children of <head>
                    if ($child->parentNode && $child->parentNode->nodeName !== 'head') {
                        $output .= ' ';
                    }
                }
                $whitespaceState = 'is_empty';

                continue;
            }

            if (!($child instanceof \DOMElement)) {
                continue;
            }

            // opening tag (rtrim drops the separator when there are no attributes) + content
            $output .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
            $output .= '>' . $this->domNodeToString($child);

            // the closing tag is only left out when tag omission is enabled, the document
            // is neither flagged as HTML4 nor XHTML, and the tag itself qualifies
            if (
                !$this->doRemoveOmittedHtmlTags
                ||
                $this->isHTML4
                ||
                $this->isXHTML
                ||
                !$this->domNodeClosingTagOptional($child)
            ) {
                $output .= '</' . $child->tagName . '>';
            }

            if (
                $this->doRemoveWhitespaceAroundTags
                ||
                !($child->nextSibling instanceof \DOMText)
                ||
                $child->nextSibling->wholeText !== ' '
            ) {
                continue;
            }

            // the element is followed by a single-space text node
            if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                $output = \rtrim($output);

                if ($child->parentNode && $child->parentNode->nodeName !== 'head') {
                    $output .= ' ';
                }
            }
            $whitespaceState = 'is_empty';
        }

        return $output;
    }
1070
1071
    /**
     * Build the "<!DOCTYPE ...>" string from the first named document-type child of the given node.
     *
     * The identifiers are written as:
     * - public id only:   <!DOCTYPE name PUBLIC "publicId">
     * - system id only:   <!DOCTYPE name SYSTEM "systemId">
     * - both:             <!DOCTYPE name PUBLIC "publicId" "systemId">
     * - none:             <!DOCTYPE name>
     *
     * @param \DOMNode $node the node whose direct children are searched for a doctype
     *
     * @return string the doctype string, or an empty string when $this->withDocType is not set
     *                or no named doctype child exists
     */
    private function getDoctype(\DOMNode $node): string
    {
        // check the doc-type only if it wasn't generated by DomDocument itself
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' PUBLIC "' . $child->publicId . '"';

                // the system identifier follows the public one without a keyword,
                // separated by exactly one space
                if ($child->systemId) {
                    $doctype .= ' "' . $child->systemId . '"';
                }
            } elseif ($child->systemId) {
                $doctype .= ' SYSTEM "' . $child->systemId . '"';
            }

            return $doctype . '>';
        }

        return '';
    }
1106
1107
    /**
     * Getter for the "domainsToRemoveHttpPrefixFromAttributes" setting.
     *
     * @return array the value currently stored in $this->domainsToRemoveHttpPrefixFromAttributes
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1114
1115
    /**
     * Getter for the "doOptimizeAttributes" setting.
     *
     * @return bool the current value of $this->doOptimizeAttributes
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1122
1123
    /**
     * Getter for the "doOptimizeViaHtmlDomParser" setting.
     *
     * @return bool the current value of $this->doOptimizeViaHtmlDomParser
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1130
1131
    /**
     * Getter for the "doRemoveComments" setting.
     *
     * @return bool the current value of $this->doRemoveComments
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1138
1139
    /**
     * Getter for the "doRemoveDefaultAttributes" setting.
     *
     * @return bool the current value of $this->doRemoveDefaultAttributes
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1146
1147
    /**
     * Getter for the "doRemoveDeprecatedAnchorName" setting.
     *
     * @return bool the current value of $this->doRemoveDeprecatedAnchorName
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1154
1155
    /**
     * Getter for the "doRemoveDeprecatedScriptCharsetAttribute" setting.
     *
     * @return bool the current value of $this->doRemoveDeprecatedScriptCharsetAttribute
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1162
1163
    /**
     * Getter for the "doRemoveDeprecatedTypeFromScriptTag" setting.
     *
     * @return bool the current value of $this->doRemoveDeprecatedTypeFromScriptTag
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1170
1171
    /**
     * Getter for the "doRemoveDeprecatedTypeFromStylesheetLink" setting.
     *
     * @return bool the current value of $this->doRemoveDeprecatedTypeFromStylesheetLink
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1178
1179
    /**
     * Getter for the "doRemoveDeprecatedTypeFromStyleAndLinkTag" setting.
     *
     * @return bool the current value of $this->doRemoveDeprecatedTypeFromStyleAndLinkTag
     */
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1186
1187
    /**
     * Getter for the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" setting.
     *
     * @return bool the current value of $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag
     */
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1194
1195
    /**
     * Getter for the "doRemoveDefaultTypeFromButton" setting.
     *
     * @return bool the current value of $this->doRemoveDefaultTypeFromButton
     */
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        return $this->doRemoveDefaultTypeFromButton;
    }
1202
1203
    /**
     * Getter for the "doRemoveEmptyAttributes" setting.
     *
     * @return bool the current value of $this->doRemoveEmptyAttributes
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1210
1211
    /**
     * Getter for the "doRemoveHttpPrefixFromAttributes" setting.
     *
     * @return bool the current value of $this->doRemoveHttpPrefixFromAttributes
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1218
1219
    /**
     * Getter for the "doRemoveHttpsPrefixFromAttributes" setting.
     *
     * @return bool the current value of $this->doRemoveHttpsPrefixFromAttributes
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1226
1227
    /**
     * Getter for the "doKeepHttpAndHttpsPrefixOnExternalAttributes" setting.
     *
     * @return bool the current value of $this->doKeepHttpAndHttpsPrefixOnExternalAttributes
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1234
1235
    /**
     * Getter for the "doMakeSameDomainsLinksRelative" setting.
     *
     * @return bool the current value of $this->doMakeSameDomainsLinksRelative
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1242
1243
    /**
     * Getter for the "doRemoveOmittedHtmlTags" setting.
     *
     * @return bool the current value of $this->doRemoveOmittedHtmlTags
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1250
1251
    /**
     * Getter for the "doRemoveOmittedQuotes" setting.
     *
     * @return bool the current value of $this->doRemoveOmittedQuotes
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1258
1259
    /**
     * Getter for the "doRemoveSpacesBetweenTags" setting.
     *
     * @return bool the current value of $this->doRemoveSpacesBetweenTags
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1266
1267
    /**
     * Getter for the "doRemoveValueFromEmptyInput" setting.
     *
     * @return bool the current value of $this->doRemoveValueFromEmptyInput
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1274
1275
    /**
     * Getter for the "doRemoveWhitespaceAroundTags" setting.
     *
     * @return bool the current value of $this->doRemoveWhitespaceAroundTags
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1282
1283
    /**
     * Getter for the "doSortCssClassNames" setting.
     *
     * @return bool the current value of $this->doSortCssClassNames
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1290
1291
    /**
     * Getter for the "doSortHtmlAttributes" setting.
     *
     * @return bool the current value of $this->doSortHtmlAttributes
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1298
1299
    /**
     * Getter for the "doSumUpWhitespace" setting.
     *
     * @return bool the current value of $this->doSumUpWhitespace
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1306
1307
    /**
     * Getter for the "isHTML4" flag.
     *
     * @return bool the current value of $this->isHTML4
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1314
1315
    /**
     * Getter for the "isXHTML" flag.
     *
     * @return bool the current value of $this->isXHTML
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1322
1323
    /**
     * Minify an HTML string.
     *
     * Steps: optional DOM based minification, whitespace clean-up between attributes
     * (and optionally between tags), restoring of the protected fragments and entities,
     * and a final textual clean-up. If the result is longer than the trimmed input,
     * the trimmed input is returned instead.
     *
     * @param string $html                     the HTML to minify (cast to string and trimmed first)
     * @param bool   $multiDecodeNewHtmlEntity forwarded to the DOM based minification step
     *
     * @return string the minified HTML, or an empty string for empty / whitespace-only input
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        $html = \trim((string) $html);

        // compare with '' explicitly: a loose boolean check would also throw away the input "0"
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);

                    // PCRE error: keep the tag untouched instead of dropping its attributes
                    if ($attributes === null) {
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );

            // preg_replace_callback() returns null on error, never replace the html with that
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags && \strpos($html, ' ') !== false) {
            // Remove spaces that are between > and <
            $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );

            // without a successful restore the placeholders would leak into the output,
            // so the unmodified (trimmed) input is the only safe result
            if ($htmlTmp === null) {
                return $origHtml;
            }

            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly before / after the <html> and <head> tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            // ... and no separate closing tag directly after a ">"
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1454
1455
    /**
     * Walk along the following siblings of a node and return the first candidate
     * that is an element or a text node.
     *
     * A text sibling that is blank or contains a "<" is stepped over; the node
     * directly after such a text node becomes the next candidate as-is. Other
     * node types (e.g. comments) are walked past.
     *
     * @param \DOMNode $node the node to start from
     *
     * @return \DOMNode|null a \DOMElement, a \DOMText, or null when the siblings are exhausted
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        while (true) {
            $sibling = $node->nextSibling;

            if ($sibling instanceof \DOMText) {
                $text = $sibling->textContent;
                $isUsableText = \trim($text) !== '' && \strpos($text, '<') === false;

                // unusable text: jump to whatever follows it
                $node = $isUsableText ? $sibling : $sibling->nextSibling;
            } else {
                $node = $sibling;
            }

            if (
                $node === null
                ||
                $node instanceof \DOMElement
                ||
                $node instanceof \DOMText
            ) {
                return $node;
            }
        }
    }
1484
1485
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or ends with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the plain substring search is a pre-check, the regular expression verifies the position

        /** @noinspection RegExpRedundantEscape */
        $startsWithCondition = \strpos($comment, '[if ') !== false
                               &&
                               \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               &&
               \preg_match('/\[endif\]$/', $comment);
    }
1517
1518
    /**
     * Run the DOM based minification and return the resulting HTML string.
     *
     * Order of the steps: load the HTML, detect the doctype, protect <nocompress>,
     * notify the observers ("before"), protect further tags and conditional comments,
     * remove comments, sum-up whitespace, per element whitespace handling and
     * observer notification ("after"), serialize the DOM.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not format the output with indentation

        // load the html
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // only a doctype that is part of the input is taken into account
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        $doctype = $this->getDoctype($dom->getDocument());

        // NOTE(review): both flags are only assigned when a doctype was found, so they
        // seem to keep their previous values for input without a doctype - confirm that
        // this is intended when one instance is reused.
        if ($doctype) {
            $this->isHTML4 = \strpos($doctype, 'html4') !== false;
            $this->isXHTML = \strpos($doctype, 'xhtml1') !== false;
        }

        // protect <nocompress> HTML tags first
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // notify the observers before the minification
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // protect HTML tags and conditional comments
        $dom = $this->protectTags($dom);

        // remove default HTML comments [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // sum-up extra whitespace from the dom [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            // remove whitespace around tags [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            // notify the observers after the minification
            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // convert the dom into a string
        $serialized = $doctype . $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serialized, $multiDecodeNewHtmlEntity);
    }
1612
1613
    /**
     * Call domElementAfterMinification() on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that was processed
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            // the observer receives the element and this minifier instance
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1624
1625
    /**
     * Call domElementBeforeMinification() on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is about to be processed
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            // the observer receives the element and this minifier instance
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1636
1637
    /**
     * Replace the content around every element matching the selector by a placeholder.
     *
     * For each match the inner HTML of the element's parent is stored in
     * $this->protectedChildNodes and the parent's node value is overwritten with a
     * placeholder tag that carries the storage index.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser the given parser instance
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = $match->parentNode()->innerHtml();

            $parent = $match->getNode()->parentNode;
            if ($parent !== null) {
                $parent->nodeValue = \sprintf(
                    '<%1$s data-%1$s="%2$s"></%1$s>',
                    $this->protectedChildNodesHelper,
                    $index
                );
            }

            // the index is consumed even if no parent node was available
            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1661
1662
    /**
     * Prevent changes of "code" tags, inline "styles" / "scripts" and conditional comments.
     *
     * The original content is stored in $this->protectedChildNodes and replaced
     * by a placeholder tag that carries the storage index.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given parser instance
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        $placeholderFormat = '<%1$s data-%1$s="%2$s"></%1$s>';

        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            if ($inlineElement->tag === 'script' || $inlineElement->tag === 'style') {
                $attributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = \sprintf(
                $placeholderFormat,
                $this->protectedChildNodesHelper,
                $index
            );

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that holds the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText(
                \sprintf($placeholderFormat, $this->protectedChildNodesHelper, $index)
            );
            $parent = $commentElement->getNode()->parentNode;
            if ($parent !== null) {
                $parent->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1719
1720
    /**
     * Remove comments from the dom.
     *
     * Comments whose text contains a "[" are kept (presumably to spare conditional
     * comments - verify against the callers); afterwards the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given parser instance
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentElement) {
            $commentNode = $commentElement->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $parent = $commentNode->parentNode;
            if ($parent !== null) {
                $parent->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1744
1745
    /**
     * Collapse whitespace in the text nodes in and around an element.
     *
     * Only elements whose tag is a key of self::$trimWhitespaceFromTags and that have
     * at least one child node are handled. For such an element the first child, the
     * last child, the previous sibling and the next sibling are inspected; in each of
     * them that is a text node, every match of self::$regExSpace is replaced by a
     * single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // iterate by value: the nodes are objects, so no reference is needed to modify
        // them, and no dangling reference is left behind after the loop
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);

            // preg_replace() returns null on error, keep the old value in that case
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1781
1782
    /**
     * Callback function for preg_replace_callback use.
     *
     * Looks up the protected HTML that belongs to a placeholder tag. The index is
     * read from the last double-quoted, digits-only value in the "attributes" match.
     *
     * @param array $matches PREG matches, the named group "attributes" is used
     *
     * @return string the stored HTML, or an empty string if no index could be read
     *                or nothing is stored for it
     */
    private function restoreProtectedHtml($matches): string
    {
        $matchesInner = [];

        // a placeholder without a readable index cannot be resolved; return early
        // instead of reading an undefined array key below
        if (
            !isset($matches['attributes'])
            ||
            \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
        ) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1795
1796
    /**
     * Fluent setter for the "domainsToRemoveHttpPrefixFromAttributes" setting.
     *
     * The value is stored as given; no validation happens here.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes new value of the setting
     *
     * @return $this
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1807
1808
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * The value of every text node is run through self::$regExSpace and each
     * match is replaced by a single space. Text nodes whose node path contains
     * "/<entry>" for any entry of self::$skipTagsForRemoveWhitespace are left
     * untouched. Finally the underlying document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // Plain concatenation instead of "${pattern}": that
                // interpolation form is deprecated since PHP 8.2.
                if (\strpos($xp, '/' . $pattern) !== false) {
                    // Node lives below a skipped tag: go on with the next text node.
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            // preg_replace() yields null on error; keep the original value then.
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1848
1849
    /**
     * Fluent setter for the "keepBrokenHtml" flag.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml new value of the flag
     *
     * @return HtmlMin this instance, to allow chaining
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1862
1863
    /**
     * Replaces the stored "templateLogicSyntaxInSpecialScriptTags" list.
     *
     * Every element is checked first; the stored list is only replaced when
     * all elements are strings.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException if any element is not a string
     *
     * @return HtmlMin this instance, to allow chaining
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                // Name the method that actually rejected the input, so the
                // message cannot drift from the method name.
                throw new \InvalidArgumentException(__FUNCTION__ . ' only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1880
}
1881