Completed
Push — master ( fa7e00...4f7005 )
by Lars
02:23
created

HtmlMin::minifyHtmlDom()   D

Complexity

Conditions 11
Paths 288

Size

Total Lines 100

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 32
CRAP Score 11

Importance

Changes 0
Metric Value
dl 0
loc 100
ccs 32
cts 32
cp 1
rs 4.3066
c 0
b 0
f 0
cc 11
nc 288
nop 2
crap 11

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainsLinksRelative = false;
199
200
    /**
201
     * @var string[]
202
     */
203
    private $localDomains = [];
204
205
    /**
206
     * @var string[]
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var string[]
215
     */
216
    private $specialHtmlCommentsStaringWith = [];
217
218
    /**
219
     * @var string[]
220
     */
221
    private $specialHtmlCommentsEndingWith = [];
222
223
    /**
224
     * @var bool
225
     */
226
    private $doSortCssClassNames = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doSortHtmlAttributes = true;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDefaultAttributes = false;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedAnchorName = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveDefaultTypeFromButton = false;
267
268
    /**
269
     * @var bool
270
     */
271
    private $doRemoveDeprecatedTypeFromScriptTag = true;
272
273
    /**
274
     * @var bool
275
     */
276
    private $doRemoveValueFromEmptyInput = true;
277
278
    /**
279
     * @var bool
280
     */
281
    private $doRemoveEmptyAttributes = true;
282
283
    /**
284
     * @var bool
285
     */
286
    private $doSumUpWhitespace = true;
287
288
    /**
289
     * @var bool
290
     */
291
    private $doRemoveSpacesBetweenTags = false;
292
293
    /**
294
     * @var bool
295
     */
296
    private $keepBrokenHtml = false;
297
298
    /**
299
     * @var bool
300
     */
301
    private $withDocType = false;
302
303
    /**
304
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
305
     *
306
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
307
     */
308
    private $domLoopObservers;
309
310
    /**
311
     * @var int
312
     */
313
    private $protected_tags_counter = 0;
314
315
    /**
316
     * @var bool
317
     */
318
    private $isHTML4 = false;
319
320
    /**
321
     * @var bool
322
     */
323
    private $isXHTML = false;
324
325
    /**
326
     * @var string[]|null
327
     */
328
    private $templateLogicSyntaxInSpecialScriptTags;
329
330
    /**
     * Create a minifier with an empty observer storage and register the
     * attribute-optimizing observer as the first DOM-loop observer.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
339
340
    /**
     * Register an additional observer in the DOM-loop observer storage.
     *
     * The storage is an \SplObjectStorage, so attaching the same observer
     * instance twice keeps a single entry.
     *
     * @param HtmlMinDomObserverInterface $observer observer instance to register
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
349
350
    /**
     * Switch the "doOptimizeAttributes" option on or off (fluent setter).
     *
     * Enabled by default. When serializing attributes, this option makes
     * known boolean attributes lose their value and collapses whitespace
     * in "srcset" / "sizes" values.
     *
     * @param bool $doOptimizeAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
361
362
    /**
     * Switch the "doOptimizeViaHtmlDomParser" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doOptimizeViaHtmlDomParser new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
373
374
    /**
     * Switch the "doRemoveComments" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveComments new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
385
386
    /**
     * Switch the "doRemoveDefaultAttributes" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDefaultAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
397
398
    /**
     * Switch the "doRemoveDeprecatedAnchorName" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDeprecatedAnchorName new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
409
410
    /**
     * Switch the "doRemoveDeprecatedScriptCharsetAttribute" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
421
422
    /**
     * Switch the "doRemoveDeprecatedTypeFromScriptTag" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
433
434
    /**
     * Switch the "doRemoveDeprecatedTypeFromStylesheetLink" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
445
446
    /**
     * Switch the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
457
458
    /**
     * Switch the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
469
470
    /**
     * Switch the "doRemoveDefaultTypeFromButton" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveDefaultTypeFromButton new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
481
482
    /**
     * Switch the "doRemoveEmptyAttributes" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveEmptyAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
493
494
    /**
     * Switch the "doRemoveHttpPrefixFromAttributes" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
505
506
    /**
     * Switch the "doRemoveHttpsPrefixFromAttributes" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
517
518
    /**
     * Switch the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
529
530
    /**
     * Store the list of local domains and enable "doMakeSameDomainsLinksRelative"
     * when at least one domain was given (an empty list disables it).
     *
     * Every entry is normalized before it is stored: each "//", optionally
     * preceded by "http:" or "https:" (case-insensitive), is removed and
     * trailing slashes are trimmed, e.g. "https://example.com/" becomes
     * "example.com". Array keys are preserved.
     *
     * @param string[] $localDomains
     *
     * @return $this
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() instead of a by-reference foreach: no reference to the
        // last element is left behind in the stored array.
        $this->localDomains = \array_map(
            static function ($localDomain): string {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
547
548
    /**
     * Get the local domains as stored by doMakeSameDomainsLinksRelative()
     * (an empty array until that method has been called).
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        return $this->localDomains;
    }
555
556
    /**
     * Switch the "doRemoveOmittedHtmlTags" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveOmittedHtmlTags new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
567
568
    /**
     * Switch the "doRemoveOmittedQuotes" option on or off (fluent setter).
     *
     * Enabled by default. While enabled, attribute values are serialized
     * without surrounding quotes whenever the value allows it.
     *
     * @param bool $doRemoveOmittedQuotes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
579
580
    /**
     * Switch the "doRemoveSpacesBetweenTags" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveSpacesBetweenTags new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
591
592
    /**
     * Switch the "doRemoveValueFromEmptyInput" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveValueFromEmptyInput new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
603
604
    /**
     * Switch the "doRemoveWhitespaceAroundTags" option on or off (fluent setter).
     *
     * Disabled by default; calling the method without an argument enables it.
     *
     * @param bool $doRemoveWhitespaceAroundTags new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
615
616
    /**
     * Switch the "doSortCssClassNames" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doSortCssClassNames new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
627
628
    /**
     * Switch the "doSortHtmlAttributes" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doSortHtmlAttributes new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
639
640
    /**
     * Switch the "doSumUpWhitespace" option on or off (fluent setter).
     *
     * Enabled by default; calling the method without an argument enables it.
     *
     * @param bool $doSumUpWhitespace new state of the option (defaults to true)
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
651
652 59
    /**
     * Serialize all attributes of a DOM node into one space-separated string,
     * e.g. `class=foo checked title="a b"`.
     *
     * - With "doOptimizeAttributes" enabled, attributes listed in
     *   self::$booleanAttributes are written as the bare name and the
     *   whitespace inside "srcset" / "sizes" values is collapsed.
     * - With "doRemoveOmittedQuotes" enabled, the quotes around a value are
     *   dropped when that is allowed (<p class="foo"> → <p class=foo>).
     * - A quoted value that contains a double quote is wrapped in single quotes.
     *
     * @param \DOMNode $node
     *
     * @return string the attribute string without leading / trailing whitespace,
     *                or an empty string if the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the name alone is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error: keep the raw value
                // instead of silently emitting an empty attribute.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attr_val) ?? $attr_val;
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            //
            // The check runs on the value that is actually written: the whitespace
            // normalization above can introduce a space that the raw value did not
            // contain, and an unquoted value must not contain spaces.
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attr_val !== ''
                           &&
                           // NOTE(review): presumably internal placeholder attributes of the DOM parser - confirm
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attr_val) === 0;

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') !== false) {
                // the value itself contains a double quote: wrap it in single quotes
                $quoteTmp = "'";
            } else {
                $quoteTmp = '"';
            }

            $attr_str .= '=' . $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
713
714
    /**
     * Decide whether the end tag of the given node may be left out.
     *
     * Rules: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * Implemented:
     *
     * - Every tag listed in self::$optional_end_tags: always optional.
     * - A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element. ("dir" is additionally accepted as a following element.)
     * - An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
     * - A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
     * - An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
     * - A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
     * - A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
     * - A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
     * - A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
     * - An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
     * - An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
     * - <source> inside audio, video, picture or source: optional if followed by another source element or if there is no following element.
     *
     * "Immediately followed" is evaluated on the node returned by
     * getNextSiblingOfTypeDOMElement().
     *
     * @param \DOMNode $node
     *
     * @return bool true if the end tag may be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last_element = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // TODO: Not Implemented
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // tag name => following elements that make the end tag optional
        $optional_when_followed_by = [
            'dd'       => ['dd', 'dt'],
            'dt'       => ['dd', 'dt'],
            'li'       => ['li'],
            'optgroup' => ['optgroup'],
            'option'   => ['option', 'optgroup'],
            'rp'       => ['rp', 'rt'],
            'td'       => ['td', 'th'],
            'th'       => ['td', 'th'],
            'tr'       => ['tr'],
        ];

        if (isset($optional_when_followed_by[$tag_name])) {
            if ($is_last_element) {
                // "no more content in the parent element" applies to all of
                // these tags except <dt>, which needs a following dt / dd.
                return $tag_name !== 'dt';
            }

            return \in_array($next_tag_name, $optional_when_followed_by[$tag_name], true);
        }

        if ($tag_name === 'source') {
            return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                   &&
                   (
                       $is_last_element
                       ||
                       $next_tag_name === 'source'
                   );
        }

        if ($tag_name === 'p') {
            if ($is_last_element) {
                return $parent_tag_name !== null
                       &&
                       !\in_array(
                           $parent_tag_name,
                           [
                               'a',
                               'audio',
                               'del',
                               'ins',
                               'map',
                               'noscript',
                               'video',
                           ],
                           true
                       );
            }

            return \in_array(
                $next_tag_name,
                [
                    'address',
                    'article',
                    'aside',
                    'blockquote',
                    'details',
                    'dir',
                    'div',
                    'dl',
                    'fieldset',
                    'figcaption',
                    'figure',
                    'footer',
                    'form',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'h6',
                    'header',
                    'hgroup',
                    'hr',
                    'main',
                    'menu',
                    'nav',
                    'ol',
                    'p',
                    'pre',
                    'section',
                    'table',
                    'ul',
                ],
                true
            );
        }

        return false;
    }
983
984 59
    /**
     * Serialize all child nodes of the given node into one HTML string.
     *
     * - Elements are written as start tag (with attributes), recursively serialized children and the
     *   end tag; the end tag is left out when omitted-tag removal is enabled, the document is neither
     *   HTML4 nor XHTML and domNodeClosingTagOptional() agrees.
     * - Whitespace-only text nodes that have both a previous and a next sibling are reduced to at
     *   most one space (never inside a "head" parent).
     * - All other text nodes are appended via "wholeText", comments are re-wrapped in "<!--" / "-->".
     *
     * NOTE(review): "wholeText" covers all logically adjacent text nodes; this presumably relies on
     * adjacent text nodes having been merged before serialization - confirm.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';

        // Marker with three states: '' (nothing special), 'is_empty' (a whitespace position was handled
        // in this iteration) and 'last_was_empty' (it was handled in the previous iteration).
        $whitespaceState = '';

        // Strip trailing whitespace and re-add exactly one space, unless the parent element is <head>.
        $withSingleTrailingSpace = static function (string $current, \DOMNode $domNode): string {
            $current = \rtrim($current);

            if ($domNode->parentNode && $domNode->parentNode->nodeName !== 'head') {
                $current .= ' ';
            }

            return $current;
        };

        foreach ($node->childNodes as $childNode) {
            // age the marker by one iteration
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($childNode instanceof \DOMElement) {
                $startTag = \rtrim('<' . $childNode->tagName . ' ' . $this->domNodeAttributesToString($childNode));
                $output .= $startTag . '>' . $this->domNodeToString($childNode);

                $endTagIsOmitted = $this->doRemoveOmittedHtmlTags
                                   && !$this->isHTML4
                                   && !$this->isXHTML
                                   && $this->domNodeClosingTagOptional($childNode);
                if (!$endTagIsOmitted) {
                    $output .= '</' . $childNode->tagName . '>';
                }

                if ($this->doRemoveWhitespaceAroundTags) {
                    continue;
                }

                /** @var \DOMText|null $following - false-positive error from phpstan */
                $following = $childNode->nextSibling;
                if (!($following instanceof \DOMText) || $following->wholeText !== ' ') {
                    continue;
                }

                if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                    $output = $withSingleTrailingSpace($output, $childNode);
                }
                $whitespaceState = 'is_empty';
            } elseif ($childNode instanceof \DOMText) {
                if (!$childNode->isElementContentWhitespace()) {
                    $output .= $childNode->wholeText;

                    continue;
                }

                // whitespace at the very start or end of the parent is dropped completely
                if ($childNode->previousSibling === null || $childNode->nextSibling === null) {
                    continue;
                }

                $containsSpace = $childNode->wholeText && \strpos($childNode->wholeText, ' ') !== false;
                if (
                    $containsSpace
                    ||
                    ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ')
                ) {
                    $output = $withSingleTrailingSpace($output, $childNode);
                }
                $whitespaceState = 'is_empty';
            } elseif ($childNode instanceof \DOMComment) {
                $output .= '<!--' . $childNode->textContent . '-->';
            }
        }

        return $output;
    }
1083
1084
    /**
     * Build the "<!DOCTYPE ...>" string from the first named document-type child of the given node.
     *
     * Returns an empty string when $this->withDocType is false (the doc-type was not part of the
     * input) or when no named \DOMDocumentType child exists.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    private function getDoctype(\DOMNode $node): string
    {
        // check the doc-type only if it wasn't generated by DomDocument itself
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' PUBLIC "' . $child->publicId . '"';

                // The system identifier follows the public one, separated by exactly one space
                // (no keyword in between).
                if ($child->systemId) {
                    $doctype .= ' "' . $child->systemId . '"';
                }
            } elseif ($child->systemId) {
                $doctype .= ' SYSTEM "' . $child->systemId . '"';
            }

            return $doctype . '>';
        }

        return '';
    }
1119
1120
    /**
     * Get the list stored in $this->domainsToRemoveHttpPrefixFromAttributes.
     *
     * @return array presumably a list of domain names - confirm against the corresponding setter
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1127
1128
    /**
     * Report the current state of the "doOptimizeAttributes" setting.
     *
     * @return bool the value stored in $this->doOptimizeAttributes
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1135
1136
    /**
     * Report the current state of the "doOptimizeViaHtmlDomParser" setting.
     *
     * @return bool the value stored in $this->doOptimizeViaHtmlDomParser
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1143
1144
    /**
     * Report the current state of the "doRemoveComments" setting.
     *
     * @return bool the value stored in $this->doRemoveComments
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1151
1152
    /**
     * Report the current state of the "doRemoveDefaultAttributes" setting.
     *
     * @return bool the value stored in $this->doRemoveDefaultAttributes
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1159
1160
    /**
     * Report the current state of the "doRemoveDeprecatedAnchorName" setting.
     *
     * @return bool the value stored in $this->doRemoveDeprecatedAnchorName
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1167
1168
    /**
     * Report the current state of the "doRemoveDeprecatedScriptCharsetAttribute" setting.
     *
     * @return bool the value stored in $this->doRemoveDeprecatedScriptCharsetAttribute
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1175
1176
    /**
     * Report the current state of the "doRemoveDeprecatedTypeFromScriptTag" setting.
     *
     * @return bool the value stored in $this->doRemoveDeprecatedTypeFromScriptTag
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1183
1184
    /**
     * Report the current state of the "doRemoveDeprecatedTypeFromStylesheetLink" setting.
     *
     * @return bool the value stored in $this->doRemoveDeprecatedTypeFromStylesheetLink
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1191
1192
    /**
     * Report the current state of the "doRemoveDeprecatedTypeFromStyleAndLinkTag" setting.
     *
     * @return bool the value stored in $this->doRemoveDeprecatedTypeFromStyleAndLinkTag
     */
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1199
1200
    /**
     * Report the current state of the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" setting.
     *
     * @return bool the value stored in $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag
     */
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1207
1208
    /**
     * Report the current state of the "doRemoveDefaultTypeFromButton" setting.
     *
     * @return bool the value stored in $this->doRemoveDefaultTypeFromButton
     */
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        return $this->doRemoveDefaultTypeFromButton;
    }
1215
1216
    /**
     * Report the current state of the "doRemoveEmptyAttributes" setting.
     *
     * @return bool the value stored in $this->doRemoveEmptyAttributes
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1223
1224
    /**
     * Report the current state of the "doRemoveHttpPrefixFromAttributes" setting.
     *
     * @return bool the value stored in $this->doRemoveHttpPrefixFromAttributes
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1231
1232
    /**
     * Report the current state of the "doRemoveHttpsPrefixFromAttributes" setting.
     *
     * @return bool the value stored in $this->doRemoveHttpsPrefixFromAttributes
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1239
1240
    /**
     * Report the current state of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" setting.
     *
     * The method name starts with "isdo" (lower-case "d"), unlike its "isDo..." siblings; it is kept
     * as declared.
     *
     * @return bool the value stored in $this->doKeepHttpAndHttpsPrefixOnExternalAttributes
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1247
1248
    /**
     * Report the current state of the "doMakeSameDomainsLinksRelative" setting.
     *
     * @return bool the value stored in $this->doMakeSameDomainsLinksRelative
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1255
1256
    /**
     * Report the current state of the "doRemoveOmittedHtmlTags" setting.
     *
     * @return bool the value stored in $this->doRemoveOmittedHtmlTags
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1263
1264
    /**
     * Report the current state of the "doRemoveOmittedQuotes" setting.
     *
     * @return bool the value stored in $this->doRemoveOmittedQuotes
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1271
1272
    /**
     * Report the current state of the "doRemoveSpacesBetweenTags" setting.
     *
     * @return bool the value stored in $this->doRemoveSpacesBetweenTags
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1279
1280
    /**
     * Report the current state of the "doRemoveValueFromEmptyInput" setting.
     *
     * @return bool the value stored in $this->doRemoveValueFromEmptyInput
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1287
1288
    /**
     * Report the current state of the "doRemoveWhitespaceAroundTags" setting.
     *
     * @return bool the value stored in $this->doRemoveWhitespaceAroundTags
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1295
1296
    /**
     * Report the current state of the "doSortCssClassNames" setting.
     *
     * @return bool the value stored in $this->doSortCssClassNames
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1303
1304
    /**
     * Report the current state of the "doSortHtmlAttributes" setting.
     *
     * @return bool the value stored in $this->doSortHtmlAttributes
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1311
1312
    /**
     * Report the current state of the "doSumUpWhitespace" setting.
     *
     * @return bool the value stored in $this->doSumUpWhitespace
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1319
1320
    /**
     * Report the "isHTML4" flag.
     *
     * @return bool the value stored in $this->isHTML4
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1327
1328
    /**
     * Report the "isXHTML" flag.
     *
     * @return bool the value stored in $this->isXHTML
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1335
1336
    /**
     * Minify the given HTML string.
     *
     * Steps: optional DOM based minification (minifyHtmlDom()), whitespace clean-up between
     * attributes and (optionally) between tags, restoring of protected HTML and HTML entities, and a
     * final clean-up of line breaks around html/head tags and of self-closing tag notation.
     *
     * If the result is longer than the (trimmed) input, the trimmed input is returned instead.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded unchanged to minifyHtmlDom()
     *
     * @return string the minified HTML, or an empty string for empty / whitespace-only input
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare against the empty string explicitly: a loose truthiness check would also
        // discard the valid (non-empty) input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = $matches[2];

                    if ($attributes !== '') {
                        $attributesTmp = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $attributes);

                        // NULL signals a PCRE error (e.g. invalid UTF-8 together with the "u" modifier):
                        // keep the raw attributes instead of silently dropping them from the tag.
                        $attributes = $attributesTmp !== null ? $attributesTmp : ' ' . $attributes;
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );

            // on a PCRE error this optional clean-up step is skipped
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);

                // on a PCRE error this optional clean-up step is skipped
                if ($htmlTmp !== null) {
                    $html = $htmlTmp;
                }
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            // the helper name is used as a literal inside the pattern
            $helperQuoted = \preg_quote($this->protectedChildNodesHelper, '/');

            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $helperQuoted . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $helperQuoted . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );

            // Without a successful restore the output would still contain placeholders,
            // so fall back to the untouched input.
            if ($htmlTmp === null) {
                return $origHtml;
            }

            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1467
1468
    /**
     * Walk forward through the siblings of the given node until an element, a text node or the
     * end of the sibling list is reached.
     *
     * A directly following text node is only kept as the current position when it contains
     * non-whitespace text and no "<" character; otherwise the search continues with the sibling
     * that follows this text node.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $current = $node;

        do {
            /** @var \DOMElement|\DOMText|null $next - false-positive error from phpstan */
            $next = $current->nextSibling;

            if ($next instanceof \DOMText) {
                $hasRealText = \trim($next->textContent) !== ''
                               && \strpos($next->textContent, '<') === false;

                // step over the text node when it is empty or looks like markup
                $current = $hasRealText ? $next : $next->nextSibling;
            } else {
                $current = $next;
            }
        } while (
            $current !== null
            &&
            !($current instanceof \DOMElement)
            &&
            !($current instanceof \DOMText)
        );

        return $current;
    }
1496
1497
    /**
     * Check if the given comment text is (part of) a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or ends with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // cheap substring check first, the regex only runs when the marker is present at all
        /** @noinspection RegExpRedundantEscape */
        $startsWithCondition = \strpos($comment, '[if ') !== false
                               && \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        $endsWithEndif = \strpos($comment, '[endif]') !== false
                         && \preg_match('/\[endif\]$/', $comment);

        return $endsWithEndif;
    }
1529
1530
    /**
     * Check if the given comment text is a special comment, i.e. it starts with one of the
     * strings in $this->specialHtmlCommentsStaringWith or ends with one of the strings in
     * $this->specialHtmlCommentsEndingWith.
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isSpecialComment($comment): bool
    {
        foreach ($this->specialHtmlCommentsStaringWith as $prefix) {
            $position = \strpos($comment, $prefix);
            if ($position === 0) {
                return true;
            }
        }

        foreach ($this->specialHtmlCommentsEndingWith as $suffix) {
            $tail = \substr($comment, -\strlen($suffix));
            if ($tail === $suffix) {
                return true;
            }
        }

        return false;
    }
1553
1554
    /**
     * Minify the HTML via the DOM: load it into a HtmlDomParser, protect content that must not be
     * touched, optionally remove comments and sum up whitespace, notify the registered observers
     * for every element (before and after) and serialize the DOM back into a string.
     *
     * Side effects: sets $this->withDocType and - when a doc-type string was found - $this->isHTML4
     * and $this->isXHTML.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded unchanged to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not formats output with indentation

        // Remove content before <!DOCTYPE.*> because otherwise the DOMDocument can not handle the input.
        if (\stripos($html, '<!DOCTYPE') !== false) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (
                \preg_match('/(^.*?)<!(?:DOCTYPE)(?: [^>]*)?>/sui', $html, $matches_before_doctype)
                &&
                \trim($matches_before_doctype[1]) !== ''
            ) {
                // The captured group is anchored at the start of the string, so cut exactly this
                // prefix off. (Replacing the text everywhere would also delete later occurrences
                // of the same text inside the document.)
                $html = (string) \substr($html, \strlen($matches_before_doctype[1]));
            }
        }

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        $this->withDocType = (\stripos($html, '<!DOCTYPE') === 0);

        $doctypeStr = $this->getDoctype($dom->getDocument());

        // NOTE(review): the two flags are only written when a doc-type was found, so values from a
        // previous minify() call on the same instance stay in place otherwise - confirm this is intended.
        if ($doctypeStr) {
            $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
            $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;
        }

        // -------------------------------------------------------------------------
        // Protect <nocompress> HTML tags first.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Notify the Observer before the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            // -------------------------------------------------------------------------
            // Notify the Observer after the minification.
            // -------------------------------------------------------------------------

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        return $dom->fixHtmlOutput(
            $doctypeStr . $this->domNodeToString($dom->getDocument()),
            $multiDecodeNewHtmlEntity
        );
    }
1660
1661
    /**
     * Call domElementAfterMinification() on every observer in $this->domLoopObservers, passing the
     * given element and this instance.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $registeredObserver) {
            $registeredObserver->domElementAfterMinification($domElement, $this);
        }
    }
1672
1673
    /**
     * Call domElementBeforeMinification() on every observer in $this->domLoopObservers, passing the
     * given element and this instance.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $registeredObserver) {
            $registeredObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1684
1685
    /**
     * Protect every element matching the selector: the inner HTML of the element's parent is
     * stored in $this->protectedChildNodes under the current counter value, and the parent's node
     * value is replaced by a placeholder tag that carries this counter value.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $index = $this->protected_tags_counter;

            // remember the original markup before it is overwritten below
            $this->protectedChildNodes[$index] = $match->parentNode()->innerHtml();

            $domParent = $match->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1709
1710
    /**
     * Prevent changes of "code" elements, inline "script" / "style" content and conditional or
     * special comments by swapping them for placeholder tags; the original markup is stored in
     * $this->protectedChildNodes.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            if (\in_array($inlineElement->tag, ['script', 'style'], true)) {
                $attributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // only conditional and special comments are protected, all others stay untouched here
            if ($this->isConditionalComment($commentText) || $this->isSpecialComment($commentText)) {
                $index = $this->protected_tags_counter;
                $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

                /* @var $commentNode \DOMComment */
                $commentNode = $commentElement->getNode();
                $placeholder = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>');

                $owner = $commentNode->parentNode;
                if ($owner !== null) {
                    $owner->replaceChild($placeholder, $commentNode);
                }

                ++$this->protected_tags_counter;
            }
        }

        return $dom;
    }
1770
1771
    /**
     * Remove every comment node whose value contains no "[" character from the dom and normalize
     * the document afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $wrappedComment) {
            $commentNode = $wrappedComment->getNode();

            // comments containing "[" are kept
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner !== null) {
                $owner->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1795
1796
    /**
     * Collapse whitespace in the text nodes at the edges of, and adjacent to, a tag.
     *
     * Nothing happens unless the element's tag is a key of
     * self::$trimWhitespaceFromTags and the underlying node has at least one
     * child. In that case the first child, last child, previous sibling and
     * next sibling are inspected; for each of them that is a text node, every
     * match of self::$regExSpace in its value is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        // NOTE(review): static analysis reports that "tag" is accessed on the
        // interface type rather than on a concrete implementation; confirm that
        // every SimpleHtmlDomInterface implementation exposes this property.
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length === 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // Iterate by value: the entries are object handles, so assigning to
        // "nodeValue" already updates the real node and no reference (which
        // would stay bound after the loop) is needed.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // preg_replace() returns null on a PCRE error; keep the old value then
            $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($collapsed !== null) {
                $candidate->nodeValue = $collapsed;
            }
        }
    }
1832
1833
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the quoted numeric id out of the "attributes" capture group and
     * returns the entry of $this->protectedChildNodes stored under that id.
     *
     * @param array $matches PREG matches; the named group "attributes" is read
     *
     * @return string the protected html for the id, or an empty string when the
     *                attributes carry no quoted id or no entry exists for it
     */
    private function restoreProtectedHtml($matches): string
    {
        $found = \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner);

        // Without a match $matchesInner has no "id" key; bail out explicitly
        // instead of reading an undefined index.
        if ($found !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1846
1847
    /**
     * Replace the stored list of domains used for http-prefix removal.
     *
     * The given value is assigned as-is to
     * $this->domainsToRemoveHttpPrefixFromAttributes; no validation is done here.
     *
     * @param string[] $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this fluent interface
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1858
1859
    /**
     * Configure the prefixes and suffixes that mark a html comment as "special".
     *
     * Both lists overwrite the previously stored values; omitting the second
     * argument resets the suffix list to an empty array.
     *
     * @param string[] $startingWith stored in $this->specialHtmlCommentsStaringWith
     * @param string[] $endingWith   stored in $this->specialHtmlCommentsEndingWith
     *
     * @return $this fluent interface
     */
    public function setSpecialHtmlComments(array $startingWith, array $endingWith = []): self
    {
        $this->specialHtmlCommentsStaringWith = $startingWith;
        $this->specialHtmlCommentsEndingWith = $endingWith;

        return $this;
    }
1872
1873
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Visits every text node (XPath "//text()"). A node is skipped when it has
     * no node path, or when its node path contains "/" followed by one of the
     * entries of self::$skipTagsForRemoveWhitespace. For all other nodes each
     * match of self::$regExSpace in the value is replaced by a single space.
     * The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation instead of the "${var}" interpolation,
                // which is deprecated as of PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    // the text lives below a protected tag: leave it untouched
                    continue 2;
                }
            }

            // preg_replace() returns null on a PCRE error; keep the old value then
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1913
1914
    /**
     * Toggle the "keep broken html" option.
     *
     * WARNING: enabling this may be bad for performance.
     *
     * @param bool $keepBrokenHtml value stored in $this->keepBrokenHtml
     *
     * @return HtmlMin the current instance, for method chaining
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1927
1928
    /**
     * Overwrite the list of template-logic syntax markers for special script tags.
     *
     * Every entry is checked before anything is stored, so an invalid list
     * leaves the current configuration untouched.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException if any entry is not a string
     *
     * @return HtmlMin the current instance, for method chaining
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                // build the message from the real method name so that it cannot
                // drift away from the method that actually throws
                throw new \InvalidArgumentException(__FUNCTION__ . ' only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1945
}
1946