Completed
Push — master ( 5efab3...3c6811 )
by Lars
01:21
created

HtmlMin::doRemoveHttpsPrefixFromAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
     * Pattern matching a run of two or more whitespace characters, or a single CR / LF.
     *
     * @var string
     */
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";

    /**
     * Tag names whose end tag is always treated as optional.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $optional_end_tags = [
        'html',
        'head',
        'body',
    ];

    /**
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Tag names used as keys (values are unused) so membership can be tested via isset().
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];

    /**
     * Boolean attribute names used as keys (values are unused) so membership can be tested via isset().
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $booleanAttributes = [
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    ];

    /**
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $skipTagsForRemoveWhitespace = [
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    ];
139
140
    /**
     * @var array
     */
    private $protectedChildNodes = [];

    /**
     * @var string
     */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /**
     * @var bool
     *
     * @see self::doOptimizeViaHtmlDomParser()
     */
    private $doOptimizeViaHtmlDomParser = true;

    /**
     * @var bool
     *
     * @see self::doOptimizeAttributes()
     */
    private $doOptimizeAttributes = true;

    /**
     * @var bool
     *
     * @see self::doRemoveComments()
     */
    private $doRemoveComments = true;

    /**
     * @var bool
     *
     * @see self::doRemoveWhitespaceAroundTags()
     */
    private $doRemoveWhitespaceAroundTags = false;

    /**
     * @var bool
     *
     * @see self::doRemoveOmittedQuotes()
     */
    private $doRemoveOmittedQuotes = true;

    /**
     * @var bool
     *
     * @see self::doRemoveOmittedHtmlTags()
     */
    private $doRemoveOmittedHtmlTags = true;

    /**
     * @var bool
     *
     * @see self::doRemoveHttpPrefixFromAttributes()
     */
    private $doRemoveHttpPrefixFromAttributes = false;

    /**
     * @var bool
     *
     * @see self::doRemoveHttpsPrefixFromAttributes()
     */
    private $doRemoveHttpsPrefixFromAttributes = false;

    /**
     * @var bool
     *
     * @see self::doKeepHttpAndHttpsPrefixOnExternalAttributes()
     */
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;

    /**
     * True once a non-empty domain list was passed to doMakeSameDomainsLinksRelative().
     *
     * @var bool
     */
    private $doMakeSameDomainsLinksRelative = false;

    /**
     * Domains stored by doMakeSameDomainsLinksRelative(), without scheme prefix and trailing slashes.
     *
     * @var string[]
     */
    private $localDomains = [];

    /**
     * @var string[]
     */
    private $domainsToRemoveHttpPrefixFromAttributes = [
        'google.com',
        'google.de',
    ];

    /**
     * @var string[]
     */
    private $specialHtmlCommentsStaringWith = [];

    /**
     * @var string[]
     */
    private $specialHtmlCommentsEndingWith = [];

    /**
     * @var bool
     *
     * @see self::doSortCssClassNames()
     */
    private $doSortCssClassNames = true;

    /**
     * @var bool
     *
     * @see self::doSortHtmlAttributes()
     */
    private $doSortHtmlAttributes = true;

    /**
     * @var bool
     *
     * @see self::doRemoveDeprecatedScriptCharsetAttribute()
     */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /**
     * @var bool
     *
     * @see self::doRemoveDefaultAttributes()
     */
    private $doRemoveDefaultAttributes = false;

    /**
     * @var bool
     *
     * @see self::doRemoveDeprecatedAnchorName()
     */
    private $doRemoveDeprecatedAnchorName = true;

    /**
     * @var bool
     *
     * @see self::doRemoveDeprecatedTypeFromStylesheetLink()
     */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /**
     * @var bool
     *
     * @see self::doRemoveDeprecatedTypeFromStyleAndLinkTag()
     */
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;

    /**
     * @var bool
     *
     * @see self::doRemoveDefaultMediaTypeFromStyleAndLinkTag()
     */
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;

    /**
     * @var bool
     *
     * @see self::doRemoveDefaultTypeFromButton()
     */
    private $doRemoveDefaultTypeFromButton = false;

    /**
     * @var bool
     *
     * @see self::doRemoveDeprecatedTypeFromScriptTag()
     */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /**
     * @var bool
     *
     * @see self::doRemoveValueFromEmptyInput()
     */
    private $doRemoveValueFromEmptyInput = true;

    /**
     * @var bool
     *
     * @see self::doRemoveEmptyAttributes()
     */
    private $doRemoveEmptyAttributes = true;

    /**
     * @var bool
     *
     * @see self::doSumUpWhitespace()
     */
    private $doSumUpWhitespace = true;

    /**
     * @var bool
     *
     * @see self::doRemoveSpacesBetweenTags()
     */
    private $doRemoveSpacesBetweenTags = false;

    /**
     * @var bool
     */
    private $keepBrokenHtml = false;

    /**
     * @var bool
     */
    private $withDocType = false;

    /**
     * Observer storage; created in the constructor and filled via attachObserverToTheDomLoop().
     *
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
     *
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
     */
    private $domLoopObservers;

    /**
     * @var int
     */
    private $protected_tags_counter = 0;

    /**
     * @var bool
     */
    private $isHTML4 = false;

    /**
     * @var bool
     */
    private $isXHTML = false;

    /**
     * @var string[]|null
     */
    private $templateLogicSyntaxInSpecialScriptTags;
329
330
    /**
331
     * HtmlMin constructor.
332
     */
333 61
    public function __construct()
    {
        // The observer storage must exist before the first observer is attached.
        $this->domLoopObservers = new \SplObjectStorage();

        // Register the attribute-optimizing observer by default.
        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
339
340
    /**
341
     * @param HtmlMinDomObserverInterface $observer
342
     *
343
     * @return void
344
     */
345 61
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        // Add the observer to the \SplObjectStorage created in the constructor.
        $this->domLoopObservers->attach($observer);
    }
349
350
    /**
351
     * @param bool $doOptimizeAttributes
352
     *
353
     * @return $this
354
     */
355 2
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        // Fluent setter; the flag is read by domNodeAttributesToString() (boolean attributes, srcset/sizes).
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
361
362
    /**
363
     * @param bool $doOptimizeViaHtmlDomParser
364
     *
365
     * @return $this
366
     */
367 2
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
373
374
    /**
375
     * @param bool $doRemoveComments
376
     *
377
     * @return $this
378
     */
379 3
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
385
386
    /**
387
     * @param bool $doRemoveDefaultAttributes
388
     *
389
     * @return $this
390
     */
391 2
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
397
398
    /**
399
     * @param bool $doRemoveDeprecatedAnchorName
400
     *
401
     * @return $this
402
     */
403 2
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
409
410
    /**
411
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute
412
     *
413
     * @return $this
414
     */
415 2
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
421
422
    /**
423
     * @param bool $doRemoveDeprecatedTypeFromScriptTag
424
     *
425
     * @return $this
426
     */
427 3
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
433
434
    /**
435
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink
436
     *
437
     * @return $this
438
     */
439 2
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
445
446
    /**
447
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag
448
     *
449
     * @return $this
450
     */
451 1
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
457
458
    /**
459
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag
460
     *
461
     * @return $this
462
     */
463 1
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
469
470
    /**
471
     * @param bool $doRemoveDefaultTypeFromButton
472
     *
473
     * @return $this
474
     */
475 1
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
481
482
    /**
483
     * @param bool $doRemoveEmptyAttributes
484
     *
485
     * @return $this
486
     */
487 2
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
493
494
    /**
495
     * @param bool $doRemoveHttpPrefixFromAttributes
496
     *
497
     * @return $this
498
     */
499 6
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
505
506
    /**
507
     * @param bool $doRemoveHttpsPrefixFromAttributes
508
     *
509
     * @return $this
510
     */
511 1
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
517
518
    /**
519
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes
520
     *
521
     * @return $this
522
     */
523 1
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
529
530
    /**
531
     * @param string[] $localDomains
532
     *
533
     * @return $this
534
     */
535 1
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // Normalize every domain: drop a leading "http://", "https://" or "//" (case-insensitive,
        // anywhere the pattern matches) and strip trailing slashes.
        //
        // array_map() replaces the former by-reference foreach, which left a dangling reference
        // to the last element behind; with a single input array the keys are preserved.
        $this->localDomains = \array_map(
            /**
             * @param string $localDomain
             *
             * @return string
             */
            static function ($localDomain): string {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );

        // The feature is only switched on when at least one domain was given.
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
547
548
    /**
549
     * @return string[]
550
     */
551 1
    public function getLocalDomains(): array
    {
        // Returns the list as stored by doMakeSameDomainsLinksRelative() (empty by default).
        return $this->localDomains;
    }
555
556
    /**
557
     * @param bool $doRemoveOmittedHtmlTags
558
     *
559
     * @return $this
560
     */
561 1
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        // Fluent setter; the flag is read by domNodeToString() when deciding about closing tags.
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
567
568
    /**
569
     * @param bool $doRemoveOmittedQuotes
570
     *
571
     * @return $this
572
     */
573 1
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        // Fluent setter; the flag is read by domNodeAttributesToString() when deciding on quotes.
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
579
580
    /**
581
     * @param bool $doRemoveSpacesBetweenTags
582
     *
583
     * @return $this
584
     */
585 1
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
591
592
    /**
593
     * @param bool $doRemoveValueFromEmptyInput
594
     *
595
     * @return $this
596
     */
597 2
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
603
604
    /**
605
     * @param bool $doRemoveWhitespaceAroundTags
606
     *
607
     * @return $this
608
     */
609 5
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
615
616
    /**
617
     * @param bool $doSortCssClassNames
618
     *
619
     * @return $this
620
     */
621 2
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
627
628
    /**
629
     * @param bool $doSortHtmlAttributes
630
     *
631
     * @return $this
632
     */
633 2
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
639
640
    /**
641
     * @param bool $doSumUpWhitespace
642
     *
643
     * @return $this
644
     */
645 2
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        // Fluent setter: store the flag and return this instance so calls can be chained.
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
651
652 57
    /**
     * Serialize the attributes of a DOM node into an HTML attribute string.
     *
     * - With attribute optimization enabled, attributes listed in self::$booleanAttributes are
     *   emitted as a bare name and whitespace inside "srcset" / "sizes" values is collapsed.
     * - With "doRemoveOmittedQuotes" enabled, quotes are dropped where the value permits it
     *   (<p class="foo"> → <p class=foo>).
     *
     * @param \DOMNode $node
     *
     * @return string the attributes separated by single spaces, without surrounding whitespace
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // Boolean attribute: the name alone is sufficient.
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on failure (e.g. invalid UTF-8 with the "u" modifier);
                // keep the untouched value in that case instead of silently emitting an empty one.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            }

            if ($omit_quotes) {
                $attr_str .= $attr_val . ' ';

                continue;
            }

            $quoteTmp = '"';
            if (\strpos($attr_val, '"') !== false) {
                if (\strpos($attr_val, "'") === false) {
                    // Only double quotes inside the value: single quotes are a safe delimiter.
                    $quoteTmp = "'";
                } else {
                    // Both quote kinds inside the value: no raw delimiter is safe, so keep the
                    // double-quote delimiter and encode the double quotes of the value.
                    $attr_val = \str_replace('"', '&quot;', $attr_val);
                }
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
713
714
    /**
715
     * @param \DOMNode $node
716
     *
717
     * @return bool
718
     */
719 56
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        // Decides whether the end tag of $node may be left out, based on the node's tag name,
        // its parent's tag name and the value returned by getNextSiblingOfTypeDOMElement().

        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $has_no_next_sibling = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        //
        // NOTE(review): <dt> is handled exactly like <dd> below, i.e. its end tag is also dropped
        // when there is no next sibling, which goes beyond the <dt> rule quoted above. That
        // long-standing behaviour is kept unchanged here.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // Tags whose end tag is optional when nothing follows, or when one of the listed
        // elements follows.
        $allowed_followers = [
            'li'       => ['li'],
            'optgroup' => ['optgroup'],
            'rp'       => ['rp', 'rt'],
            'tr'       => ['tr'],
            'td'       => ['td', 'th'],
            'th'       => ['td', 'th'],
            'dd'       => ['dd', 'dt'],
            'dt'       => ['dd', 'dt'],
            'option'   => ['option', 'optgroup'],
        ];

        if (isset($allowed_followers[$tag_name])) {
            // $next_tag_name is null unless the next sibling is a \DOMElement, so a strict
            // in_array() never matches in that case.
            return $has_no_next_sibling
                   ||
                   \in_array($next_tag_name, $allowed_followers[$tag_name], true);
        }

        if ($tag_name === 'source') {
            return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                   &&
                   (
                       $has_no_next_sibling
                       ||
                       $next_tag_name === 'source'
                   );
        }

        if ($tag_name !== 'p') {
            return false;
        }

        if ($has_no_next_sibling) {
            // Last <p> in its parent: only optional when the parent is neither one of the
            // excluded elements nor an autonomous custom element (name starts with a letter
            // and contains a hyphen; "#document-fragment" and similar do not qualify).
            return $parent_tag_name !== null
                   &&
                   !\in_array(
                       $parent_tag_name,
                       [
                           'a',
                           'audio',
                           'del',
                           'ins',
                           'map',
                           'noscript',
                           'video',
                       ],
                       true
                   )
                   &&
                   \preg_match('/^[a-z][^\s]*-/i', $parent_tag_name) !== 1;
        }

        return \in_array(
            $next_tag_name,
            [
                'address',
                'article',
                'aside',
                'blockquote',
                'details',
                'dir',
                'div',
                'dl',
                'fieldset',
                'figcaption',
                'figure',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'main',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
            true
        );
    }
983
984 57
    /**
     * Serialize all children of the given node (recursively) into an HTML string.
     *
     * Elements are written as start tag, serialized children and - unless the closing tag
     * may be omitted - end tag. Whitespace-only text nodes between siblings are reduced
     * to at most one blank; other text nodes and comments are written as they are.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // trims the trailing whitespace and re-adds one blank, except directly inside of <head>
        $endWithSingleSpace = static function (string $markup, \DOMNode $current): string {
            $markup = \rtrim($markup);
            $parent = $current->parentNode;

            return ($parent && $parent->nodeName !== 'head') ? $markup . ' ' : $markup;
        };

        // init
        $markup = '';
        $spaceState = '';

        foreach ($node->childNodes as $child) {
            // remember for exactly one iteration that the previous child produced a blank
            $spaceState = $spaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMComment) {
                $markup .= '<!--' . $child->textContent . '-->';

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $markup .= $child->wholeText;

                    continue;
                }

                // whitespace at the very start or end of the parent is dropped
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsBlank = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                if (
                    $containsBlank
                    ||
                    ($spaceState !== 'last_was_empty' && \substr($markup, -1) !== ' ')
                ) {
                    $markup = $endWithSingleSpace($markup, $child);
                }
                $spaceState = 'is_empty';

                continue;
            }

            if (!($child instanceof \DOMElement)) {
                continue;
            }

            // rtrim() removes the separator blank again if the element has no attributes
            $markup .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
            $markup .= '>' . $this->domNodeToString($child);

            $omitClosingTag = $this->doRemoveOmittedHtmlTags
                              && !$this->isHTML4
                              && !$this->isXHTML
                              && $this->domNodeClosingTagOptional($child);
            if (!$omitClosingTag) {
                $markup .= '</' . $child->tagName . '>';
            }

            if ($this->doRemoveWhitespaceAroundTags) {
                continue;
            }

            $following = $child->nextSibling;
            if (!($following instanceof \DOMText) || $following->wholeText !== ' ') {
                continue;
            }

            if ($spaceState !== 'last_was_empty' && \substr($markup, -1) !== ' ') {
                $markup = $endWithSingleSpace($markup, $child);
            }
            $spaceState = 'is_empty';
        }

        return $markup;
    }
1083
1084
    /**
     * Build the "<!DOCTYPE ...>" string from the first named doctype child of the given node.
     *
     * Returns an empty string if the "withDocType" flag is not set or if no doctype
     * child with a name exists.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    private function getDoctype(\DOMNode $node): string
    {
        // check the doc-type only if it wasn't generated by DomDocument itself
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' PUBLIC "' . $child->publicId . '"';

                // the system identifier follows the public identifier without a keyword,
                // separated by exactly one blank
                if ($child->systemId) {
                    $doctype .= ' "' . $child->systemId . '"';
                }
            } elseif ($child->systemId) {
                $doctype .= ' SYSTEM "' . $child->systemId . '"';
            }

            return $doctype . '>';
        }

        return '';
    }
1119
1120
    /**
     * Get the domain list that is currently stored in "domainsToRemoveHttpPrefixFromAttributes".
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1127
1128
    /**
     * Get the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1135
1136
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * minify() only runs the DOM based minification if this option is enabled.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1143
1144
    /**
     * Get the current value of the "doRemoveComments" option.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1151
1152
    /**
     * Get the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1159
1160
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1167
1168
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1175
1176
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1183
1184
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1191
1192
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1199
1200
    /**
     * Get the current value of the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1207
1208
    /**
     * Get the current value of the "doRemoveDefaultTypeFromButton" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        return $this->doRemoveDefaultTypeFromButton;
    }
1215
1216
    /**
     * Get the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1223
1224
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1231
1232
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1239
1240
    /**
     * Get the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option.
     *
     * @return bool
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1247
1248
    /**
     * Get the current value of the "doMakeSameDomainsLinksRelative" option.
     *
     * @return bool
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1255
1256
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1263
1264
    /**
     * Get the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1271
1272
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * If enabled, minify() removes a single whitespace character between ">" and "<".
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1279
1280
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1287
1288
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1295
1296
    /**
     * Get the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1303
1304
    /**
     * Get the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1311
1312
    /**
     * Get the current value of the "doSumUpWhitespace" option.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1319
1320
    /**
     * Get the "isHTML4" flag.
     *
     * The flag is set during the DOM based minification, if the doctype string of the
     * document contains "html4".
     *
     * @return bool
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1327
1328
    /**
     * Get the "isXHTML" flag.
     *
     * The flag is set during the DOM based minification, if the doctype string of the
     * document contains "xhtml1".
     *
     * @return bool
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1335
1336
    /**
     * Minify the given HTML.
     *
     * Steps: optional DOM based minification, clean-up of whitespace between attributes,
     * optional removal of whitespace between tags, restoring of protected html and
     * html-entities and a final clean-up (line breaks around html / head tags, trailing
     * slashes and closing tags of self closing tags). If the result is longer than the
     * trimmed input, the trimmed input is returned instead.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed through to the DOM based minification
     *
     * @return string
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        $html = (string) $html;
        if (!isset($html[0])) {
            return '';
        }

        $html = \trim($html);
        // strict check: a document like "0" is falsy in PHP, but it is not empty
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // INFO: preg_* returns null on errors (e.g. invalid UTF-8 in combination with
        // the "u" modifier), in that case we keep the un-modified value instead of
        // silently losing content.

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $html = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // keep the whole tag as it is, otherwise all attributes would be dropped
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            ) ?? $html;
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $html = \preg_replace('#(>)\s(<)#', '>$2', $html) ?? $html;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $html = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            ) ?? $html;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1467
1468
    /**
     * Walk along the following siblings of the given node until an element, a text node
     * or the end of the sibling list is reached.
     *
     * A text node directly behind the current node is only used as result if it is not
     * blank and does not contain "<", otherwise it is skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $current = $node;

        while (true) {
            /** @var \DOMElement|\DOMText|null $sibling - false-positive error from phpstan */
            $sibling = $current->nextSibling;

            if ($sibling instanceof \DOMText) {
                $isUsableText = \trim($sibling->textContent) !== ''
                                && \strpos($sibling->textContent, '<') === false;

                // skip the text node if it is blank or contains markup
                $current = $isUsableText ? $sibling : $sibling->nextSibling;
            } else {
                $current = $sibling;
            }

            if (
                $current === null
                ||
                $current instanceof \DOMElement
                ||
                $current instanceof \DOMText
            ) {
                return $current;
            }
        }
    }
1496
1497
    /**
     * Check if the given comment text is a conditional comment.
     *
     * The text counts as conditional comment if it starts with "[if expression]" or if
     * it ends with "[endif]".
     *
     * INFO: since IE >= 10 conditional comment are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap substring lookup runs first, the regex only confirms the position

        /** @noinspection RegExpRedundantEscape */
        $startsWithCondition = \strpos($comment, '[if ') !== false
                               && \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               && \preg_match('/\[endif\]$/', $comment);
    }
1529
1530
    /**
     * Check if the given comment text is a special comment, i.e. it starts with one of
     * the "specialHtmlCommentsStaringWith" strings or it ends with one of the
     * "specialHtmlCommentsEndingWith" strings.
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isSpecialComment($comment): bool
    {
        foreach ($this->specialHtmlCommentsStaringWith as $prefix) {
            $position = \strpos($comment, $prefix);
            if ($position === 0) {
                return true;
            }
        }

        foreach ($this->specialHtmlCommentsEndingWith as $suffix) {
            $tail = \substr($comment, -\strlen($suffix));
            if ($tail === $suffix) {
                return true;
            }
        }

        return false;
    }
1553
1554
    /**
     * Minify the html via the "HtmlDomParser".
     *
     * Loads the html into a DOM, protects html that must not be changed, notifies the
     * observers before and after the minification, optionally removes comments and
     * whitespace and converts the DOM back into a string.
     *
     * Side effects: updates the "withDocType", "isHTML4" and "isXHTML" flags for the
     * current document and fills "protectedChildNodes" via the protect-helpers.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not formats output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        $doctypeStr = $this->getDoctype($dom->getDocument());

        // Always derive both flags from the current document (an empty doctype string
        // results in "false"), otherwise a re-used instance would keep the flags of a
        // previously minified HTML4 / XHTML document.
        // NOTE(review): assumes that these flags are only written here - please confirm
        // that nothing else in the class allows to pre-set them.
        $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
        $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;

        // -------------------------------------------------------------------------
        // Protect <nocompress> HTML tags first.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Notify the Observer before the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            // -------------------------------------------------------------------------
            // Notify the Observer after the minification.
            // -------------------------------------------------------------------------

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        return $dom->fixHtmlOutput(
            $doctypeStr . $this->domNodeToString($dom->getDocument()),
            $multiDecodeNewHtmlEntity
        );
    }
1648
1649
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1660
1661
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1672
1673
    /**
     * Protect the html around every element that matches the selector.
     *
     * The inner html of the parent of each match is saved in "protectedChildNodes" and
     * the content of that parent is replaced by a numbered placeholder tag, which is
     * swapped back later on.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $marker = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = $match->parentNode()->innerHtml();

            $domParent = $match->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->nodeValue = '<' . $marker . ' data-' . $marker . '="' . $slot . '"></' . $marker . '>';
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1697
1698
    /**
     * Prevent changes of "code" tags, inline "styles" and "scripts" and of conditional
     * or special comments.
     *
     * The protected content is saved in "protectedChildNodes" and replaced by a numbered
     * placeholder tag. Scripts / styles with a "src" attribute are skipped.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $marker = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            $isScriptOrStyle = $inlineElement->tag === 'script' || $inlineElement->tag === 'style';
            if ($isScriptOrStyle) {
                $attributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = '<' . $marker . ' data-' . $marker . '="' . $slot . '"></' . $marker . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $text = $commentElement->text();

            $mustBeProtected = $this->isConditionalComment($text) || $this->isSpecialComment($text);
            if (!$mustBeProtected) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = '<!--' . $text . '-->';

            // swap the comment node with a text node that contains the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholder = new \DOMText('<' . $marker . ' data-' . $marker . '="' . $slot . '"></' . $marker . '>');
            $domParent = $commentElement->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->replaceChild($placeholder, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1758
1759
    /**
     * Remove the comments from the dom.
     *
     * Comments that contain a "[" are kept. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner !== null) {
                $owner->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1783
1784
    /**
     * Trim tags in the dom.
     *
     * Only elements which are listed in "trimWhitespaceFromTags" and which have at least
     * one child node are processed: whitespace matching "regExSpace" is replaced by a
     * single blank in the first child, last child, previous sibling and next sibling,
     * as far as these nodes are text nodes.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        /** @var \DOMNode[] $candidates */
        $candidates = [];
        if ($node->childNodes->length > 0) {
            $candidates[] = $node->firstChild;
            $candidates[] = $node->lastChild;
            $candidates[] = $node->previousSibling;
            $candidates[] = $node->nextSibling;
        }

        // INFO: no by-reference loop is needed, the nodes are objects and are changed via their handle
        /** @var mixed $candidate - false-positive error from phpstan */
        foreach ($candidates as $candidate) {
            if ($candidate === null) {
                continue;
            }

            if ($candidate->nodeType === \XML_TEXT_NODE) {
                $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
                // null means that the regex failed, keep the old value in that case
                if ($nodeValueTmp !== null) {
                    $candidate->nodeValue = $nodeValueTmp;
                }
            }
        }
    }
1820
1821
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the named group "attributes" from the outer match, extracts an id from a
     * double-quoted run of digits in it (the greedy prefix prefers the last one) and
     * returns the entry stored under that id in $this->protectedChildNodes.
     *
     * @param array $matches PREG matches, must contain the named group "attributes"
     *
     * @return string the stored HTML, or an empty string when no id can be extracted
     *                or nothing is stored under that id
     */
    private function restoreProtectedHtml($matches): string
    {
        // Without a successful match there is no "id" group to read; bail out early
        // instead of triggering an "undefined array key" warning below.
        if (\preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1834
1835
    /**
     * Replace the stored list of domains in
     * $this->domainsToRemoveHttpPrefixFromAttributes.
     *
     * @param string[] $domainsToRemoveHttpPrefixFromAttributes new list, stored as given
     *
     * @return $this fluent interface
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        // The value is stored without validation or normalisation.
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1846
1847
    /**
     * Store the markers that identify "special" html comments.
     *
     * Both lists are stored as given; the second list is reset to an empty array
     * when it is omitted.
     *
     * @param string[] $startingWith stored in $this->specialHtmlCommentsStaringWith
     * @param string[] $endingWith   stored in $this->specialHtmlCommentsEndingWith
     *
     * @return $this fluent interface
     */
    public function setSpecialHtmlComments(array $startingWith, array $endingWith = []): self
    {
        $this->specialHtmlCommentsEndingWith = $endingWith;
        $this->specialHtmlCommentsStaringWith = $startingWith;

        return $this;
    }
1860
1861
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Every text node of the document gets each match of self::$regExSpace replaced
     * by a single space, unless the node's path contains "/<entry>" for one of the
     * entries in self::$skipTagsForRemoveWhitespace. Afterwards the document is
     * normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            // Leave text below a skipped tag untouched.
            // NOTE(review): this is a substring test on the node path, so an entry
            // also matches longer tag names that start with it - confirm acceptable.
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // Plain concatenation instead of "${pattern}", which is deprecated
                // string interpolation syntax since PHP 8.2.
                if (\strpos($xp, '/' . $pattern) !== false) {
                    continue 2;
                }
            }

            // preg_replace() returns null on a PCRE error; keep the old text then.
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1901
1902
    /**
     * Switch the "keep broken html" option on or off.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml new value for $this->keepBrokenHtml
     *
     * @return $this fluent interface
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        // Only the flag is stored here; nothing else happens in this method.
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1915
1916
    /**
     * Replace the stored template-logic syntax list after checking that every
     * entry is a string.
     *
     * NOTE(review): the exception message names
     * "setTemplateLogicSyntaxInSpecialScriptTags", not this method - confirm
     * whether that is intentional.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException if any entry is not a string
     *
     * @return HtmlMin
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        // Validate first, so the property is left untouched when the input is rejected.
        foreach ($templateLogicSyntaxInSpecialScriptTags as $syntaxEntry) {
            if (\is_string($syntaxEntry)) {
                continue;
            }

            throw new \InvalidArgumentException('setTemplateLogicSyntaxInSpecialScriptTags only allows string[]');
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1933
}
1934