Completed
Push — master ( 25af33...fa7e00 )
by Lars
08:39
created

HtmlMin::doRemoveHttpsPrefixFromAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
     * PCRE pattern (UTF-8 mode) matching either a run of two or more
     * whitespace characters or a single CR / LF character.
     *
     * @var string
     */
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";

    /**
     * Tag names whose closing tag is always reported as optional by
     * domNodeClosingTagOptional().
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $optional_end_tags = [
        'html',
        'head',
        'body',
    ];

    /**
     * Tag names handled as self-closing.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Lookup map (tag name => '') of tags around which whitespace is trimmed.
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];

    /**
     * Lookup map (attribute name => '') of boolean attributes; used with
     * isset() by domNodeAttributesToString() to print the bare attribute name.
     *
     * @var array
     *
     * @psalm-var array<string, string>
     */
    private static $booleanAttributes = [
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    ];

    /**
     * Tag names that are skipped when whitespace is removed.
     *
     * @var array
     *
     * @psalm-var list<string>
     */
    private static $skipTagsForRemoveWhitespace = [
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    ];
139
140
    /**
     * @var array
     */
    private $protectedChildNodes = [];

    /**
     * @var string
     */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /**
     * Set via doOptimizeViaHtmlDomParser().
     *
     * @var bool
     */
    private $doOptimizeViaHtmlDomParser = true;

    /**
     * Set via doOptimizeAttributes().
     *
     * @var bool
     */
    private $doOptimizeAttributes = true;

    /**
     * Set via doRemoveComments().
     *
     * @var bool
     */
    private $doRemoveComments = true;

    /**
     * Set via doRemoveWhitespaceAroundTags().
     *
     * @var bool
     */
    private $doRemoveWhitespaceAroundTags = false;

    /**
     * Set via doRemoveOmittedQuotes().
     *
     * @var bool
     */
    private $doRemoveOmittedQuotes = true;

    /**
     * Set via doRemoveOmittedHtmlTags().
     *
     * @var bool
     */
    private $doRemoveOmittedHtmlTags = true;

    /**
     * Set via doRemoveHttpPrefixFromAttributes().
     *
     * @var bool
     */
    private $doRemoveHttpPrefixFromAttributes = false;

    /**
     * Set via doRemoveHttpsPrefixFromAttributes().
     *
     * @var bool
     */
    private $doRemoveHttpsPrefixFromAttributes = false;

    /**
     * Set via doKeepHttpAndHttpsPrefixOnExternalAttributes().
     *
     * @var bool
     */
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;

    /**
     * Becomes true when doMakeSameDomainsLinksRelative() receives at least one domain.
     *
     * @var bool
     */
    private $doMakeSameDomainsLinksRelative = false;

    /**
     * Normalized domains stored by doMakeSameDomainsLinksRelative().
     *
     * @var string[]
     */
    private $localDomains = [];

    /**
     * @var string[]
     */
    private $domainsToRemoveHttpPrefixFromAttributes = [
        'google.com',
        'google.de',
    ];

    /**
     * NOTE(review): "Staring" looks like a typo for "Starting"; renaming it
     * would require updating every use in the rest of the class.
     *
     * @var string[]
     */
    private $specialHtmlCommentsStaringWith = [];

    /**
     * @var string[]
     */
    private $specialHtmlCommentsEndingWith = [];

    /**
     * Set via doSortCssClassNames().
     *
     * @var bool
     */
    private $doSortCssClassNames = true;

    /**
     * Set via doSortHtmlAttributes().
     *
     * @var bool
     */
    private $doSortHtmlAttributes = true;

    /**
     * Set via doRemoveDeprecatedScriptCharsetAttribute().
     *
     * @var bool
     */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /**
     * Set via doRemoveDefaultAttributes().
     *
     * @var bool
     */
    private $doRemoveDefaultAttributes = false;

    /**
     * Set via doRemoveDeprecatedAnchorName().
     *
     * @var bool
     */
    private $doRemoveDeprecatedAnchorName = true;

    /**
     * Set via doRemoveDeprecatedTypeFromStylesheetLink().
     *
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /**
     * Set via doRemoveDeprecatedTypeFromStyleAndLinkTag().
     *
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;

    /**
     * Set via doRemoveDefaultMediaTypeFromStyleAndLinkTag().
     *
     * @var bool
     */
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;

    /**
     * Set via doRemoveDefaultTypeFromButton().
     *
     * @var bool
     */
    private $doRemoveDefaultTypeFromButton = false;

    /**
     * Set via doRemoveDeprecatedTypeFromScriptTag().
     *
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /**
     * Set via doRemoveValueFromEmptyInput().
     *
     * @var bool
     */
    private $doRemoveValueFromEmptyInput = true;

    /**
     * Set via doRemoveEmptyAttributes().
     *
     * @var bool
     */
    private $doRemoveEmptyAttributes = true;

    /**
     * Set via doSumUpWhitespace().
     *
     * @var bool
     */
    private $doSumUpWhitespace = true;

    /**
     * Set via doRemoveSpacesBetweenTags().
     *
     * @var bool
     */
    private $doRemoveSpacesBetweenTags = false;

    /**
     * @var bool
     */
    private $keepBrokenHtml = false;

    /**
     * @var bool
     */
    private $withDocType = false;

    /**
     * Observer storage; created in the constructor and filled through
     * attachObserverToTheDomLoop().
     *
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
     *
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
     */
    private $domLoopObservers;

    /**
     * @var int
     */
    private $protected_tags_counter = 0;

    /**
     * @var bool
     */
    private $isHTML4 = false;

    /**
     * @var bool
     */
    private $isXHTML = false;

    /**
     * @var string[]|null
     */
    private $templateLogicSyntaxInSpecialScriptTags;
329
330
    /**
     * HtmlMin constructor.
     *
     * Creates the (initially empty) observer storage and registers a
     * HtmlMinDomObserverOptimizeAttributes instance through
     * attachObserverToTheDomLoop().
     */
    public function __construct()
    {
        $observerStorage = new \SplObjectStorage();
        $this->domLoopObservers = $observerStorage;

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
339
340
    /**
     * Add an observer to the internal observer storage.
     *
     * @param HtmlMinDomObserverInterface $observer observer to store
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $storage = $this->domLoopObservers;
        $storage->attach($observer);
    }
349
350
    /**
     * Switch the "optimize attributes" option on or off
     * (the property defaults to true).
     *
     * @param bool $doOptimizeAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
361
362
    /**
     * Switch the "optimize via HTML DOM parser" option on or off
     * (the property defaults to true).
     *
     * @param bool $doOptimizeViaHtmlDomParser new state of the option
     *
     * @return $this for method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
373
374
    /**
     * Switch the "remove comments" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveComments new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
385
386
    /**
     * Switch the "remove default attributes" option on or off
     * (the property defaults to false).
     *
     * @param bool $doRemoveDefaultAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
397
398
    /**
     * Switch the "remove deprecated anchor name" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveDeprecatedAnchorName new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
409
410
    /**
     * Switch the "remove deprecated script charset attribute" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
421
422
    /**
     * Switch the "remove deprecated type from script tag" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
433
434
    /**
     * Switch the "remove deprecated type from stylesheet link" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
445
446
    /**
     * Switch the "remove deprecated type from style and link tag" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
457
458
    /**
     * Switch the "remove default media type from style and link tag" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
469
470
    /**
     * Switch the "remove default type from button" option on or off
     * (the property defaults to false).
     *
     * @param bool $doRemoveDefaultTypeFromButton new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
481
482
    /**
     * Switch the "remove empty attributes" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveEmptyAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
493
494
    /**
     * Switch the "remove http prefix from attributes" option on or off
     * (the property defaults to false).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
505
506
    /**
     * Switch the "remove https prefix from attributes" option on or off
     * (the property defaults to false).
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
517
518
    /**
     * Switch the "keep http and https prefix on external attributes" option
     * on or off (the property defaults to false).
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
529
530
    /**
     * Store the list of local domains and enable the "make same-domain links
     * relative" option when that list is not empty.
     *
     * Every entry is normalized before it is stored: each "http://",
     * "https://" or "//" occurrence (case-insensitive) is removed and
     * trailing slashes are stripped. Array keys are preserved.
     *
     * @param string[] $localDomains domains, with or without scheme
     *
     * @return $this for method chaining
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() instead of a by-reference foreach: no reference is left
        // dangling on the last element once the loop has finished.
        $this->localDomains = \array_map(
            static function ($localDomain): string {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );

        // An empty list switches the option off again.
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
547
548
    /**
     * Return the domains stored by doMakeSameDomainsLinksRelative()
     * (an empty array until that method has been called).
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        $domains = $this->localDomains;

        return $domains;
    }
555
556
    /**
     * Switch the "remove omitted html tags" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveOmittedHtmlTags new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
567
568
    /**
     * Switch the "remove omitted quotes" option on or off
     * (the property defaults to true). The flag is read by
     * domNodeAttributesToString() when deciding whether attribute values
     * may be printed without quotes.
     *
     * @param bool $doRemoveOmittedQuotes new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
579
580
    /**
     * Switch the "remove spaces between tags" option on or off
     * (the property defaults to false).
     *
     * @param bool $doRemoveSpacesBetweenTags new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
591
592
    /**
     * Switch the "remove value from empty input" option on or off
     * (the property defaults to true).
     *
     * @param bool $doRemoveValueFromEmptyInput new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
603
604
    /**
     * Switch the "remove whitespace around tags" option on or off
     * (the property defaults to false).
     *
     * @param bool $doRemoveWhitespaceAroundTags new state of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
615
616
    /**
     * Switch the "sort CSS class names" option on or off
     * (the property defaults to true).
     *
     * @param bool $doSortCssClassNames new state of the option
     *
     * @return $this for method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
627
628
    /**
     * Switch the "sort HTML attributes" option on or off
     * (the property defaults to true).
     *
     * @param bool $doSortHtmlAttributes new state of the option
     *
     * @return $this for method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
639
640
    /**
     * Switch the "sum up whitespace" option on or off
     * (the property defaults to true).
     *
     * @param bool $doSumUpWhitespace new state of the option
     *
     * @return $this for method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
651
652 58
    /**
     * Serialize the attributes of a DOM node into an HTML attribute string
     * (without the tag name, without leading/trailing whitespace).
     *
     * - With attribute optimization enabled, attributes listed in
     *   self::$booleanAttributes are printed as a bare name.
     * - With attribute optimization enabled, whitespace inside "srcset" and
     *   "sizes" values is collapsed via self::$regExSpace.
     * - Quotes are dropped (<p class="foo"> → <p class=foo>) when the option is
     *   enabled and the value is non-empty and free of quotes, "=", "<", ">",
     *   backticks and whitespace.
     * - Otherwise the value is wrapped in double quotes, or in single quotes
     *   when it contains a double quote. A value containing both kinds of
     *   quotes is wrapped in double quotes with '"' escaped as "&quot;".
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string e.g. `class=foo id="a b" hidden`, or '' without attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            // Boolean attribute: the name alone is enough.
            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            // Attribute names starting with the "____SIMPLE_HTML_DOM__VOKU"
            // marker, or containing a space, always keep their quotes.
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid
                // UTF-8 in combination with the /u modifier); keep the
                // untouched value instead of silently emitting an empty one.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quoteTmp = "'";
            } else {
                // Both quote characters occur in the value: neither can
                // delimit it unescaped, so escape the double quotes.
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
713
714
    /**
     * Decide whether the closing tag of the given node may be left out.
     *
     * Rules: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * "Next" below means the value returned by
     * getNextSiblingOfTypeDOMElement(); "last" means that value is null.
     *
     * Implemented:
     * - html, head, body: always (self::$optional_end_tags)
     * - li:       last, or next is li
     * - optgroup: last, or next is optgroup
     * - rp:       last, or next is rp / rt
     * - tr:       last, or next is tr
     * - source:   parent is audio / video / picture / source, and last or next is source
     * - td, th:   last, or next is td / th
     * - dd:       last, or next is dd / dt
     * - dt:       next is dd / dt (the rule has no "no more content" case for dt)
     * - option:   last, or next is option / optgroup
     * - p:        last and the parent is not a, audio, del, ins, map, noscript
     *             or video; or next is one of the block-level elements listed
     *             in the method body
     *
     * NOTE(review): the exception for a <p> whose parent is an autonomous
     * custom element is not checked here.
     *
     * TODO: Not implemented
     * - <html> start tag may be omitted if first thing inside is not a comment
     * - <head> start tag may be omitted if first thing inside is an element
     * - <body> start tag may be omitted if first thing inside is not space, comment,
     *   <meta>, <link>, <script>, <style> or <template>
     * - <colgroup> start tag may be omitted if the first thing inside is a <col> and
     *   the element is not immediately preceded by another colgroup whose end tag
     *   has been omitted (never if the element is empty)
     * - <colgroup> / <caption> end tag may be omitted if not immediately followed by
     *   ASCII whitespace or a comment
     * - <thead> end tag may be omitted if immediately followed by tbody or tfoot
     * - <tbody> start tag may be omitted if the first thing inside is a <tr> and the
     *   element is not immediately preceded by a tbody, thead or tfoot whose end tag
     *   has been omitted (never if the element is empty)
     * - <tbody> end tag may be omitted if immediately followed by tbody or tfoot, or
     *   if there is no more content in the parent element
     * - <tfoot> end tag may be omitted if there is no more content in the parent
     * - a start tag must never be omitted if it has any attributes
     *
     * @param \DOMNode $node
     *
     * @return bool
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                return $is_last || $next_tag_name === 'li';

            case 'optgroup':
                return $is_last || $next_tag_name === 'optgroup';

            case 'rp':
                return $is_last || \in_array($next_tag_name, ['rp', 'rt'], true);

            case 'tr':
                return $is_last || $next_tag_name === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($is_last || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $is_last || \in_array($next_tag_name, ['td', 'th'], true);

            case 'dd':
                return $is_last || \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'dt':
                // Unlike dd, a dt end tag is only optional in front of another
                // dt / dd, not at the end of the parent element.
                return \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'option':
                return $is_last || \in_array($next_tag_name, ['option', 'optgroup'], true);

            case 'p':
                if ($is_last) {
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
983
984 58
    /**
     * Serialize all child nodes of the given node into an HTML string.
     *
     * Elements are rendered recursively, text nodes are appended via their
     * "wholeText" and comments are re-wrapped in "<!--" / "-->". A small state
     * value remembers whether the previous child was handled as whitespace, so
     * that consecutive whitespace is not emitted twice.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';

        // '' | 'is_empty' (set by the previous child) | 'last_was_empty' (previous child set 'is_empty')
        $whitespaceState = '';

        foreach ($node->childNodes as $child) {
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // whitespace at the very start or end of the parent is dropped
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsSpace = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                if (
                    $containsSpace
                    ||
                    (
                        $whitespaceState !== 'last_was_empty'
                        &&
                        \substr($output, -1) !== ' '
                    )
                ) {
                    $output = \rtrim($output);

                    // no separating space is written for direct children of "head"
                    if ($child->parentNode && $child->parentNode->nodeName !== 'head') {
                        $output .= ' ';
                    }
                }
                $whitespaceState = 'is_empty';

                continue;
            }

            if (!($child instanceof \DOMElement)) {
                continue;
            }

            // opening tag (rtrim removes the separator when there are no attributes) + content
            $output .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
            $output .= '>' . $this->domNodeToString($child);

            $closingTagCanBeOmitted = $this->doRemoveOmittedHtmlTags
                                      &&
                                      !$this->isHTML4
                                      &&
                                      !$this->isXHTML
                                      &&
                                      $this->domNodeClosingTagOptional($child);
            if (!$closingTagCanBeOmitted) {
                $output .= '</' . $child->tagName . '>';
            }

            if ($this->doRemoveWhitespaceAroundTags) {
                continue;
            }

            /** @var \DOMText|null $following - false-positive error from phpstan */
            $following = $child->nextSibling;
            if (!($following instanceof \DOMText) || $following->wholeText !== ' ') {
                continue;
            }

            if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                $output = \rtrim($output);

                if ($child->parentNode && $child->parentNode->nodeName !== 'head') {
                    $output .= ' ';
                }
            }
            $whitespaceState = 'is_empty';
        }

        return $output;
    }
1083
1084
    /**
     * Build the "<!DOCTYPE ...>" declaration for the given node.
     *
     * The first child of type \DOMDocumentType with a non-empty name is used.
     * An empty string is returned when "$this->withDocType" is false or when
     * no such child exists.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    private function getDoctype(\DOMNode $node): string
    {
        // check the doc-type only if it wasn't generated by DomDocument itself
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' PUBLIC "' . $child->publicId . '"';
            }

            if ($child->systemId) {
                // The "SYSTEM" keyword is only written when there is no public identifier,
                // otherwise the system identifier follows the public one after a single space.
                $doctype .= ($child->publicId ? ' "' : ' SYSTEM "') . $child->systemId . '"';
            }

            return $doctype . '>';
        }

        return '';
    }
1119
1120
    /**
     * Get the currently configured "domainsToRemoveHttpPrefixFromAttributes" value.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1127
1128
    /**
     * Get the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1135
1136
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1143
1144
    /**
     * Get the current value of the "doRemoveComments" option.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1151
1152
    /**
     * Get the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1159
1160
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1167
1168
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1175
1176
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1183
1184
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1191
1192
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1199
1200
    /**
     * Get the current value of the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1207
1208
    /**
     * Get the current value of the "doRemoveDefaultTypeFromButton" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        return $this->doRemoveDefaultTypeFromButton;
    }
1215
1216
    /**
     * Get the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1223
1224
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1231
1232
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1239
1240
    /**
     * Get the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option.
     *
     * NOTE: the method name starts with "isdo" (lower-case "d"), unlike the
     *       other "isDo..." getters of this class; it is kept as-is here.
     *
     * @return bool
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1247
1248
    /**
     * Get the current value of the "doMakeSameDomainsLinksRelative" option.
     *
     * @return bool
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1255
1256
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1263
1264
    /**
     * Get the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1271
1272
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1279
1280
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1287
1288
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1295
1296
    /**
     * Get the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1303
1304
    /**
     * Get the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1311
1312
    /**
     * Get the current value of the "doSumUpWhitespace" option.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1319
1320
    /**
     * Get the "isHTML4" flag, which is derived from the doctype string
     * (it contains "html4") while a document is minified via the DOM parser.
     *
     * @return bool
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1327
1328
    /**
     * Get the "isXHTML" flag, which is derived from the doctype string
     * (it contains "xhtml1") while a document is minified via the DOM parser.
     *
     * @return bool
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1335
1336
    /**
     * Minify the given HTML.
     *
     * Steps: optional DOM based minification, whitespace clean-up between
     * attributes (and optionally between tags), restoring of protected HTML and
     * HTML entities, and a final string based clean-up. If the result is longer
     * than the (trimmed) input, the trimmed input is returned instead.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity
     *
     * @return string
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare with '' explicitly: a loose check would treat the valid input "0" as empty.
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // regex error (e.g. invalid UTF-8): keep the tag untouched instead of dropping its attributes
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );

            // on a regex error keep the current html instead of replacing it with an empty string
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
                if ($htmlTmp !== null) {
                    $html = $htmlTmp;
                }
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );

            if ($htmlTmp === null) {
                // the placeholders could not be restored, so the minified string is unusable
                return $origHtml;
            }

            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1467
1468
    /**
     * Walk over the following siblings of the given node and return the first
     * one that ends the search: an element, a text node or null (no sibling left).
     *
     * A text sibling that is blank, or that contains "<", is skipped in favour
     * of the sibling that follows it.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        while (true) {
            /** @var \DOMElement|\DOMText|null $sibling - false-positive error from phpstan */
            $sibling = $node->nextSibling;

            if ($sibling instanceof \DOMText) {
                $isRealText = \trim($sibling->textContent) !== ''
                              &&
                              \strpos($sibling->textContent, '<') === false;

                $node = $isRealText ? $sibling : $sibling->nextSibling;
            } else {
                $node = $sibling;
            }

            if (
                $node === null
                ||
                $node instanceof \DOMElement
                ||
                $node instanceof \DOMText
            ) {
                return $node;
            }
        }
    }
1496
1497
    /**
     * Check if the given comment text is a conditional comment, i.e. it starts
     * with "[if ...]" or ends with "[endif]".
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap strpos() check avoids running the regex on most comments
        /** @noinspection RegExpRedundantEscape */
        $startsWithIf = \strpos($comment, '[if ') !== false
                        &&
                        \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithIf) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        $endsWithEndif = \strpos($comment, '[endif]') !== false
                         &&
                         \preg_match('/\[endif\]$/', $comment);

        return $endsWithEndif;
    }
1529
1530
    /**
     * Check if the given comment text starts with one of the entries of
     * "specialHtmlCommentsStaringWith" or ends with one of the entries of
     * "specialHtmlCommentsEndingWith".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isSpecialComment($comment): bool
    {
        foreach ($this->specialHtmlCommentsStaringWith as $prefix) {
            $prefixPosition = \strpos($comment, $prefix);
            if ($prefixPosition === 0) {
                return true;
            }
        }

        foreach ($this->specialHtmlCommentsEndingWith as $suffix) {
            $commentEnd = \substr($comment, -\strlen($suffix));
            if ($commentEnd === $suffix) {
                return true;
            }
        }

        return false;
    }
1553
1554
    /**
     * Minify the given HTML via the DOM parser.
     *
     * The html is loaded into a "HtmlDomParser", the doctype is detected,
     * special content is protected, the enabled clean-up steps and the
     * registered observers are run, and the DOM is converted back into a string.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not formats output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        $doctypeStr = $this->getDoctype($dom->getDocument());

        // Always recompute the flags (also for an empty doctype string), otherwise the
        // values from a previously minified document would leak into this run.
        $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
        $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;

        // -------------------------------------------------------------------------
        // Protect <nocompress> HTML tags first.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Notify the Observer before the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            // -------------------------------------------------------------------------
            // Notify the Observer after the minification.
            // -------------------------------------------------------------------------

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        return $dom->fixHtmlOutput(
            $doctypeStr . $this->domNodeToString($dom->getDocument()),
            $multiDecodeNewHtmlEntity
        );
    }
1648
1649
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1660
1661
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1672
1673
    /**
     * Protect the html around every element that matches the selector.
     *
     * For each match, the inner html of the element's parent is stored in
     * "protectedChildNodes" and the node value of the parent is replaced by a
     * numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $counter = $this->protected_tags_counter;

            $this->protectedChildNodes[$counter] = $element->parentNode()->innerHtml();

            $domParent = $element->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $counter . '"></' . $helperTag . '>';
            }

            // the counter is increased even if no placeholder could be written
            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1697
1698
    /**
     * Prevent changes of "code" tags, inline "script" / "style" content and
     * conditional / special comments, by replacing them with numbered
     * placeholders and storing the original content in "protectedChildNodes".
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            if ($element->tag === 'script' || $element->tag === 'style') {
                $elementAttributes = $element->getAllAttributes();
                // skip external links
                if (isset($elementAttributes['src'])) {
                    continue;
                }
            }

            $counter = $this->protected_tags_counter;

            $this->protectedChildNodes[$counter] = $element->innerhtml;
            $element->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $counter . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $commentText = $element->text();

            // only conditional and special comments are protected
            $needsProtection = $this->isConditionalComment($commentText)
                               ||
                               $this->isSpecialComment($commentText);
            if (!$needsProtection) {
                continue;
            }

            $counter = $this->protected_tags_counter;

            $this->protectedChildNodes[$counter] = '<!--' . $commentText . '-->';

            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            $placeholder = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $counter . '"></' . $helperTag . '>');
            $domParent = $element->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->replaceChild($placeholder, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1758
1759
    /**
     * Remove all comments from the dom whose text does not contain "[",
     * then normalize the document.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            // comments containing "[" are kept
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $domParent = $commentNode->parentNode;
            if ($domParent !== null) {
                $domParent->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1783
1784
    /**
     * Collapse whitespace in the text nodes in and around the given element.
     *
     * Only elements whose tag is a key of "self::$trimWhitespaceFromTags" and
     * which have at least one child node are processed. For the first child,
     * last child, previous sibling and next sibling: if the node is a text node,
     * every match of "self::$regExSpace" in its value is replaced by one space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // DOM nodes are objects, so iterating by value is enough to modify them
        // (and it does not leave a dangling reference behind).
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            // null signals a regex error, in that case the text is left untouched
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1820
1821
    /**
1822
     * Callback function for preg_replace_callback use.
1823
     *
1824
     * @param array $matches PREG matches
1825
     *
1826
     * @return string
1827
     */
1828 13
    private function restoreProtectedHtml($matches): string
    {
        // Purpose: preg_replace_callback() handler that maps a placeholder back to
        // the stored markup in $this->protectedChildNodes. Returns an empty string
        // whenever no stored entry can be resolved.

        // The placeholder id is read from the captured "attributes" group; without
        // a string there, nothing can be looked up.
        $attributes = $matches['attributes'] ?? null;
        if (!\is_string($attributes)) {
            return '';
        }

        // Pull the digits wrapped in double quotes; the greedy ".*" makes the
        // last quoted run of digits win. Any non-match (or PCRE error) yields ''.
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1834
1835
    /**
1836
     * @param string[] $domainsToRemoveHttpPrefixFromAttributes
1837
     *
1838
     * @return $this
1839
     */
1840 2
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        // Fluent setter: store the given domain list on the instance, then hand
        // the instance back so further calls can be chained.
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1846
1847
    /**
1848
     * @param string[] $startingWith
1849
     * @param string[] $endingWith
1850
     *
1851
     * @return $this
1852
     */
1853 1
    public function setSpecialHtmlComments(array $startingWith, array $endingWith = []): self
    {
        // Fluent setter: remember both marker lists on the instance.
        // The two assignments are independent of each other.
        $this->specialHtmlCommentsEndingWith = $endingWith;
        $this->specialHtmlCommentsStaringWith = $startingWith;

        return $this;
    }
1860
1861
    /**
1862
     * Sum-up extra whitespace from dom-nodes.
1863
     *
1864
     * @param HtmlDomParser $dom
1865
     *
1866
     * @return HtmlDomParser
1867
     */
1868 57
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        // Purpose: replace each match of self::$regExSpace with a single space in
        // every text node, except for text located below one of the tags listed
        // in self::$skipTagsForRemoveWhitespace. Returns the same $dom instance.
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            // Without a node path the skip-list check below cannot be applied.
            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            // Skip the text node as soon as its path contains "/<tag>" for any
            // skip-listed tag. Plain concatenation is used instead of the "${var}"
            // interpolation form, which is deprecated as of PHP 8.2.
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                if (\strpos($xp, '/' . $pattern) !== false) {
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);

            // preg_replace() returns null on a PCRE error; keep the text untouched then.
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1901
1902
    /**
1903
     * WARNING: maybe bad for performance ...
1904
     *
1905
     * @param bool $keepBrokenHtml
1906
     *
1907
     * @return HtmlMin
1908
     */
1909 2
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        // Fluent setter: record the flag on the instance and return the instance
        // so that calls can be chained.
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1915
1916
    /**
1917
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
1918
     *
1919
     * @return HtmlMin
1920
     */
1921 1
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        // Purpose: replace the stored list of template-logic markers with the
        // given one. Throws \InvalidArgumentException if any entry is not a
        // string; in that case the stored list is left unchanged.
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                // The message names this method so the failing call site is easy to find.
                throw new \InvalidArgumentException('overwriteTemplateLogicSyntaxInSpecialScriptTags only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1933
}
1934