Completed
Push — master ( bcaaf4...497890 )
by Lars
01:38
created

HtmlMin::isXHTML()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainsLinksRelative = false;
199
200
    /**
201
     * @var string[]
202
     */
203
    private $localDomains = [];
204
205
    /**
206
     * @var array
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromScriptTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveValueFromEmptyInput = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveEmptyAttributes = true;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doSumUpWhitespace = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveSpacesBetweenTags = false;
267
268
    /**
269
     * @var bool
270
     */
271
    private $keepBrokenHtml = false;
272
273
    /**
274
     * @var bool
275
     */
276
    private $withDocType = false;
277
278
    /**
279
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
280
     *
281
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
282
     */
283
    private $domLoopObservers;
284
285
    /**
286
     * @var int
287
     */
288
    private $protected_tags_counter = 0;
289
290
    /**
291
     * @var bool
292
     */
293
    private $isHTML4 = false;
294
295
    /**
296
     * @var bool
297
     */
298
    private $isXHTML = false;
299
300
    /**
     * HtmlMin constructor.
     *
     * Creates the observer storage and registers the default
     * HtmlMinDomObserverOptimizeAttributes observer for the DOM loop.
     */
    public function __construct()
    {
        $observerStorage = new \SplObjectStorage();
        $this->domLoopObservers = $observerStorage;

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
309
310
    /**
     * Register an observer in the DOM-loop observer storage.
     *
     * @param HtmlMinDomObserverInterface $observer observer to add to $this->domLoopObservers
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $storage = $this->domLoopObservers;
        $storage->attach($observer);
    }
319
320
    /**
     * Switch the "optimize attributes" option on or off (property default: true).
     *
     * The method only stores the flag; it is evaluated when the DOM is serialized.
     *
     * @param bool $doOptimizeAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
331
332
    /**
     * Switch the "optimize via HTML DOM parser" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doOptimizeViaHtmlDomParser true to enable, false to disable
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
343
344
    /**
     * Switch the "remove comments" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveComments true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
355
356
    /**
     * Switch the "remove default attributes" option on or off (property default: false).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveDefaultAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
367
368
    /**
     * Switch the "remove deprecated anchor name" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveDeprecatedAnchorName true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
379
380
    /**
     * Switch the "remove deprecated script charset attribute" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
391
392
    /**
     * Switch the "remove deprecated type from script tag" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
403
404
    /**
     * Switch the "remove deprecated type from stylesheet link" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
415
416
    /**
     * Switch the "remove empty attributes" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveEmptyAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
427
428
    /**
     * Switch the "remove http prefix from attributes" option on or off (property default: false).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
439
440
    /**
     * Switch the "remove https prefix from attributes" option on or off (property default: false).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
451
452
    /**
     * Switch the "keep http and https prefix on external attributes" option on or off
     * (property default: false).
     *
     * The method only stores the flag.
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
463
464
    /**
     * Set the list of local domains and enable/disable the
     * "make same-domain links relative" option accordingly.
     *
     * Every entry is normalized before it is stored: each "//" (optionally
     * preceded by "http:" or "https:", case-insensitive) is removed and
     * trailing slashes are trimmed. The option is enabled only when the
     * resulting list is not empty.
     *
     * @param string[] $localDomains domains (with or without scheme) to treat as local
     *
     * @return $this
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() with a single array keeps the original keys.
        $normalizedDomains = \array_map(
            static function ($domain) {
                $withoutScheme = (string) \preg_replace('/(?:https?:)?\/\//i', '', $domain);

                return \rtrim($withoutScheme, '/');
            },
            $localDomains
        );

        $this->localDomains = $normalizedDomains;
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
481
482
    /**
     * Get the local domains as stored by doMakeSameDomainsLinksRelative().
     *
     * @return string[] empty array when no local domains were configured
     */
    public function getLocalDomains(): array
    {
        $domains = $this->localDomains;

        return $domains;
    }
489
490
    /**
     * Switch the "remove omitted HTML tags" option on or off (property default: true).
     *
     * The method only stores the flag; it is evaluated when the DOM is serialized.
     *
     * @param bool $doRemoveOmittedHtmlTags true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
501
502
    /**
     * Switch the "remove omitted quotes" option on or off (property default: true).
     *
     * The method only stores the flag; it is evaluated when attributes are serialized.
     *
     * @param bool $doRemoveOmittedQuotes true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
513
514
    /**
     * Switch the "remove spaces between tags" option on or off (property default: false).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveSpacesBetweenTags true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
525
526
    /**
     * Switch the "remove value from empty input" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doRemoveValueFromEmptyInput true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
537
538
    /**
     * Switch the "remove whitespace around tags" option on or off (property default: false).
     *
     * The method only stores the flag; it is evaluated when the DOM is serialized.
     *
     * @param bool $doRemoveWhitespaceAroundTags true to enable, false to disable
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
549
550
    /**
     * Switch the "sort CSS class names" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doSortCssClassNames true to enable, false to disable
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
561
562
    /**
     * Switch the "sort HTML attributes" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doSortHtmlAttributes true to enable, false to disable
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
573
574
    /**
     * Switch the "sum up whitespace" option on or off (property default: true).
     *
     * The method only stores the flag.
     *
     * @param bool $doSumUpWhitespace true to enable, false to disable
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
585
586 52
    /**
     * Serialize the attributes of a DOM node into a compact HTML attribute string.
     *
     * - With attribute optimization enabled, attributes listed in
     *   self::$booleanAttributes are written as a bare name (no "=value").
     * - With "remove omitted quotes" enabled, quotes are dropped around values
     *   that are non-empty and contain none of: quotes, "=", "<", ">", backtick
     *   or whitespace (<p class="foo"> → <p class=foo>).
     * - With attribute optimization enabled, whitespace inside "srcset" and
     *   "sizes" values is collapsed via self::$regExSpace.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string space-separated attributes without leading/trailing whitespace,
     *                or an empty string if the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // Boolean attribute: the name alone is enough.
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // Remove quotes around attribute values, when allowed.
            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attribute->value, '"') !== false) {
                // The value contains a double quote, so wrap it in single quotes instead.
                $quoteTmp = "'";
            } else {
                $quoteTmp = '"';
            }

            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 with the
                // "u" modifier); keep the untouched value instead of silently emptying it.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            } else {
                $attr_val = $attribute->value;
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp;
            $attr_str .= ' ';
        }

        return \trim($attr_str);
    }
647
648
    /**
     * Decide whether the closing tag of the given node may be left out of the output.
     *
     * The decision is based on the node's tag name, its parent's tag name and the
     * sibling returned by getNextSiblingOfTypeDOMElement() (presumably the next
     * element sibling, or null if there is none - confirm against that helper).
     *
     * Rules: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * @param \DOMNode $node
     *
     * @return bool true if the closing tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        //
        // Additionally, a <source> inside audio, video, picture or source is treated as omittable
        // when it is the last element or is followed by another source element.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
            case 'optgroup':
            case 'tr':
                // Omittable when last, or when followed by an element of the same kind.
                return $is_last || $next_tag_name === $tag_name;

            case 'rp':
                return $is_last || \in_array($next_tag_name, ['rp', 'rt'], true);

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($is_last || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $is_last || \in_array($next_tag_name, ['td', 'th'], true);

            case 'dd':
                return $is_last || \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'dt':
                // Unlike <dd>, a trailing <dt> must keep its end tag.
                return \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'option':
                return $is_last || \in_array($next_tag_name, ['option', 'optgroup'], true);

            case 'p':
                if ($is_last) {
                    // Last element in the parent: allowed unless the parent is one of these.
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
923
924 52
    /**
     * Serialize the children of a DOM node into an HTML string (recursively).
     *
     * - Elements are written as "<tag attributes>children</tag>". The closing tag is left out
     *   when removal of omitted tags is enabled, the document is neither HTML4 nor XHTML and
     *   domNodeClosingTagOptional() reports the closing tag as optional.
     * - Text nodes are appended via "wholeText"; whitespace-only text nodes that have both a
     *   previous and a next sibling are reduced to a single space (or dropped inside <head>).
     * - Comments are re-wrapped in "<!--" and "-->".
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // Strip trailing whitespace from the buffer and terminate it with exactly one space,
        // except when the current node sits directly inside <head> (then nothing is appended).
        $endWithSingleSpace = static function (string $buffer, \DOMNode $current): string {
            $buffer = \rtrim($buffer);

            $parent = $current->parentNode;
            if ($parent && $parent->nodeName !== 'head') {
                $buffer .= ' ';
            }

            return $buffer;
        };

        $html = '';

        // State carried between iterations: '' | 'is_empty' | 'last_was_empty'.
        // 'is_empty' is set when an iteration handled a whitespace separator; on the next
        // iteration it turns into 'last_was_empty', and into '' on the one after that.
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMElement) {
                // rtrim() removes the separator again when the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                $closingTagCanBeOmitted = $this->doRemoveOmittedHtmlTags
                                          && !$this->isHTML4
                                          && !$this->isXHTML
                                          && $this->domNodeClosingTagOptional($child);
                if (!$closingTagCanBeOmitted) {
                    $html .= '</' . $child->tagName . '>';
                }

                $followedBySingleSpace = !$this->doRemoveWhitespaceAroundTags
                                         && $child->nextSibling instanceof \DOMText
                                         && $child->nextSibling->wholeText === ' ';
                if ($followedBySingleSpace) {
                    if ($emptyStringTmp !== 'last_was_empty' && \substr($html, -1) !== ' ') {
                        $html = $endWithSingleSpace($html, $child);
                    }

                    $emptyStringTmp = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $html .= $child->wholeText;

                    continue;
                }

                // whitespace at the very start or end of the parent is dropped entirely
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsSpace = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                if (
                    $containsSpace
                    ||
                    ($emptyStringTmp !== 'last_was_empty' && \substr($html, -1) !== ' ')
                ) {
                    $html = $endWithSingleSpace($html, $child);
                }

                $emptyStringTmp = 'is_empty';

                continue;
            }

            if ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1022
1023
    /**
     * Build the "<!DOCTYPE ...>" declaration from the document-type child of the given node.
     *
     * Produced forms:
     * - <!DOCTYPE name>
     * - <!DOCTYPE name PUBLIC "publicId">
     * - <!DOCTYPE name PUBLIC "publicId" "systemId">
     * - <!DOCTYPE name SYSTEM "systemId">
     *
     * @param \DOMNode $node <p>The node (usually the document) whose children are searched.</p>
     *
     * @return string <p>The doctype declaration, or an empty string if the input had no doctype
     *                or no named document-type node was found.</p>
     */
    private function getDoctype(\DOMNode $node): string
    {
        // check the doc-type only if it wasn't generated by DomDocument itself
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' PUBLIC "' . $child->publicId . '"';

                // The system identifier follows the public one without its own keyword;
                // exactly one separating space is emitted (previously two).
                if ($child->systemId) {
                    $doctype .= ' "' . $child->systemId . '"';
                }
            } elseif ($child->systemId) {
                $doctype .= ' SYSTEM "' . $child->systemId . '"';
            }

            return $doctype . '>';
        }

        return '';
    }
1058
1059
    /**
     * Get the value of the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1066
1067
    /**
     * Get the current value of the "doOptimizeAttributes" setting.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1074
1075
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" setting.
     *
     * minify() only runs the DOM-based minification and the restoring of HTML entities
     * when this setting is true.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1082
1083
    /**
     * Get the current value of the "doRemoveComments" setting.
     *
     * When true, the DOM-based minification calls removeComments().
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1090
1091
    /**
     * Get the current value of the "doRemoveDefaultAttributes" setting.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1098
1099
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" setting.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1106
1107
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" setting.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1114
1115
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" setting.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1122
1123
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" setting.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1130
1131
    /**
     * Get the current value of the "doRemoveEmptyAttributes" setting.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1138
1139
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" setting.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1146
1147
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" setting.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1154
1155
    /**
     * Get the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" setting.
     *
     * @return bool
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1162
1163
    /**
     * Get the current value of the "doMakeSameDomainsLinksRelative" setting.
     *
     * @return bool
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1170
1171
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" setting.
     *
     * domNodeToString() only skips optional closing tags when this setting is true.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1178
1179
    /**
     * Get the current value of the "doRemoveOmittedQuotes" setting.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1186
1187
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" setting.
     *
     * When true, minify() removes a single whitespace character between ">" and "<".
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1194
1195
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" setting.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1202
1203
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" setting.
     *
     * When true, the DOM-based minification calls removeWhitespaceAroundTags() for every element.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1210
1211
    /**
     * Get the current value of the "doSortCssClassNames" setting.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1218
1219
    /**
     * Get the current value of the "doSortHtmlAttributes" setting.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1226
1227
    /**
     * Get the current value of the "doSumUpWhitespace" setting.
     *
     * When true, the DOM-based minification calls sumUpWhitespace().
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1234
1235
    /**
     * Get the current value of the "isHTML4" flag.
     *
     * The flag is derived from the doctype during the DOM-based minification
     * (set when the doctype string contains "html4").
     *
     * @return bool
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1242
1243
    /**
     * Get the current value of the "isXHTML" flag.
     *
     * The flag is derived from the doctype during the DOM-based minification
     * (set when the doctype string contains "xhtml1").
     *
     * @return bool
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1250
1251
    /**
     * Minify an HTML string.
     *
     * Steps, in order:
     * 1. optional DOM-based minification via minifyHtmlDom()
     * 2. normalisation of the whitespace between the attributes of each tag
     * 3. optional removal of a single whitespace character between ">" and "<"
     * 4. restoring of protected HTML and (if the DOM step ran) of protected HTML entities
     * 5. clean-up of line breaks around <html> / <head> tags and of self-closing-tag syntax
     *
     * If the result is longer than the trimmed input, the trimmed input is returned instead.
     *
     * @param string $html                     <p>The HTML to minify; it is cast to string and trimmed first.</p>
     * @param bool   $multiDecodeNewHtmlEntity <p>Passed through to the DOM-based minification.</p>
     *
     * @return string <p>The minified HTML, or an empty string for empty / whitespace-only input.</p>
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare against '' explicitly: a loose check (!$html) would also reject the valid input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $html = (string) \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    return '<' . $matches[1] . \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]) . $matches[3] . '>';
                },
                $html
            );
        }

        if ($this->doRemoveSpacesBetweenTags && \strpos($html, ' ') !== false) {
            // Remove spaces that are between > and <
            $html = (string) \preg_replace('#(>)\s(<)#', '>$2', $html);
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            // quote the placeholder tag name, so that it is always matched literally
            $helperTag = \preg_quote($this->protectedChildNodesHelper, '/');

            $html = (string) \preg_replace_callback(
                '/<(?<element>' . $helperTag . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $helperTag . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1382
1383
    /**
     * Find the first following sibling of the given node that is a \DOMElement.
     *
     * Siblings of any other node type (e.g. text or comment nodes) are skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null <p>The next element sibling, or null if there is none.</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1397
1398
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: conditional comments are no longer supported since IE 10
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or ends with "[endif]".
     *
     * @param string $comment <p>The comment text, without the surrounding comment markers.</p>
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap strpos() check avoids running the regex on ordinary comments
        /** @noinspection RegExpRedundantEscape */
        if (
            \strpos($comment, '[if ') !== false
            &&
            \preg_match('/^\[if [^\]]+\]/', $comment)
        ) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               &&
               \preg_match('/\[endif\]$/', $comment);
    }
1430
1431
    /**
     * Minify the HTML via the DOM: protect special content, remove comments, sum up
     * whitespace, notify the registered observers and serialize the DOM back into a string.
     *
     * Side effects: updates "withDocType", "isHTML4" and "isXHTML" for the current input and
     * fills the list of protected child nodes (via the protect*() helpers).
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity <p>Passed through to HtmlDomParser::fixHtmlOutput().</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not format the output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        $doctypeStr = $this->getDoctype($dom->getDocument());

        // Always recompute the doctype flags: if they were only written when a doctype exists,
        // the flags of a previous minify() call would leak into documents without a doctype.
        $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
        $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;

        // -------------------------------------------------------------------------
        // Protect <nocompress> HTML tags first.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Notify the Observer before the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            // -------------------------------------------------------------------------
            // Notify the Observer after the minification.
            // -------------------------------------------------------------------------

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        return $dom->fixHtmlOutput(
            $doctypeStr . $this->domNodeToString($dom->getDocument()),
            $multiDecodeNewHtmlEntity
        );
    }
1522
1523
    /**
     * Call domElementAfterMinification() on every registered DOM-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement <p>The element that was processed.</p>
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1534
1535
    /**
     * Call domElementBeforeMinification() on every registered DOM-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement <p>The element that is about to be processed.</p>
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1546
1547
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match, the inner HTML of the element's parent is stored in the list of
     * protected child nodes, and the parent's node value is replaced by a numbered
     * placeholder tag, which restoreProtectedHtml() resolves again later.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $counter = $this->protected_tags_counter;
            $this->protectedChildNodes[$counter] = $element->parentNode()->innerHtml();

            $parentNode = $element->getNode()->parentNode;
            if ($parentNode !== null) {
                $parentNode->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $counter . '"></' . $helperTag . '>';
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1571
1572
    /**
     * Prevent changes of <code> content, inline "styles" / "scripts" and conditional comments.
     *
     * The protected content is stored in the list of protected child nodes and replaced in
     * the DOM by a numbered placeholder tag. Script / style elements with a "src" attribute
     * are skipped, and so are comments that are not conditional comments.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            if (\in_array($element->tag, ['script', 'style'], true)) {
                $attributes = $element->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $counter = $this->protected_tags_counter;
            $this->protectedChildNodes[$counter] = $element->innerhtml;
            $element->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $counter . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $text = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($text)) {
                continue;
            }

            $counter = $this->protected_tags_counter;
            $this->protectedChildNodes[$counter] = '<!--' . $text . '-->';

            // swap the comment node for a text node that carries the placeholder
            /* @var $node \DOMComment */
            $node = $element->getNode();
            $placeholder = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $counter . '"></' . $helperTag . '>');
            $parentNode = $node->parentNode;
            if ($parentNode !== null) {
                $parentNode->replaceChild($placeholder, $node);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1629
1630
    /**
     * Remove comments from the dom.
     *
     * Comments whose value contains a "[" are kept (presumably to spare conditional
     * comments such as "[if IE]"). The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $comment = $commentWrapper->getNode();

            if (\strpos($comment->nodeValue, '[') !== false) {
                continue;
            }

            $parentNode = $comment->parentNode;
            if ($parentNode !== null) {
                $parentNode->removeChild($comment);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1654
1655
    /**
     * Collapse whitespace in the text nodes in and around a tag.
     *
     * Only applies to tags listed in self::$trimWhitespaceFromTags that have at least one
     * child node. The first child, last child, previous sibling and next sibling are
     * inspected; for each of them that is a text node, every match of self::$regExSpace
     * (two or more whitespace characters, or a single line break) is replaced by one space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length < 1) {
            return;
        }

        /** @var \DOMNode[]|null[] $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // preg_replace() returns null on error; the node is left untouched in that case
            $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($collapsed !== null) {
                $candidate->nodeValue = $collapsed;
            }
        }
    }
1691
1692
    /**
     * Callback for preg_replace_callback(): swap a placeholder tag back to its protected HTML.
     *
     * The id is read from the last double-quoted run of digits within the "attributes" match
     * and used as the key into the list of protected child nodes.
     *
     * @param array $matches PREG matches (the named group "attributes" is used)
     *
     * @return string <p>The protected HTML, or an empty string if no entry exists for the id.</p>
     */
    private function restoreProtectedHtml($matches): string
    {
        $idMatch = [];
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatch);

        if (!isset($idMatch['id'])) {
            return '';
        }

        return $this->protectedChildNodes[$idMatch['id']] ?? '';
    }
1705
1706
    /**
     * Set the value of the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>For a fluent interface.</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1717
1718
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node, each match of self::$regExSpace (two or more whitespace characters,
     * or a single line break) is replaced by one space. Text nodes whose node path contains
     * "/" followed by one of the entries of self::$skipTagsForRemoveWhitespace are left
     * untouched. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $textNodeWrapper) {
            /* @var $textNode \DOMNode */
            $textNode = $textNodeWrapper->getNode();

            $nodePath = $textNode->getNodePath();
            if ($nodePath === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: the "${var}" interpolation form is deprecated as of PHP 8.2
                if (\strpos($nodePath, '/' . $pattern) !== false) {
                    // the text lives inside a tag whose whitespace must be kept
                    continue 2;
                }
            }

            // preg_replace() returns null on error; the node is left untouched in that case
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
            if ($nodeValueTmp !== null) {
                $textNode->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1758
1759
    /**
     * Set the "keepBrokenHtml" option, which the DOM-based minification forwards to
     * HtmlDomParser::useKeepBrokenHtml().
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin <p>For a fluent interface.</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1772
}
1773