Completed
Push — master ( 1cb287...e0c79b )
by Lars
02:28
created

HtmlMin::minify()   C

Complexity

Conditions 11
Paths 194

Size

Total Lines 125

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 48
CRAP Score 11.001

Importance

Changes 0
Metric Value
dl 0
loc 125
ccs 48
cts 49
cp 0.9796
rs 5.2266
c 0
b 0
f 0
cc 11
nc 194
nop 2
crap 11.001

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
     * PCRE pattern (UTF-8 mode) matching a run of two or more whitespace characters
     * or a single CR / LF character.
     *
     * @var string
     */
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";

    /**
     * Tag names whose closing tag is always reported as optional.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $optional_end_tags = [
        'html',
        'head',
        'body',
    ];

    /**
     * Tag names handled as self-closing elements (consumers are outside this excerpt).
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Set of tag names, stored as array keys so membership can be tested with isset().
     * NOTE(review): presumably the tags around which whitespace gets trimmed - confirm at the usage site.
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];

    /**
     * Set of boolean attribute names, stored as array keys so membership can be tested with isset().
     *
     * @var array
     */
    private static $booleanAttributes = [
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    ];

    /**
     * Tag names that are skipped by the whitespace removal (consumers are outside this excerpt).
     *
     * @var array
     */
    private static $skipTagsForRemoveWhitespace = [
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    ];
139
140
    /** @var array */
    private $protectedChildNodes = [];

    /** @var string */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    // -- option flags; most of them are switched through the fluent method of the same name --

    /** @var bool */
    private $doOptimizeViaHtmlDomParser = true;

    /** @var bool */
    private $doOptimizeAttributes = true;

    /** @var bool */
    private $doRemoveComments = true;

    /** @var bool */
    private $doRemoveWhitespaceAroundTags = false;

    /** @var bool */
    private $doRemoveOmittedQuotes = true;

    /** @var bool */
    private $doRemoveOmittedHtmlTags = true;

    /** @var bool */
    private $doRemoveHttpPrefixFromAttributes = false;

    /** @var bool */
    private $doRemoveHttpsPrefixFromAttributes = false;

    /** @var bool */
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;

    /**
     * Set by doMakeSameDomainsLinksRelative(): true when at least one local domain is registered.
     *
     * @var bool
     */
    private $doMakeSameDomainsLinksRelative = false;

    /**
     * Normalized local domains registered through doMakeSameDomainsLinksRelative().
     *
     * @var string[]
     */
    private $localDomains = [];

    /** @var array */
    private $domainsToRemoveHttpPrefixFromAttributes = [
        'google.com',
        'google.de',
    ];

    /** @var bool */
    private $doSortCssClassNames = true;

    /** @var bool */
    private $doSortHtmlAttributes = true;

    /** @var bool */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /** @var bool */
    private $doRemoveDefaultAttributes = false;

    /** @var bool */
    private $doRemoveDeprecatedAnchorName = true;

    /** @var bool */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /** @var bool */
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;

    /** @var bool */
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;

    /** @var bool */
    private $doRemoveDefaultTypeFromButton = false;

    /** @var bool */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /** @var bool */
    private $doRemoveValueFromEmptyInput = true;

    /** @var bool */
    private $doRemoveEmptyAttributes = true;

    /** @var bool */
    private $doSumUpWhitespace = true;

    /** @var bool */
    private $doRemoveSpacesBetweenTags = false;

    /** @var bool */
    private $keepBrokenHtml = false;

    /** @var bool */
    private $withDocType = false;

    /**
     * Observers registered through attachObserverToTheDomLoop(); created in the constructor.
     *
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
     *
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
     */
    private $domLoopObservers;

    /** @var int */
    private $protected_tags_counter = 0;

    /** @var bool */
    private $isHTML4 = false;

    /** @var bool */
    private $isXHTML = false;

    /** @var string[]|null */
    private $templateLogicSyntaxInSpecialScriptTags;
319
320
    /**
     * Create the observer storage and register the default attribute-optimizing observer.
     */
    public function __construct()
    {
        // the storage has to exist before the first observer can be attached
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
329
330
    /**
     * Add an observer to the storage held in $domLoopObservers.
     *
     * @param HtmlMinDomObserverInterface $observer observer instance to register
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $observers = $this->domLoopObservers;
        $observers->attach($observer);
    }
339
340
    /**
     * Set the $doOptimizeAttributes option (property default: true).
     *
     * @param bool $doOptimizeAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
351
352
    /**
     * Set the $doOptimizeViaHtmlDomParser option (property default: true).
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
363
364
    /**
     * Set the $doRemoveComments option (property default: true).
     *
     * @param bool $doRemoveComments new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
375
376
    /**
     * Set the $doRemoveDefaultAttributes option (property default: false).
     *
     * @param bool $doRemoveDefaultAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
387
388
    /**
     * Set the $doRemoveDeprecatedAnchorName option (property default: true).
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
399
400
    /**
     * Set the $doRemoveDeprecatedScriptCharsetAttribute option (property default: true).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
411
412
    /**
     * Set the $doRemoveDeprecatedTypeFromScriptTag option (property default: true).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
423
424
    /**
     * Set the $doRemoveDeprecatedTypeFromStylesheetLink option (property default: true).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
435
436
    /**
     * Set the $doRemoveDeprecatedTypeFromStyleAndLinkTag option (property default: true).
     *
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
447
448
    /**
     * Set the $doRemoveDefaultMediaTypeFromStyleAndLinkTag option (property default: true).
     *
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
459
460
    /**
     * Set the $doRemoveDefaultTypeFromButton option (property default: false).
     *
     * @param bool $doRemoveDefaultTypeFromButton new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
471
472
    /**
     * Set the $doRemoveEmptyAttributes option (property default: true).
     *
     * @param bool $doRemoveEmptyAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
483
484
    /**
     * Set the $doRemoveHttpPrefixFromAttributes option (property default: false).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
495
496
    /**
     * Set the $doRemoveHttpsPrefixFromAttributes option (property default: false).
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
507
508
    /**
     * Set the $doKeepHttpAndHttpsPrefixOnExternalAttributes option (property default: false).
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
519
520
    /**
     * Register the local domains and set the $doMakeSameDomainsLinksRelative option accordingly.
     *
     * Every entry is normalized before it is stored: each "//" - optionally preceded by
     * "http:" or "https:" (case-insensitive) - is removed and trailing slashes are trimmed,
     * e.g. "https://example.com/" is stored as "example.com". Array keys are preserved.
     * The option is enabled only when at least one domain was passed; an empty array disables it.
     *
     * @param string[] $localDomains
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // Build a fresh array instead of iterating by reference: a by-reference foreach
        // leaves its loop variable bound to the last element once the loop is done.
        $normalizedDomains = [];
        foreach ($localDomains as $key => $localDomain) {
            $withoutScheme = (string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain);
            $normalizedDomains[$key] = \rtrim($withoutScheme, '/');
        }

        $this->localDomains = $normalizedDomains;
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
537
538
    /**
     * Return the content of $localDomains, i.e. the normalized domains stored by
     * doMakeSameDomainsLinksRelative() (an empty array by default).
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        $domains = $this->localDomains;

        return $domains;
    }
545
546
    /**
     * Set the $doRemoveOmittedHtmlTags option (property default: true).
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
557
558
    /**
     * Set the $doRemoveOmittedQuotes option (property default: true).
     *
     * @param bool $doRemoveOmittedQuotes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
569
570
    /**
     * Set the $doRemoveSpacesBetweenTags option (property default: false).
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
581
582
    /**
     * Set the $doRemoveValueFromEmptyInput option (property default: true).
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
593
594
    /**
     * Set the $doRemoveWhitespaceAroundTags option (property default: false).
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
605
606
    /**
     * Set the $doSortCssClassNames option (property default: true).
     *
     * @param bool $doSortCssClassNames new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
617
618
    /**
     * Set the $doSortHtmlAttributes option (property default: true).
     *
     * @param bool $doSortHtmlAttributes new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
629
630
    /**
     * Set the $doSumUpWhitespace option (property default: true).
     *
     * @param bool $doSumUpWhitespace new value of the option
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
641
642 54
    private function domNodeAttributesToString(\DOMNode $node): string
643
    {
644
        // Remove quotes around attribute values, when allowed (<p class="foo"> → <p class=foo>)
645 54
        $attr_str = '';
646 54
        if ($node->attributes !== null) {
647 54
            foreach ($node->attributes as $attribute) {
648 36
                $attr_str .= $attribute->name;
649
650
                if (
651 36
                    $this->doOptimizeAttributes
652
                    &&
653 36
                    isset(self::$booleanAttributes[$attribute->name])
654
                ) {
655 10
                    $attr_str .= ' ';
656
657 10
                    continue;
658
                }
659
660 36
                $attr_str .= '=';
661
662
                // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
663 36
                $omit_quotes = $this->doRemoveOmittedQuotes
664
                               &&
665 36
                               $attribute->value !== ''
666
                               &&
667 36
                               \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
668
                               &&
669 36
                               \strpos($attribute->name, ' ') === false
670
                               &&
671 36
                               \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;
672
673 36
                $quoteTmp = '"';
674
                if (
675 36
                    !$omit_quotes
676
                    &&
677 36
                    \strpos($attribute->value, '"') !== false
678
                ) {
679 1
                    $quoteTmp = "'";
680
                }
681
682
                if (
683 36
                    $this->doOptimizeAttributes
684
                    &&
685
                    (
686 35
                        $attribute->name === 'srcset'
687
                        ||
688 36
                        $attribute->name === 'sizes'
689
                    )
690
                ) {
691 2
                    $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value);
692
                } else {
693 36
                    $attr_val = $attribute->value;
694
                }
695
696 36
                $attr_str .= ($omit_quotes ? '' : $quoteTmp) . $attr_val . ($omit_quotes ? '' : $quoteTmp);
697 36
                $attr_str .= ' ';
698
            }
699
        }
700
701 54
        return \trim($attr_str);
702
    }
703
704
    /**
     * Decide whether the closing tag of the given node may be left out of the output.
     *
     * Based on https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * Implemented:
     * - tags listed in self::$optional_end_tags: always optional;
     * - <li>, <optgroup>, <rp>, <tr>, <td>, <th>, <dd>, <dt>, <option>: optional when followed by
     *   one of the tags of the rule table below, or (all except <dt>) when nothing follows;
     * - <source> inside audio / video / picture / source: optional when followed by another
     *   <source> or when nothing follows;
     * - <p>: optional when followed by one of the block-level tags listed below, or when nothing
     *   follows and the parent is neither a, audio, del, ins, map, noscript, video nor an
     *   autonomous custom element.
     *
     * @param \DOMNode $node
     *
     * @return bool true when the closing tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        // The helper is defined elsewhere in this class; its result is only treated as a tag
        // when it is a \DOMElement, anything else non-null counts as "more content follows".
        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // tag => [optional when no more content follows, following tags that allow the omission]
        $sibling_rules = [
            'li'       => [true, ['li']],
            'optgroup' => [true, ['optgroup']],
            'rp'       => [true, ['rp', 'rt']],
            'tr'       => [true, ['tr']],
            'td'       => [true, ['td', 'th']],
            'th'       => [true, ['td', 'th']],
            'dd'       => [true, ['dd', 'dt']],
            // a <dt> always needs a following <dt> / <dd>
            'dt'       => [false, ['dd', 'dt']],
            'option'   => [true, ['option', 'optgroup']],
        ];

        if (isset($sibling_rules[$tag_name])) {
            list($optional_when_last, $allowed_followers) = $sibling_rules[$tag_name];

            return ($optional_when_last && $is_last)
                   ||
                   \in_array($next_tag_name, $allowed_followers, true);
        }

        if ($tag_name === 'source') {
            return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                   &&
                   ($is_last || $next_tag_name === 'source');
        }

        if ($tag_name === 'p') {
            if ($is_last) {
                // Autonomous custom elements always carry a hyphen in their name; the element
                // check keeps "#document-fragment" parents from being mistaken for one.
                $parent_is_custom_element = $parent_node instanceof \DOMElement
                                            &&
                                            \strpos((string) $parent_tag_name, '-') !== false;

                return $parent_node !== null
                       &&
                       !$parent_is_custom_element
                       &&
                       !\in_array(
                           $parent_tag_name,
                           [
                               'a',
                               'audio',
                               'del',
                               'ins',
                               'map',
                               'noscript',
                               'video',
                           ],
                           true
                       );
            }

            return \in_array(
                $next_tag_name,
                [
                    'address',
                    'article',
                    'aside',
                    'blockquote',
                    'details',
                    'dir',
                    'div',
                    'dl',
                    'fieldset',
                    'figcaption',
                    'figure',
                    'footer',
                    'form',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'h6',
                    'header',
                    'hgroup',
                    'hr',
                    'main',
                    'menu',
                    'nav',
                    'ol',
                    'p',
                    'pre',
                    'section',
                    'table',
                    'ul',
                ],
                true
            );
        }

        return false;
    }
979
980 54
    /**
     * Serialize the child nodes of a DOM node into an HTML string.
     *
     * - Elements are written as "<tag attributes>", followed by their recursively
     *   serialized children and - unless the closing tag may be omitted - "</tag>".
     * - Text nodes reported as element-content whitespace are never copied; at most
     *   one single space is emitted for them.
     * - All other text nodes are appended verbatim, comments are wrapped in "<!--" / "-->".
     *
     * @param \DOMNode $node the node whose child nodes are serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // Strips trailing whitespace from the buffer and re-adds exactly one space,
        // except when the current node has no parent or sits directly inside <head>.
        $withSingleSpace = static function (string $buffer, \DOMNode $current): string {
            $buffer = \rtrim($buffer);

            if ($current->parentNode && $current->parentNode->nodeName !== 'head') {
                $buffer .= ' ';
            }

            return $buffer;
        };

        $output = '';

        // '' | 'is_empty' (whitespace handled in this iteration) | 'last_was_empty' (handled in the previous one)
        $whitespaceState = '';

        foreach ($node->childNodes as $child) {
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMElement) {
                // rtrim() drops the separator again when the element has no attributes
                $output .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $output .= '>' . $this->domNodeToString($child);

                $mayOmitClosingTag = $this->doRemoveOmittedHtmlTags
                                     && !$this->isHTML4
                                     && !$this->isXHTML
                                     && $this->domNodeClosingTagOptional($child);
                if (!$mayOmitClosingTag) {
                    $output .= '</' . $child->tagName . '>';
                }

                $next = $child->nextSibling;
                if (
                    !$this->doRemoveWhitespaceAroundTags
                    &&
                    $next instanceof \DOMText
                    &&
                    $next->wholeText === ' '
                ) {
                    if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output = $withSingleSpace($output, $child);
                    }

                    $whitespaceState = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // whitespace at the very start or end of the parent is dropped completely
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsSpace = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                $bufferNeedsSpace = $whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ';
                if ($containsSpace || $bufferNeedsSpace) {
                    $output = $withSingleSpace($output, $child);
                }

                $whitespaceState = 'is_empty';

                continue;
            }

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';
            }
        }

        return $output;
    }
1078
1079
    /**
     * Build the "<!DOCTYPE ...>" declaration from the first named document-type
     * node found among the direct children of the given node.
     *
     * @param \DOMNode $node node whose child nodes are searched
     *
     * @return string the declaration, or an empty string if the input had no
     *                doctype or no named document-type node was found
     */
    private function getDoctype(\DOMNode $node): string
    {
        // a doc-type that was only generated by DomDocument itself is never emitted
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            // only a system identifier => "SYSTEM" keyword, otherwise "PUBLIC"
            $systemOnly = !$child->publicId && $child->systemId;
            $publicKeyword = $systemOnly ? '' : 'PUBLIC';
            $systemKeyword = $systemOnly ? 'SYSTEM' : '';

            $declaration = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $declaration .= ' ' . $publicKeyword . ' "' . $child->publicId . '"';
            }

            if ($child->systemId) {
                $declaration .= ' ' . $systemKeyword . ' "' . $child->systemId . '"';
            }

            return $declaration . '>';
        }

        return '';
    }
1114
1115
    /**
     * Get the configured "domainsToRemoveHttpPrefixFromAttributes" value.
     *
     * @return array the value as stored on this instance
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1122
1123
    /**
     * Get the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1130
1131
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * minify() only runs the DOM based minification when this is true.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1138
1139
    /**
     * Get the current value of the "doRemoveComments" option.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1146
1147
    /**
     * Get the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1154
1155
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1162
1163
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1170
1171
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1178
1179
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1186
1187
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1194
1195
    /**
     * Get the current value of the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1202
1203
    /**
     * Get the current value of the "doRemoveDefaultTypeFromButton" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        return $this->doRemoveDefaultTypeFromButton;
    }
1210
1211
    /**
     * Get the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1218
1219
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1226
1227
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1234
1235
    /**
     * Get the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option.
     *
     * NOTE: the method name starts with "isdo" (lower-case "d"), unlike the
     * other "isDo..." getters; it is kept as-is because it is public API.
     *
     * @return bool
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1242
1243
    /**
     * Get the current value of the "doMakeSameDomainsLinksRelative" option.
     *
     * @return bool
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1250
1251
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * domNodeToString() only leaves out optional closing tags when this is true.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1258
1259
    /**
     * Get the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1266
1267
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * minify() only strips a whitespace character between ">" and "<" when this is true.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1274
1275
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1282
1283
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1290
1291
    /**
     * Get the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1298
1299
    /**
     * Get the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1306
1307
    /**
     * Get the current value of the "doSumUpWhitespace" option.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1314
1315
    /**
     * Get the current value of the "isHTML4" flag.
     *
     * minifyHtmlDom() sets the flag when the detected doctype string contains "html4".
     *
     * @return bool
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1322
1323
    /**
     * Get the current value of the "isXHTML" flag.
     *
     * minifyHtmlDom() sets the flag when the detected doctype string contains "xhtml1".
     *
     * @return bool
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1330
1331
    /**
     * Minify the given HTML.
     *
     * Steps: optional DOM based minification, whitespace clean-up inside tags,
     * optional removal of a whitespace character between tags, restoring of the
     * protected HTML / HTML-entities and a final string clean-up.
     *
     * @param string $html                     the HTML to minify, surrounding whitespace is trimmed first
     * @param bool   $multiDecodeNewHtmlEntity passed through to the DOM based minification
     *
     * @return string the minified HTML; the trimmed input is returned instead if the
     *                result would be longer than the input or if the protected HTML
     *                could not be restored
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare against '' explicitly: a loose "!$html" check would also discard the input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        //
        // preg_* functions return null on failure (e.g. invalid UTF-8 with the "u"
        // modifier or an exceeded backtrack limit): in that case the previous string
        // is kept instead of silently replacing it with an empty one.
        if (\strpos($html, ' ') !== false) {
            $html = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // keep the tag untouched rather than dropping all of its attributes
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            ) ?? $html;
        }

        if ($this->doRemoveSpacesBetweenTags && \strpos($html, ' ') !== false) {
            // Remove spaces that are between > and <
            $html = \preg_replace('#(>)\s(<)#', '>$2', $html) ?? $html;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $restoredHtml = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($restoredHtml === null) {
                // the placeholders cannot be resolved, so the unmodified input is the only safe result
                return $origHtml;
            }

            $html = $restoredHtml;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1462
1463
    /**
     * Find the first following sibling of the given node that is a \DOMElement,
     * skipping every other kind of sibling node (e.g. text or comment nodes).
     *
     * @param \DOMNode $node the node to start from (the node itself is never returned)
     *
     * @return \DOMNode|null the next element sibling, or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;

        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1477
1478
    /**
     * Check if the given comment text belongs to a conditional comment.
     *
     * The text counts as conditional if it starts with "[if <expression>]"
     * or if it ends with "[endif]".
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment the text of the comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap strpos() checks avoid running the regex on ordinary comments

        /** @noinspection RegExpRedundantEscape */
        $startsWithCondition = \strpos($comment, '[if ') !== false
                               && \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               && \preg_match('/\[endif\]$/', $comment);
    }
1510
1511
    /**
     * Minify the HTML via the DOM: load it into a "HtmlDomParser", protect the
     * content that must not be changed, apply the enabled optimizations, notify
     * the observers and convert the DOM back into a string.
     *
     * Side effects: updates "withDocType", "isHTML4" and "isXHTML" for the given
     * input and fills the protected child nodes (restored later by minify()).
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not formats output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        $doctypeStr = $this->getDoctype($dom->getDocument());

        // Always recompute both flags (an empty doctype string yields false), so that
        // a re-used instance does not keep the flags of a previously minified document.
        $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
        $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;

        // -------------------------------------------------------------------------
        // Protect <nocompress> HTML tags first.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Notify the Observer before the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            // -------------------------------------------------------------------------
            // Notify the Observer after the minification.
            // -------------------------------------------------------------------------

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        return $dom->fixHtmlOutput(
            $doctypeStr . $this->domNodeToString($dom->getDocument()),
            $multiDecodeNewHtmlEntity
        );
    }
1605
1606
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that was processed
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $registeredObserver) {
            $registeredObserver->domElementAfterMinification($domElement, $this);
        }
    }
1617
1618
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is about to be processed
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $registeredObserver) {
            $registeredObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1629
1630
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match that is not removed, the inner HTML of its parent is saved
     * under the current counter value and the value of the parent DOM node is
     * replaced by a placeholder tag carrying that counter value.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector selector passed to HtmlDomParser::find()
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $marker = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = $match->parentNode()->innerHtml();

            $domParent = $match->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->nodeValue = '<' . $marker . ' data-' . $marker . '="' . $id . '"></' . $marker . '>';
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1654
1655
    /**
     * Prevent changes of "code" elements, inline "styles" / "scripts" and
     * conditional comments.
     *
     * The original content is saved in the protected child nodes and replaced
     * by a placeholder tag in the DOM. Scripts / styles with a "src" attribute
     * and normal comments are left untouched.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $marker = $this->protectedChildNodesHelper;

        // builds the placeholder tag for one saved entry
        $placeholderFor = static function ($id) use ($marker): string {
            return '<' . $marker . ' data-' . $marker . '="' . $id . '"></' . $marker . '>';
        };

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            if ($element->tag === 'script' || $element->tag === 'style') {
                // skip external links
                $attributes = $element->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = $element->innerhtml;
            $element->getNode()->nodeValue = $placeholderFor($id);

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $commentText = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = '<!--' . $commentText . '-->';

            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            $placeholderNode = new \DOMText($placeholderFor($id));

            $domParent = $commentNode->parentNode;
            if ($domParent !== null) {
                $domParent->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1712
1713
    /**
     * Remove comments from the dom.
     *
     * Only comments whose value does not contain a "[" are removed; afterwards
     * the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $wrappedComment) {
            $commentNode = $wrappedComment->getNode();

            // comments containing "[" are kept
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner === null) {
                continue;
            }

            $owner->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1737
1738
    /**
     * Collapse whitespace in the text nodes in and around the given element.
     *
     * Only applies to elements whose tag is a key of "self::$trimWhitespaceFromTags"
     * and which have at least one child node. The first child, last child, previous
     * sibling and next sibling are inspected; for each of them that is a text node,
     * every match of "self::$regExSpace" is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $domNode = $element->getNode();
        if (!($domNode->childNodes->length > 0)) {
            return;
        }

        /** @var array<int, \DOMNode|null> $neighbours */
        $neighbours = [
            $domNode->firstChild,
            $domNode->lastChild,
            $domNode->previousSibling,
            $domNode->nextSibling,
        ];

        foreach ($neighbours as $neighbour) {
            if ($neighbour === null || $neighbour->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // preg_replace() returns null on error: keep the old value in that case
            $collapsedValue = \preg_replace(self::$regExSpace, ' ', $neighbour->nodeValue);
            if ($collapsedValue !== null) {
                $neighbour->nodeValue = $collapsedValue;
            }
        }
    }
1774
1775
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the id from a double-quoted run of digits inside the "attributes"
     * capture and returns the entry stored under that id in
     * $this->protectedChildNodes.
     *
     * @param array $matches PREG matches; only the "attributes" key is read
     *
     * @return string the stored entry, or an empty string when no id can be
     *                extracted or nothing is stored for that id
     */
    private function restoreProtectedHtml($matches): string
    {
        // An unmatched capture group may be absent from the matches array.
        $attributes = $matches['attributes'] ?? '';

        // Without a successful match there is no "id" key to read, so bail out
        // instead of triggering an undefined-key warning and a null-key lookup.
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1788
1789
    /**
     * Store the given list in $this->domainsToRemoveHttpPrefixFromAttributes,
     * replacing whatever was set before.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this the same instance, so calls can be chained
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1800
1801
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Every text node of the document has the parts of its value matched by
     * self::$regExSpace replaced with a single space, unless the node's path
     * contains "/<tag>" for one of self::$skipTagsForRemoveWhitespace.
     * Afterwards the underlying document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /** @var \DOMNode $text_node */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            // Skip text nodes located below one of the whitespace-preserving tags.
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // Plain concatenation: "${var}" interpolation is deprecated since PHP 8.2.
                if (\strpos($xp, '/' . $pattern) !== false) {
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            // preg_replace() returns null on a PCRE error: keep the original text then.
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1841
1842
    /**
     * Set the "keep broken html" flag on this instance.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml new value stored in $this->keepBrokenHtml
     *
     * @return HtmlMin the same instance, so calls can be chained
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1855
1856
    /**
     * Replace the stored template-logic syntax list with the given one.
     *
     * Every element is checked before anything is stored, so an invalid list
     * leaves the current setting unchanged.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException if any element is not a string
     *
     * @return HtmlMin the same instance, so calls can be chained
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                // Derive the name from the method itself so the message cannot
                // drift away from the actual method name again.
                throw new \InvalidArgumentException(__FUNCTION__ . ' only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1873
}
1874