Completed
Push — master ( 70fde5...835fac )
by Lars
01:26
created

HtmlMin::domNodeToString()   D

Complexity

Conditions 26
Paths 35

Size

Total Lines 99

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 42
CRAP Score 26.0085

Importance

Changes 0
Metric Value
dl 0
loc 99
ccs 42
cts 43
cp 0.9767
rs 4.1666
c 0
b 0
f 0
cc 26
nc 35
nop 1
crap 26.0085

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method: move a cohesive part of the method body into a new, well-named method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainsLinksRelative = false;
199
200
    /**
201
     * @var string[]
202
     */
203
    private $localDomains = [];
204
205
    /**
206
     * @var array
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromStyleAndLinkTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveDefaultTypeFromButton = false;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doRemoveDeprecatedTypeFromScriptTag = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveValueFromEmptyInput = true;
267
268
    /**
269
     * @var bool
270
     */
271
    private $doRemoveEmptyAttributes = true;
272
273
    /**
274
     * @var bool
275
     */
276
    private $doSumUpWhitespace = true;
277
278
    /**
279
     * @var bool
280
     */
281
    private $doRemoveSpacesBetweenTags = false;
282
283
    /**
284
     * @var bool
285
     */
286
    private $keepBrokenHtml = false;
287
288
    /**
289
     * @var bool
290
     */
291
    private $withDocType = false;
292
293
    /**
294
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
295
     *
296
     * @psalm-var \SplObjectStorage<HtmlMinDomObserverInterface>
297
     */
298
    private $domLoopObservers;
299
300
    /**
301
     * @var int
302
     */
303
    private $protected_tags_counter = 0;
304
305
    /**
306
     * @var bool
307
     */
308
    private $isHTML4 = false;
309
310
    /**
311
     * @var bool
312
     */
313
    private $isXHTML = false;
314
315
    /**
316
     * @var string[]|null
317
     */
318
    private $templateLogicSyntaxInSpecialScriptTags;
319
320
    /**
     * Set up the minifier: create the (empty) observer storage and register
     * the built-in attribute-optimizing observer for the DOM loop.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
329
330
    /**
     * Register an observer in the observer storage of this minifier.
     *
     * @param HtmlMinDomObserverInterface $observer the observer to add
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $storage = $this->domLoopObservers;
        $storage->attach($observer);
    }
339
340
    /**
     * Set the "doOptimizeAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doOptimizeAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
351
352
    /**
     * Set the "doOptimizeViaHtmlDomParser" option; calling it without an argument enables the option.
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
363
364
    /**
     * Set the "doRemoveComments" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveComments new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
375
376
    /**
     * Set the "doRemoveDefaultAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDefaultAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
387
388
    /**
     * Set the "doRemoveDeprecatedAnchorName" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
399
400
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
411
412
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
423
424
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
435
436
    /**
     * Set the "doRemoveDeprecatedTypeFromStyleAndLinkTag" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDeprecatedTypeFromStyleAndLinkTag new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDeprecatedTypeFromStyleAndLinkTag(bool $doRemoveDeprecatedTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromStyleAndLinkTag = $doRemoveDeprecatedTypeFromStyleAndLinkTag;

        return $this;
    }
447
448
    /**
     * Set the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDefaultMediaTypeFromStyleAndLinkTag(bool $doRemoveDefaultMediaTypeFromStyleAndLinkTag = true): self
    {
        $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag = $doRemoveDefaultMediaTypeFromStyleAndLinkTag;

        return $this;
    }
459
460
    /**
     * Set the "doRemoveDefaultTypeFromButton" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveDefaultTypeFromButton new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveDefaultTypeFromButton(bool $doRemoveDefaultTypeFromButton = true): self
    {
        $this->doRemoveDefaultTypeFromButton = $doRemoveDefaultTypeFromButton;

        return $this;
    }
471
472
    /**
     * Set the "doRemoveEmptyAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveEmptyAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
483
484
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
495
496
    /**
     * Set the "doRemoveHttpsPrefixFromAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
507
508
    /**
     * Set the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
519
520
    /**
     * Store the list of "local" domains and switch the
     * "doMakeSameDomainsLinksRelative" option on or off accordingly.
     *
     * Every entry is normalized before it is stored: each occurrence of "//",
     * optionally preceded by "http:" or "https:" (case-insensitive), is removed
     * and trailing slashes are stripped, e.g. "https://example.com/" becomes
     * "example.com". The option is enabled only when at least one domain was given.
     *
     * @param string[] $localDomains
     *
     * @return $this the same instance, for method chaining
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() preserves the keys of a single input array and, unlike a
        // by-reference foreach, leaves no dangling reference inside the stored array.
        $this->localDomains = \array_map(
            static function ($localDomain): string {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
537
538
    /**
     * Return the stored list of local domains.
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        $domains = $this->localDomains;

        return $domains;
    }
545
546
    /**
     * Set the "doRemoveOmittedHtmlTags" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
557
558
    /**
     * Set the "doRemoveOmittedQuotes" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveOmittedQuotes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
569
570
    /**
     * Set the "doRemoveSpacesBetweenTags" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
581
582
    /**
     * Set the "doRemoveValueFromEmptyInput" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
593
594
    /**
     * Set the "doRemoveWhitespaceAroundTags" option; calling it without an argument enables the option.
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
605
606
    /**
     * Set the "doSortCssClassNames" option; calling it without an argument enables the option.
     *
     * @param bool $doSortCssClassNames new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
617
618
    /**
     * Set the "doSortHtmlAttributes" option; calling it without an argument enables the option.
     *
     * @param bool $doSortHtmlAttributes new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
629
630
    /**
     * Set the "doSumUpWhitespace" option; calling it without an argument enables the option.
     *
     * @param bool $doSumUpWhitespace new value of the option
     *
     * @return $this the same instance, for method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
641
642 56
    /**
     * Serialize the attributes of a DOM node into a minified attribute string.
     *
     * - With "doOptimizeAttributes" enabled, attributes listed in
     *   self::$booleanAttributes are written as the bare name (value dropped), and
     *   whitespace inside "srcset" / "sizes" values is collapsed via self::$regExSpace.
     * - With "doRemoveOmittedQuotes" enabled, quotes are left out when allowed
     *   (<p class="foo"> → <p class=foo>): the value must be non-empty and must not
     *   contain quotes, "=", "<", ">", "`" or whitespace.
     * - Quoted values use double quotes, or single quotes when the value contains
     *   a double quote. A value containing both kinds is double-quoted with its
     *   double quotes escaped as "&quot;".
     *
     * @param \DOMNode $node
     *
     * @return string the attributes, each followed by a space, with the result trimmed;
     *                an empty string when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $name = $attribute->name;
            $value = $attribute->value;

            // boolean attributes are minimized to the bare name, e.g. <input disabled>
            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$name])
            ) {
                $attr_str .= $name . ' ';

                continue;
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $value !== ''
                           &&
                           \strpos($name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $value) === 0;

            $attr_val = $value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $name === 'srcset'
                    ||
                    $name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error; keep the untouched
                // value in that case instead of silently emitting an empty one
                $attr_val = \preg_replace(self::$regExSpace, ' ', $value) ?? $value;
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quoteTmp = "'";
            } else {
                // both quote characters occur in the value, so neither can delimit
                // it as-is: use double quotes and escape the inner ones
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $name . '=' . $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
703
704
    /**
     * Decide whether the end tag of the given node may be left out of the output.
     *
     * The decision follows the tag-omission rules of the HTML standard:
     * https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * @param \DOMNode $node
     *
     * @return bool true when the closing tag is optional for the node in its current position
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        // NOTE(review): getNextSiblingOfTypeDOMElement() is defined outside this part of
        // the file; it is used here as "next element sibling, or null if none" — confirm.
        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.

        // TODO: Not Implemented
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                return $is_last || $next_tag_name === 'li';

            case 'optgroup':
                return $is_last || $next_tag_name === 'optgroup';

            case 'rp':
                return $is_last || $next_tag_name === 'rp' || $next_tag_name === 'rt';

            case 'tr':
                return $is_last || $next_tag_name === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($is_last || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $is_last || $next_tag_name === 'td' || $next_tag_name === 'th';

            case 'dd':
                return $is_last || $next_tag_name === 'dd' || $next_tag_name === 'dt';

            case 'dt':
                // unlike <dd>, a <dt> must be followed by a <dt> or <dd>: "no more
                // content in the parent" does not make its end tag optional
                return $next_tag_name === 'dd' || $next_tag_name === 'dt';

            case 'option':
                return $is_last || $next_tag_name === 'option' || $next_tag_name === 'optgroup';

            case 'p':
                if ($is_last) {
                    // last child: optional unless the parent is one of the excluded
                    // elements or an autonomous custom element (its name contains "-")
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           )
                           &&
                           \strpos($parent_tag_name, '-') === false;
                }

                // "dir" is not part of the current rule but is kept for backward compatibility
                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
973
974 56
    /**
     * Serialize the children of a DOM node into an HTML string.
     *
     * Elements are written as "<tag attributes>" + recursively serialized
     * children + an optional closing tag. Text nodes are appended via
     * "wholeText", comments are wrapped in "<!--" / "-->". Whitespace-only
     * text between siblings is collapsed to at most one space, tracked with a
     * small marker ('' / 'is_empty' / 'last_was_empty').
     *
     * @param \DOMNode $node node whose child nodes are serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';
        $whitespaceState = '';

        // Trim trailing whitespace and re-add exactly one space, except directly inside <head>.
        $appendSeparator = static function (string $buffer, \DOMNode $current): string {
            $buffer = \rtrim($buffer);

            if ($current->parentNode !== null && $current->parentNode->nodeName !== 'head') {
                $buffer .= ' ';
            }

            return $buffer;
        };

        foreach ($node->childNodes as $child) {
            // Age the marker set by the previous iteration.
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMElement) {
                $output .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child))
                           . '>' . $this->domNodeToString($child);

                // The closing tag is only skipped for non-HTML4 / non-XHTML documents.
                $closingTagOmitted = $this->doRemoveOmittedHtmlTags
                                     && !$this->isHTML4
                                     && !$this->isXHTML
                                     && $this->domNodeClosingTagOptional($child);
                if (!$closingTagOmitted) {
                    $output .= '</' . $child->tagName . '>';
                }

                if ($this->doRemoveWhitespaceAroundTags) {
                    continue;
                }

                /** @var \DOMText|null $following - false-positive error from phpstan */
                $following = $child->nextSibling;
                if (!($following instanceof \DOMText) || $following->wholeText !== ' ') {
                    continue;
                }

                if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                    $output = $appendSeparator($output, $child);
                }
                $whitespaceState = 'is_empty';

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // Whitespace at the very start or end of the parent is dropped.
                if ($child->previousSibling === null || $child->nextSibling === null) {
                    continue;
                }

                $containsSpace = $child->wholeText && \strpos($child->wholeText, ' ') !== false;
                $noSpaceEmittedYet = $whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ';
                if ($containsSpace || $noSpaceEmittedYet) {
                    $output = $appendSeparator($output, $child);
                }
                $whitespaceState = 'is_empty';

                continue;
            }

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';
            }
        }

        return $output;
    }
1073
1074
    /**
     * Build the "<!DOCTYPE ...>" string from the first named document-type
     * child of the given node.
     *
     * Returns an empty string when $this->withDocType is false or when no
     * document-type child with a name exists.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    private function getDoctype(\DOMNode $node): string
    {
        // check the doc-type only if it wasn't generated by DomDocument itself
        if (!$this->withDocType) {
            return '';
        }

        foreach ($node->childNodes as $child) {
            if (!($child instanceof \DOMDocumentType) || !$child->name) {
                continue;
            }

            // "SYSTEM" is only used when there is a system id but no public id.
            $systemOnly = !$child->publicId && $child->systemId;
            $publicKeyword = $systemOnly ? '' : 'PUBLIC';
            $systemKeyword = $systemOnly ? 'SYSTEM' : '';

            $doctype = '<!DOCTYPE ' . $child->name;

            if ($child->publicId) {
                $doctype .= ' ' . $publicKeyword . ' "' . $child->publicId . '"';
            }

            // NOTE: with both ids present the keyword is empty, so the system
            // id is preceded by two spaces (kept as-is).
            if ($child->systemId) {
                $doctype .= ' ' . $systemKeyword . ' "' . $child->systemId . '"';
            }

            return $doctype . '>';
        }

        return '';
    }
1109
1110
    /**
     * Returns the current value of the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1117
1118
    /**
     * Returns the current value of the "doOptimizeAttributes" property.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1125
1126
    /**
     * Returns the current value of the "doOptimizeViaHtmlDomParser" property.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1133
1134
    /**
     * Returns the current value of the "doRemoveComments" property.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1141
1142
    /**
     * Returns the current value of the "doRemoveDefaultAttributes" property.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1149
1150
    /**
     * Returns the current value of the "doRemoveDeprecatedAnchorName" property.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1157
1158
    /**
     * Returns the current value of the "doRemoveDeprecatedScriptCharsetAttribute" property.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1165
1166
    /**
     * Returns the current value of the "doRemoveDeprecatedTypeFromScriptTag" property.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1173
1174
    /**
     * Returns the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" property.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1181
1182
    /**
     * Returns the current value of the "doRemoveDeprecatedTypeFromStyleAndLinkTag" property.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStyleAndLinkTag;
    }
1189
1190
    /**
     * Returns the current value of the "doRemoveDefaultMediaTypeFromStyleAndLinkTag" property.
     *
     * @return bool
     */
    public function isDoRemoveDefaultMediaTypeFromStyleAndLinkTag(): bool
    {
        return $this->doRemoveDefaultMediaTypeFromStyleAndLinkTag;
    }
1197
1198
    /**
     * Returns the current value of the "doRemoveDefaultTypeFromButton" property.
     *
     * @return bool
     */
    public function isDoRemoveDefaultTypeFromButton(): bool
    {
        return $this->doRemoveDefaultTypeFromButton;
    }
1205
1206
    /**
     * Returns the current value of the "doRemoveEmptyAttributes" property.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1213
1214
    /**
     * Returns the current value of the "doRemoveHttpPrefixFromAttributes" property.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1221
1222
    /**
     * Returns the current value of the "doRemoveHttpsPrefixFromAttributes" property.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1229
1230
    /**
     * Returns the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" property.
     *
     * @return bool
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1237
1238
    /**
     * Returns the current value of the "doMakeSameDomainsLinksRelative" property.
     *
     * @return bool
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1245
1246
    /**
     * Returns the current value of the "doRemoveOmittedHtmlTags" property.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1253
1254
    /**
     * Returns the current value of the "doRemoveOmittedQuotes" property.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1261
1262
    /**
     * Returns the current value of the "doRemoveSpacesBetweenTags" property.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1269
1270
    /**
     * Returns the current value of the "doRemoveValueFromEmptyInput" property.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1277
1278
    /**
     * Returns the current value of the "doRemoveWhitespaceAroundTags" property.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1285
1286
    /**
     * Returns the current value of the "doSortCssClassNames" property.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1293
1294
    /**
     * Returns the current value of the "doSortHtmlAttributes" property.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1301
1302
    /**
     * Returns the current value of the "doSumUpWhitespace" property.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1309
1310
    /**
     * Returns the current value of the "isHTML4" property.
     *
     * @return bool
     */
    public function isHTML4(): bool
    {
        return $this->isHTML4;
    }
1317
1318
    /**
     * Returns the current value of the "isXHTML" property.
     *
     * @return bool
     */
    public function isXHTML(): bool
    {
        return $this->isXHTML;
    }
1325
1326
    /**
     * Minify an HTML string.
     *
     * Steps: optional DOM based minification, normalisation of whitespace
     * between attributes, optional removal of single whitespace between tags,
     * restoring of protected fragments and entities, and a final string
     * clean-up. If the result is longer than the (trimmed) input, the trimmed
     * input is returned instead.
     *
     * @param string $html                     the HTML to minify (cast to string)
     * @param bool   $multiDecodeNewHtmlEntity passed through to the DOM based minification
     *
     * @return string the minified HTML, or '' for empty / whitespace-only input
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare against '' explicitly: a loose falsy check would also
        // discard the valid, non-empty input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // PCRE failed (e.g. invalid UTF-8 with the "u" modifier):
                        // keep the attributes untouched instead of dropping them.
                        $attributes = $matches[2] === '' ? '' : ' ' . $matches[2];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            // preg_replace_callback() returns null on error; never replace the document by ''.
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
                if ($htmlTmp !== null) {
                    $html = $htmlTmp;
                }
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1457
1458
    /**
     * Walk forward through the siblings of a node until an element, a text
     * node or the end of the sibling list is reached.
     *
     * A directly following text node is only returned as-is when it is not
     * blank and contains no "<"; otherwise the walk continues with the node
     * after it.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $current = $node;

        while (true) {
            /** @var \DOMElement|\DOMText|null $sibling - false-positive error from phpstan */
            $sibling = $current->nextSibling;

            if ($sibling instanceof \DOMText) {
                $text = $sibling->textContent;
                $isMeaningfulText = \trim($text) !== '' && \strpos($text, '<') === false;

                $current = $isMeaningfulText ? $sibling : $sibling->nextSibling;
            } else {
                $current = $sibling;
            }

            if (
                $current === null
                ||
                $current instanceof \DOMElement
                ||
                $current instanceof \DOMText
            ) {
                return $current;
            }
        }
    }
1486
1487
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or ends
     * with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // The cheap strpos() checks avoid running the regex on ordinary comments.
        /** @noinspection RegExpRedundantEscape */
        $startsWithIf = \strpos($comment, '[if ') !== false
                        && \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithIf) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               && \preg_match('/\[endif\]$/', $comment);
    }
1519
1520
    /**
     * Minify the HTML by loading it into a "HtmlDomParser" document, applying
     * the enabled DOM based steps and serializing the result back to a string.
     *
     * Side effects: sets $this->withDocType, $this->isHTML4 and $this->isXHTML
     * for the current document and fills $this->protectedChildNodes.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        if ($this->templateLogicSyntaxInSpecialScriptTags !== null) {
            $dom->overwriteTemplateLogicSyntaxInSpecialScriptTags($this->templateLogicSyntaxInSpecialScriptTags);
        }

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not formats output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        $doctypeStr = $this->getDoctype($dom->getDocument());

        // Always (re-)assign both flags: otherwise the values detected for a
        // previous document would leak into a later document that has no
        // doctype. For an empty $doctypeStr both become false.
        $this->isHTML4 = \strpos($doctypeStr, 'html4') !== false;
        $this->isXHTML = \strpos($doctypeStr, 'xhtml1') !== false;

        // -------------------------------------------------------------------------
        // Protect <nocompress> HTML tags first.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Notify the Observer before the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $element) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            // -------------------------------------------------------------------------
            // Notify the Observer after the minification.
            // -------------------------------------------------------------------------

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        return $dom->fixHtmlOutput(
            $doctypeStr . $this->domNodeToString($dom->getDocument()),
            $multiDecodeNewHtmlEntity
        );
    }
1614
1615
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop
     * observer, passing the element and this instance.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1626
1627
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop
     * observer, passing the element and this instance.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1638
1639
    /**
     * For every element matching the selector: remember the inner HTML of its
     * parent in $this->protectedChildNodes and replace the parent's node value
     * by a numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $placeholderTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $matchedElement) {
            if ($matchedElement->isRemoved()) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = $matchedElement->parentNode()->innerHtml();

            $owner = $matchedElement->getNode()->parentNode;
            if ($owner !== null) {
                $owner->nodeValue = '<' . $placeholderTag . ' data-' . $placeholderTag . '="' . $slot . '"></' . $placeholderTag . '>';
            }

            // The counter advances even when no parent node could be replaced.
            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1663
1664
    /**
     * Prevent changes of "code" tags, inline "styles" / "scripts" and
     * conditional comments by swapping their content for numbered
     * placeholder tags; the original content is kept in
     * $this->protectedChildNodes.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $placeholderTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            if (\in_array($inlineElement->tag, ['script', 'style'], true)) {
                $attributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = '<' . $placeholderTag . ' data-' . $placeholderTag . '="' . $slot . '"></' . $placeholderTag . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $slot = $this->protected_tags_counter;
            $this->protectedChildNodes[$slot] = '<!--' . $commentText . '-->';

            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText('<' . $placeholderTag . ' data-' . $placeholderTag . '="' . $slot . '"></' . $placeholderTag . '>');

            $owner = $commentNode->parentNode;
            if ($owner !== null) {
                $owner->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1721
1722
    /**
     * Remove comments from the dom.
     *
     * Comments whose text contains a "[" are kept; all others are detached
     * from their parent. Afterwards the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner !== null) {
                $owner->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1746
1747
    /**
     * Trim tags in the dom.
     *
     * Only applies to tags listed in self::$trimWhitespaceFromTags that have
     * at least one child node: whitespace matching self::$regExSpace is
     * replaced by a single space in the text nodes found at the first child,
     * last child, previous sibling and next sibling positions.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length < 1) {
            return;
        }

        /** @var array<int, \DOMNode|null> $surroundingNodes */
        $surroundingNodes = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        foreach ($surroundingNodes as $surroundingNode) {
            if ($surroundingNode === null || $surroundingNode->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $collapsedValue = \preg_replace(self::$regExSpace, ' ', $surroundingNode->nodeValue);
            // preg_replace() returns null on error; keep the old value in that case.
            if ($collapsedValue !== null) {
                $surroundingNode->nodeValue = $collapsedValue;
            }
        }
    }
1783
1784
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the digits captured as "id" from a double-quoted value inside
     * $matches['attributes'] and returns the entry stored under that id in
     * $this->protectedChildNodes.
     *
     * @param array $matches PREG matches; the "attributes" key is the one that is read
     *
     * @return string the stored entry, or an empty string when "attributes" is missing,
     *                no quoted id can be found, or nothing is stored under that id
     */
    private function restoreProtectedHtml($matches): string
    {
        // Without a successful match $matchesInner has no "id" key, so bail out early
        // instead of reading an undefined array key.
        if (
            !isset($matches['attributes'])
            || \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
        ) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1797
1798
    /**
     * Store the given value in $this->domainsToRemoveHttpPrefixFromAttributes.
     *
     * The value is assigned as passed; this method performs no validation.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this the same instance, so calls can be chained
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1809
1810
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Visits every text node found via the XPath "//text()". A node is left untouched
     * when it has no node path or when its path contains "/<name>" for any entry of
     * self::$skipTagsForRemoveWhitespace. For all other text nodes every match of
     * self::$regExSpace is replaced by a single space. Finally the document is
     * normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2.
                if (\strpos($xp, "/{$pattern}") !== false) {
                    // Text sits below a tag whose whitespace must be kept: next text node.
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);

            // preg_replace() returns null on error; keep the original text in that case.
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1850
1851
    /**
     * Store the given flag in $this->keepBrokenHtml.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin the same instance, so calls can be chained
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1864
1865
    /**
     * Replace the stored template-logic syntax list after checking that every entry is a string.
     *
     * The list is assigned to $this->templateLogicSyntaxInSpecialScriptTags only when
     * validation succeeds; on failure the previous value is left unchanged.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException when any entry of the array is not a string
     *
     * @return HtmlMin the same instance, so calls can be chained
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): self
    {
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                // The message names this method so the failing call is easy to locate.
                throw new \InvalidArgumentException('overwriteTemplateLogicSyntaxInSpecialScriptTags only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }
1882
}
1883