Completed
Push — master ( 91add2...61ae49 )
by Lars
13:45
created

HtmlMin::doRemoveHttpsPrefixFromAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 0
cts 0
cp 0
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
 * Class HtmlMin
 *
 * Inspired by:
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
 * - PHP: https://github.com/zaininnari/html-minifier
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
 * - Java: https://code.google.com/archive/p/htmlcompressor/
 *
 * Ideas:
 * - http://perfectionkills.com/optimizing-html/
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
     * Matches a run of two or more whitespace characters, or any single
     * CR / LF. Used to collapse whitespace inside attribute values.
     *
     * @var string
     */
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";

    /**
     * Tags whose closing tag is always treated as optional.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $optional_end_tags = [
        'html',
        'head',
        'body',
    ];

    /**
     * Tag names treated as self-closing.
     * NOTE(review): the consumer of this list is outside the visible part of the file.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Tag names (as keys) around which whitespace is trimmed; the values are unused.
     * NOTE(review): the consumer of this map is outside the visible part of the file.
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];

    /**
     * Boolean attribute names, stored as keys so membership can be tested
     * with isset(); the values are unused.
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $booleanAttributes = [
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    ];

    /**
     * Tag names that are skipped when whitespace is removed.
     * NOTE(review): the consumer of this list is outside the visible part of the file.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $skipTagsForRemoveWhitespace = [
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    ];
139
140
    /**
     * NOTE(review): presumably holds content that must survive minification
     * untouched; it is filled and read outside the visible part of the file.
     *
     * @var array
     */
    private $protectedChildNodes = [];

    /**
     * Marker string used together with $protectedChildNodes.
     *
     * @var string
     */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /**
     * @var bool
     */
    private $doOptimizeViaHtmlDomParser = true;

    /**
     * Gates the bare output of boolean attributes and the whitespace
     * collapsing of "srcset" / "sizes" values.
     *
     * @var bool
     */
    private $doOptimizeAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveComments = true;

    /**
     * @var bool
     */
    private $doRemoveWhitespaceAroundTags = false;

    /**
     * Gates the omission of quotes around attribute values.
     *
     * @var bool
     */
    private $doRemoveOmittedQuotes = true;

    /**
     * Gates the omission of optional closing tags.
     *
     * @var bool
     */
    private $doRemoveOmittedHtmlTags = true;

    /**
     * @var bool
     */
    private $doRemoveHttpPrefixFromAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveHttpsPrefixFromAttributes = false;

    /**
     * @var string[]
     */
    private $domainsToRemoveHttpPrefixFromAttributes = [
        'google.com',
        'google.de',
    ];

    /**
     * @var bool
     */
    private $doSortCssClassNames = true;

    /**
     * @var bool
     */
    private $doSortHtmlAttributes = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /**
     * @var bool
     */
    private $doRemoveDefaultAttributes = false;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedAnchorName = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /**
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /**
     * @var bool
     */
    private $doRemoveValueFromEmptyInput = true;

    /**
     * @var bool
     */
    private $doRemoveEmptyAttributes = true;

    /**
     * @var bool
     */
    private $doSumUpWhitespace = true;

    /**
     * @var bool
     */
    private $doRemoveSpacesBetweenTags = false;

    /**
     * @var bool
     */
    private $keepBrokenHtml = false;

    /**
     * When false, a document type node is skipped while the DOM is serialized.
     *
     * @var bool
     */
    private $withDocType = false;

    /**
     * Registered observers; initialised with an \SplObjectStorage in the constructor.
     *
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
     */
    private $domLoopObservers;

    /**
     * @var int
     */
    private $protected_tags_counter = 0;
272
273 52
    /**
     * HtmlMin constructor.
     *
     * Creates the observer storage and registers the default
     * HtmlMinDomObserverOptimizeAttributes observer.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
282
283 2
    /**
     * Add an observer to the internal observer storage.
     *
     * @param HtmlMinDomObserverInterface $observer
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
292
293 1
    /**
     * Enable or disable attribute optimisation.
     *
     * When enabled, boolean attributes are written as a bare name and the
     * whitespace inside "srcset" / "sizes" values is collapsed.
     *
     * @param bool $doOptimizeAttributes
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
304
305 3
    /**
     * Set the "doOptimizeViaHtmlDomParser" option.
     *
     * NOTE(review): presumably switches the DOM-parser based optimisation
     * pass on or off; the code reading this flag is not visible here.
     *
     * @param bool $doOptimizeViaHtmlDomParser
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
316
317 2
    /**
     * Set the "doRemoveComments" option.
     *
     * NOTE(review): presumably controls stripping of HTML comments; the
     * code reading this flag is not visible here.
     *
     * @param bool $doRemoveComments
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
328
329 2
    /**
     * Set the "doRemoveDefaultAttributes" option (off unless enabled).
     *
     * NOTE(review): presumably controls removal of attributes that only
     * carry their default value; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveDefaultAttributes
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
340
341 2
    /**
     * Set the "doRemoveDeprecatedAnchorName" option.
     *
     * NOTE(review): presumably controls removal of the deprecated "name"
     * attribute on anchors; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveDeprecatedAnchorName
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
352
353 2
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * NOTE(review): presumably controls removal of the deprecated "charset"
     * attribute on script tags; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
364
365 2
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * NOTE(review): presumably controls removal of the redundant "type"
     * attribute on script tags; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
376
377 2
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * NOTE(review): presumably controls removal of the redundant "type"
     * attribute on stylesheet links; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
388
389 4
    /**
     * Set the "doRemoveEmptyAttributes" option.
     *
     * NOTE(review): presumably controls removal of attributes with an empty
     * value; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveEmptyAttributes
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
400
401 1
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option.
     *
     * NOTE(review): presumably controls stripping of the "http:" scheme
     * from URL attributes; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
412
413 1
    /**
     * Set the "doRemoveHttpsPrefixFromAttributes" option (off unless enabled).
     *
     * NOTE(review): presumably controls stripping of the "https:" scheme
     * from URL attributes; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
424
425 1
    /**
     * Enable or disable the omission of optional closing tags.
     *
     * When disabled, every element is serialized with its closing tag.
     *
     * @param bool $doRemoveOmittedHtmlTags
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
436
437 2
    /**
     * Enable or disable the omission of quotes around attribute values
     * (<p class="foo"> → <p class=foo>) where the value allows it.
     *
     * @param bool $doRemoveOmittedQuotes
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
448
449 5
    /**
     * Set the "doRemoveSpacesBetweenTags" option (off unless enabled).
     *
     * NOTE(review): presumably controls removal of whitespace between
     * adjacent tags; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveSpacesBetweenTags
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
460
461 2
    /**
     * Set the "doRemoveValueFromEmptyInput" option.
     *
     * NOTE(review): presumably controls removal of an empty "value"
     * attribute on inputs; the code reading this flag is not visible here.
     *
     * @param bool $doRemoveValueFromEmptyInput
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
472
473 2
    /**
     * Set the "doRemoveWhitespaceAroundTags" option (off unless enabled).
     *
     * While the option is off, the DOM serializer keeps a single space
     * after an element that is followed by a one-space text node.
     *
     * @param bool $doRemoveWhitespaceAroundTags
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
484
485 2
    /**
     * Set the "doSortCssClassNames" option.
     *
     * NOTE(review): presumably controls sorting of the names inside "class"
     * attributes; the code reading this flag is not visible here.
     *
     * @param bool $doSortCssClassNames
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
496 48
497 48
    /**
     * Set the "doSortHtmlAttributes" option.
     *
     * NOTE(review): presumably controls sorting of the attributes of each
     * element; the code reading this flag is not visible here.
     *
     * @param bool $doSortHtmlAttributes
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
508
509
    /**
     * Set the "doSumUpWhitespace" option.
     *
     * NOTE(review): presumably controls collapsing of repeated whitespace;
     * the code reading this flag is not visible here.
     *
     * @param bool $doSumUpWhitespace
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
520
521 31
    /**
     * Serialize the attributes of a DOM node into an HTML attribute string.
     *
     * - With attribute optimisation enabled, boolean attributes are written
     *   as a bare name and whitespace in "srcset" / "sizes" is collapsed.
     * - Quotes around a value are dropped when allowed
     *   (<p class="foo"> → <p class=foo>).
     * - A quoted value is wrapped in double quotes, or in single quotes when
     *   it contains a double quote. If it contains both kinds, double quotes
     *   are used and the inner ones are written as &quot;.
     *
     * @param \DOMNode $node
     *
     * @return string the attributes separated by single spaces, without
     *                leading or trailing whitespace; '' when there are none
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the bare name is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid
                // UTF-8 with the "u" modifier); keep the value in that case
                // instead of silently emitting an empty attribute.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attr_val) ?? $attr_val;
            }

            if ($omit_quotes) {
                $quote = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quote = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quote = "'";
            } else {
                // both quote characters occur: neither delimiter is safe as-is
                $quote = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quote . $attr_val . $quote . ' ';
        }

        return \trim($attr_str);
    }
582
583
    /**
     * Decide whether the closing tag of the given node may be left out.
     *
     * The rules follow
     * https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     * "Next sibling" below is whatever getNextSiblingOfTypeDOMElement()
     * returns for the node.
     *
     * @param \DOMNode $node
     *
     * @return bool true when the closing tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $has_no_next = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        switch ($tag_name) {
            case 'li':
                return $has_no_next || $next_tag_name === 'li';

            case 'rp':
                return $has_no_next || \in_array($next_tag_name, ['rp', 'rt'], true);

            case 'tr':
                return $has_no_next || $next_tag_name === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($has_no_next || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $has_no_next || \in_array($next_tag_name, ['td', 'th'], true);

            case 'dd':
                return $has_no_next || \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'dt':
                // unlike <dd>, a trailing <dt> keeps its closing tag
                return \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'option':
                return $has_no_next || \in_array($next_tag_name, ['option', 'optgroup'], true);

            case 'p':
                if ($has_no_next) {
                    // last element in the parent: allowed unless the parent is one of these
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
843 12
844 12
    protected function domNodeToString(\DOMNode $node): string
845 12
    {
846 12
        // init
847
        $html = '';
848 48
        $emptyStringTmp = '';
849 48
850 48
        foreach ($node->childNodes as $child) {
851
            if ($emptyStringTmp === 'is_empty') {
852
                $emptyStringTmp = 'last_was_empty';
853 48
            } else {
854
                $emptyStringTmp = '';
855 48
            }
856
857 42
            if ($child instanceof \DOMDocumentType) {
858
                // add the doc-type only if it wasn't generated by DomDocument
859
                if (!$this->withDocType) {
860 48
                    continue;
861
                }
862
863 47
                if ($child->name) {
864
                    if (!$child->publicId && $child->systemId) {
865 47
                        $tmpTypeSystem = 'SYSTEM';
866
                        $tmpTypePublic = '';
867
                    } else {
868 26
                        $tmpTypeSystem = '';
869
                        $tmpTypePublic = 'PUBLIC';
870 26
                    }
871
872 26
                    $html .= '<!DOCTYPE ' . $child->name . ''
873
                             . ($child->publicId ? ' ' . $tmpTypePublic . ' "' . $child->publicId . '"' : '')
874
                             . ($child->systemId ? ' ' . $tmpTypeSystem . ' "' . $child->systemId . '"' : '')
875 26
                             . '>';
876
                }
877 26
            } elseif ($child instanceof \DOMElement) {
878
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
879 26
                $html .= '>' . $this->domNodeToString($child);
880
881
                if (
882 48
                    !$this->doRemoveOmittedHtmlTags
883
                    ||
884
                    !$this->domNodeClosingTagOptional($child)
885 44
                ) {
886 44
                    $html .= '</' . $child->tagName . '>';
887
                }
888 30
889
                if (!$this->doRemoveWhitespaceAroundTags) {
890 30
                    /** @noinspection NestedPositiveIfStatementsInspection */
891
                    if (
892
                        $child->nextSibling instanceof \DOMText
893
                        &&
894 21
                        $child->nextSibling->wholeText === ' '
895
                    ) {
896 21
                        if (
897
                            $emptyStringTmp !== 'last_was_empty'
898
                            &&
899
                            \substr($html, -1) !== ' '
900
                        ) {
901
                            $html = \rtrim($html);
902 21
903
                            if (
904
                                $child->parentNode
905 21
                                &&
906
                                $child->parentNode->nodeName !== 'head'
907
                            ) {
908 21
                                $html .= ' ';
909
                            }
910 21
                        }
911
                        $emptyStringTmp = 'is_empty';
912 21
                    }
913
                }
914
            } elseif ($child instanceof \DOMText) {
915 30
                if ($child->isElementContentWhitespace()) {
916
                    if (
917
                        $child->previousSibling !== null
918 44
                        &&
919
                        $child->nextSibling !== null
920 1
                    ) {
921 1
                        if (
922
                            (
923
                                $child->wholeText
924
                                &&
925 48
                                \strpos($child->wholeText, ' ') !== false
926
                            )
927
                            ||
928
                            (
929
                                $emptyStringTmp !== 'last_was_empty'
930
                                &&
931
                                \substr($html, -1) !== ' '
932
                            )
933
                        ) {
934
                            $html = \rtrim($html);
935
936
                            if (
937
                                $child->parentNode
938
                                &&
939
                                $child->parentNode->nodeName !== 'head'
940
                            ) {
941
                                $html .= ' ';
942
                            }
943
                        }
944
                        $emptyStringTmp = 'is_empty';
945
                    }
946
                } else {
947
                    $html .= $child->wholeText;
948
                }
949
            } elseif ($child instanceof \DOMComment) {
950
                $html .= '<!--' . $child->textContent . '-->';
951
            }
952
        }
953
954
        return $html;
955
    }
956
957
    /**
     * Read accessor for the "domainsToRemoveHttpPrefixFromAttributes" setting.
     *
     * @return array the value currently stored in the property of the same name
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
964
965 31
    /**
     * Read accessor for the "doOptimizeAttributes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
972
973 31
    /**
     * Read accessor for the "doOptimizeViaHtmlDomParser" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
980
981 31
    /**
     * Read accessor for the "doRemoveComments" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
988
989 31
    /**
     * Read accessor for the "doRemoveDefaultAttributes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
996
997 31
    /**
     * Read accessor for the "doRemoveDeprecatedAnchorName" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1004
1005 31
    /**
     * Read accessor for the "doRemoveDeprecatedScriptCharsetAttribute" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1012
1013 31
    /**
     * Read accessor for the "doRemoveDeprecatedTypeFromScriptTag" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1020
1021
    /**
     * Read accessor for the "doRemoveDeprecatedTypeFromStylesheetLink" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1028
1029
    /**
     * Read accessor for the "doRemoveEmptyAttributes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1036
1037
    /**
     * Read accessor for the "doRemoveHttpPrefixFromAttributes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1044
1045 31
    /**
     * Read accessor for the "doRemoveHttpsPrefixFromAttributes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1052
1053
    /**
     * Read accessor for the "doRemoveOmittedHtmlTags" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1060
1061 31
    /**
     * Read accessor for the "doRemoveOmittedQuotes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1068
1069 31
    /**
     * Read accessor for the "doRemoveSpacesBetweenTags" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1076
1077
    /**
     * Read accessor for the "doRemoveValueFromEmptyInput" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1084
1085
    /**
     * Read accessor for the "doRemoveWhitespaceAroundTags" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1092
1093 52
    /**
     * Read accessor for the "doSortCssClassNames" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1100
1101
    /**
     * Read accessor for the "doSortHtmlAttributes" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1108
1109 49
    /**
     * Read accessor for the "doSumUpWhitespace" flag.
     *
     * @return bool the current value of the flag
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1116
1117
    /**
     * Minify the given HTML string.
     *
     * Steps, in order:
     * - optional DOM based minification (only if "doOptimizeViaHtmlDomParser" is set)
     * - collapse whitespace between the attributes of a tag
     * - optionally remove a single whitespace between ">" and "<"
     * - restore the protected HTML fragments and (if the DOM step ran) the protected entities
     * - drop line breaks next to html / head tags and the trailing slash / closing tag of
     *   the elements listed in self::$selfClosingTags
     *
     * If the result is longer than the trimmed input, the trimmed input is returned instead.
     * If one of the regular expressions fails (PCRE returns null), that single step is
     * skipped instead of replacing the whole document with an empty string.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded to the DOM based minification step
     *
     * @return string
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare with '' explicitly: a loose "!$html" check would also reject
        // the valid input "0" and return an empty string for it.
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // PCRE error (the pattern uses the "u" modifier, so e.g. invalid UTF-8):
                        // keep the tag as it is instead of dropping all of its attributes
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags && \strpos($html, ' ') !== false) {
            // Remove spaces that are between > and <
            $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1248 4
1249
    /**
     * Walk forward through the siblings of the given node and return the first
     * one that is a \DOMElement.
     *
     * @param \DOMNode $node the node to start from (the node itself is never returned)
     *
     * @return \DOMNode|null the next element sibling, or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;

        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1263
1264 48
    /**
     * Decide whether the given comment text belongs to a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional if it starts with "[if <expression>]"
     * or if it ends with "[endif]".
     *
     * @param string $comment the text of the comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap strpos() check runs first, the regex only confirms the position
        /** @noinspection RegExpRedundantEscape */
        $startsWithIf = \strpos($comment, '[if ') !== false
                        &&
                        \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithIf) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               &&
               \preg_match('/\[endif\]$/', $comment);
    }
1294
1295
    /**
     * Run the DOM based part of the minification and return the resulting HTML string.
     *
     * The steps run in a fixed order: protect <nocompress> content, notify the observers
     * ("before"), protect further tags and conditional comments, optionally remove comments,
     * optionally sum up whitespace and finally - per element - optionally clean up the
     * whitespace around tags and notify the observers ("after").
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // remember whether the input starts with a doctype declaration
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // set up the parser
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // no redundant white space, no indentation in the output
        $dom->getDocument()->preserveWhiteSpace = false;
        $dom->getDocument()->formatOutput = false;

        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // 1. <nocompress> content is protected before anything else happens
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // 2. observers see every element before it gets minified
        foreach ($dom->find('*') as $elementBefore) {
            $this->notifyObserversAboutDomElementBeforeMinification($elementBefore);
        }

        // 3. protect HTML tags and conditional comments
        $dom = $this->protectTags($dom);

        // 4. remove default HTML comments [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // 5. sum-up extra whitespace [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        // 6. per element: whitespace around tags, then the "after" notification.
        //    The flag is read on every iteration on purpose, observers get a
        //    reference to this object.
        foreach ($dom->find('*') as $elementAfter) {
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($elementAfter);
            }

            $this->notifyObserversAboutDomElementAfterMinification($elementAfter);
        }

        // 7. convert the dom into a string
        $htmlFromDom = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($htmlFromDom, $multiDecodeNewHtmlEntity);
    }
1379
1380 5
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            // every observer also receives this HtmlMin instance
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1391
1392
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            // every observer also receives this HtmlMin instance
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1403
1404
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match the inner HTML of the element's parent is stored in
     * $this->protectedChildNodes and the node value of the parent node is replaced
     * by a placeholder tag that carries the id of the stored entry.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector selector that is passed to $dom->find()
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $protectedId = $this->protected_tags_counter;
            $this->protectedChildNodes[$protectedId] = $element->parentNode()->innerHtml();

            $parentNode = $element->getNode()->parentNode;
            if ($parentNode !== null) {
                $parentNode->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $protectedId . '"></' . $helperTag . '>';
            }

            // the id is consumed even when there was no parent node to replace
            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1428 4
1429
    /**
     * Prevent changes of "code" content, inline "styles" / "scripts" and conditional comments.
     *
     * The original content is stored in $this->protectedChildNodes and replaced in the dom
     * by a placeholder tag that carries the id of the stored entry.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        // --- inline scripts and styles ---
        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            if ($element->tag === 'script' || $element->tag === 'style') {
                // skip external links
                $attributes = $element->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $protectedId = $this->protected_tags_counter;
            $this->protectedChildNodes[$protectedId] = $element->innerhtml;
            $element->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $protectedId . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        // --- conditional comments ---
        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $text = $element->text();
            if (!$this->isConditionalComment($text)) {
                // normal comments are not protected
                continue;
            }

            $protectedId = $this->protected_tags_counter;
            $this->protectedChildNodes[$protectedId] = '<!--' . $text . '-->';

            /* @var $node \DOMComment */
            $node = $element->getNode();
            $placeholder = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $protectedId . '"></' . $helperTag . '>');

            $parentNode = $element->getNode()->parentNode;
            if ($parentNode !== null) {
                $parentNode->replaceChild($placeholder, $node);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1486 1
1487
    /**
     * Remove comments from the dom.
     *
     * Comments whose text contains a "[" are kept, every other comment node is
     * detached from its parent. Afterwards the document gets normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $parentNode = $commentNode->parentNode;
            if ($parentNode === null) {
                continue;
            }

            $parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1511
1512 9
    /**
     * Clean up the whitespace in the text nodes in and around the given element.
     *
     * Only elements whose tag is a key of self::$trimWhitespaceFromTags and that have at
     * least one child node are processed. For those, the first child, the last child, the
     * previous sibling and the next sibling are checked: in each one that is a text node,
     * everything matched by self::$regExSpace is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // DOM nodes are objects, so iterating by value is enough to modify them
        // (and it avoids a dangling reference after the loop).
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($nodeValueTmp !== null) {
                // null would mean a PCRE error, in that case the text stays untouched
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1548
1549 43
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the numeric id from the quoted value in the "attributes" group of the
     * placeholder match and returns the protected content stored for that id.
     *
     * @param array $matches PREG matches
     *
     * @return string the stored content, or an empty string if no entry was found
     */
    private function restoreProtectedHtml($matches): string
    {
        $idMatch = [];
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatch);

        if (!isset($idMatch['id'])) {
            return '';
        }

        return $this->protectedChildNodes[$idMatch['id']] ?? '';
    }
1562 40
1563 40
    /**
     * Write accessor for the "domainsToRemoveHttpPrefixFromAttributes" setting.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes the new value, stored as given
     *
     * @return $this for method chaining
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1574
1575
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node, everything matched by self::$regExSpace is replaced by a single
     * space. Text nodes are skipped if their node path contains "/" followed by one of the
     * entries of self::$skipTagsForRemoveWhitespace. Afterwards the document gets normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: the "${var}" string interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            if ($nodeValueTmp !== null) {
                // null would mean a PCRE error, in that case the text stays untouched
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1615
1616
    /**
     * Store the "keepBrokenHtml" setting; the value is later handed to the
     * HtmlDomParser via its own useKeepBrokenHtml() method.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml the new value of the setting
     *
     * @return HtmlMin this instance, for method chaining
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1629
}
1630