Completed
Push — master ( b3b2b5...2a7a8f )
by Lars
01:23
created

HtmlMin::domNodeToString()   F

Complexity

Conditions 31
Paths 55

Size

Total Lines 112

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 49
CRAP Score 31.4135

Importance

Changes 0
Metric Value
dl 0
loc 112
ccs 49
cts 53
cp 0.9245
rs 3.3333
c 0
b 0
f 0
cc 31
nc 55
nop 1
crap 31.4135

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var string[]
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var string[]
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $keepPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainLinksRelative = false;
199
200
    /**
201
     * @var string
202
     */
203
    private $localDomain = '';
204
205
    /**
206
     * @var string[]
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromScriptTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveValueFromEmptyInput = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveEmptyAttributes = true;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doSumUpWhitespace = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveSpacesBetweenTags = false;
267
268
    /**
269
     * @var bool
270
     */
271
    private $keepBrokenHtml = false;
272
273
    /**
274
     * @var bool
275
     */
276
    private $withDocType = false;
277
278
    /**
279
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
280
     */
281
    private $domLoopObservers;
282
283
    /**
284
     * @var int
285
     */
286
    private $protected_tags_counter = 0;
287
288
    /**
     * Set up a new minifier instance.
     *
     * Creates the (initially empty) observer storage and registers the
     * built-in HtmlMinDomObserverOptimizeAttributes observer in it.
     */
    public function __construct()
    {
        // The storage must exist before the first observer can be attached.
        $this->domLoopObservers = new \SplObjectStorage();

        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
297
298
    /**
     * Register an additional observer in the observer storage of this instance.
     *
     * @param HtmlMinDomObserverInterface $observer the observer object to store
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
307
308
    /**
     * Switch the "doOptimizeAttributes" option on or off (a new instance starts with it on).
     *
     * While enabled, domNodeAttributesToString() writes the attributes listed in
     * self::$booleanAttributes by name only and collapses whitespace inside
     * "srcset" and "sizes" values.
     *
     * @param bool $doOptimizeAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
319
320
    /**
     * Switch the "doOptimizeViaHtmlDomParser" option on or off (a new instance starts with it on).
     *
     * @param bool $doOptimizeViaHtmlDomParser true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
331
332
    /**
     * Switch the "doRemoveComments" option on or off (a new instance starts with it on).
     *
     * @param bool $doRemoveComments true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
343
344
    /**
     * Switch the "doRemoveDefaultAttributes" option on or off (a new instance starts with it off).
     *
     * @param bool $doRemoveDefaultAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
355
356
    /**
     * Switch the "doRemoveDeprecatedAnchorName" option on or off (a new instance starts with it on).
     *
     * @param bool $doRemoveDeprecatedAnchorName true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
367
368
    /**
     * Switch the "doRemoveDeprecatedScriptCharsetAttribute" option on or off
     * (a new instance starts with it on).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
379
380
    /**
     * Switch the "doRemoveDeprecatedTypeFromScriptTag" option on or off
     * (a new instance starts with it on).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
391
392
    /**
     * Switch the "doRemoveDeprecatedTypeFromStylesheetLink" option on or off
     * (a new instance starts with it on).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
403
404
    /**
     * Switch the "doRemoveEmptyAttributes" option on or off (a new instance starts with it on).
     *
     * @param bool $doRemoveEmptyAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
415
416
    /**
     * Switch the "doRemoveHttpPrefixFromAttributes" option on or off
     * (a new instance starts with it off).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
427
428
    /**
     * Switch the "doRemoveHttpsPrefixFromAttributes" option on or off
     * (a new instance starts with it off).
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
439
440
    /**
     * Switch the "keepPrefixOnExternalAttributes" option on or off
     * (a new instance starts with it off).
     *
     * @param bool $keepPrefixOnExternalAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function keepPrefixOnExternalAttributes(bool $keepPrefixOnExternalAttributes = true): self
    {
        $this->keepPrefixOnExternalAttributes = $keepPrefixOnExternalAttributes;

        return $this;
    }
451
452
    /**
     * Switch the "doMakeSameDomainLinksRelative" option on or off
     * (a new instance starts with it off).
     *
     * @param bool $doMakeSameDomainLinksRelative true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doMakeSameDomainLinksRelative(bool $doMakeSameDomainLinksRelative = true): self
    {
        $this->doMakeSameDomainLinksRelative = $doMakeSameDomainLinksRelative;

        return $this;
    }
463
464
    /**
     * Store the local domain in a normalized form.
     *
     * Every "//" in the input, together with a directly preceding "http:" or
     * "https:" (matched case-insensitively), is removed and trailing slashes are
     * trimmed, e.g. "https://example.com/" is stored as "example.com".
     *
     * @param string $localDomain domain or URL to normalize; the default stores an empty string
     *
     * @return $this the same instance, so calls can be chained
     */
    public function setLocalDomain(string $localDomain = ''): self
    {
        // preg_replace() returns null on a PCRE error; fall back to the raw input so
        // rtrim() never receives null, which is a TypeError under strict_types.
        $withoutScheme = \preg_replace('/(?:https?:)?\/\//i', '', $localDomain) ?? $localDomain;

        $this->localDomain = \rtrim($withoutScheme, '/');

        return $this;
    }
475
476
    /**
477
     * Get the local domain as stored by setLocalDomain().
478
     *
479
     * @return string
0 ignored issues
show
Documentation introduced by
The doc-type $this->localDomain could not be parsed: Unknown type name "$this-" at position 0. (view supported doc-types)

This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.

Loading history...
480
     */
481 2
    public function getLocalDomain(): string
    {
        // Holds the value stored by setLocalDomain(); an empty string until that was called.
        return $this->localDomain;
    }
485
486
    /**
     * Switch the "doRemoveOmittedHtmlTags" option on or off (a new instance starts with it on).
     *
     * While enabled, domNodeToString() skips the closing tag of every element for
     * which domNodeClosingTagOptional() returns true.
     *
     * @param bool $doRemoveOmittedHtmlTags true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
497
498
    /**
     * Switch the "doRemoveOmittedQuotes" option on or off (a new instance starts with it on).
     *
     * While enabled, domNodeAttributesToString() writes attribute values without
     * surrounding quotes whenever the value allows it (<p class="foo"> → <p class=foo>).
     *
     * @param bool $doRemoveOmittedQuotes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
509
510
    /**
     * Switch the "doRemoveSpacesBetweenTags" option on or off (a new instance starts with it off).
     *
     * @param bool $doRemoveSpacesBetweenTags true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
521
522
    /**
     * Switch the "doRemoveValueFromEmptyInput" option on or off (a new instance starts with it on).
     *
     * @param bool $doRemoveValueFromEmptyInput true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
533
534
    /**
     * Switch the "doRemoveWhitespaceAroundTags" option on or off
     * (a new instance starts with it off).
     *
     * The flag is read by domNodeToString() after each element has been serialized.
     *
     * @param bool $doRemoveWhitespaceAroundTags true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
545
546
    /**
     * Switch the "doSortCssClassNames" option on or off (a new instance starts with it on).
     *
     * @param bool $doSortCssClassNames true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
557
558
    /**
     * Switch the "doSortHtmlAttributes" option on or off (a new instance starts with it on).
     *
     * @param bool $doSortHtmlAttributes true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
569
570
    /**
     * Switch the "doSumUpWhitespace" option on or off (a new instance starts with it on).
     *
     * @param bool $doSumUpWhitespace true to enable the option, false to disable it
     *
     * @return $this the same instance, so calls can be chained
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
581
582 51
    /**
     * Build the attribute part of an opening tag for the given node.
     *
     * Attributes are written in the order the DOM reports them, separated by a
     * single space and without leading or trailing whitespace. A node without
     * attributes yields an empty string.
     *
     * Effect of the options:
     * - $doOptimizeAttributes: attributes listed in self::$booleanAttributes are
     *   written by name only, and whitespace inside "srcset" / "sizes" values is
     *   collapsed via self::$regExSpace.
     * - $doRemoveOmittedQuotes: quotes are dropped when the emitted value is not
     *   empty and contains none of the characters that require quoting
     *   (<p class="foo"> → <p class=foo>).
     *
     * Quoted values use double quotes, or single quotes when the value itself
     * contains a double quote. A value containing both kinds of quotes is written
     * in double quotes with its double quotes encoded as &quot;.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            // boolean attributes carry no value (<input checked>)
            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 with the
                // "u" modifier); keep the untouched value instead of emitting an empty one.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attr_val) ?? $attribute->value;
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            //
            // The check runs on the value that is actually emitted, so a value whose
            // whitespace was just collapsed into a space can never end up unquoted.
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attr_val !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attr_val) === 0;

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quoteTmp = "'";
            } else {
                // Both quote characters occur in the value: neither can delimit it as-is,
                // so encode the double quotes and delimit with double quotes.
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
643
644
    /**
     * Decide whether the closing tag of the given element may be left out.
     *
     * The decision is based on the element's own tag name, the tag name of its
     * parent and the node returned by getNextSiblingOfTypeDOMElement(): null is
     * treated as "no more content in the parent element", a \DOMElement as the
     * element that immediately follows, and anything else as content that
     * forbids the omission.
     *
     * @param \DOMNode $node
     *
     * @return bool true when the closing tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        // closing tags listed in self::$optional_end_tags are always dropped
        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        $noMoreContent = $nextSibling === null;

        // stays null when the follow-up node is not an element (e.g. a text node),
        // so none of the tag-name comparisons below can match in that case
        $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        switch ($tag_name) {
            case 'li':
                return $noMoreContent || $nextTagName === 'li';

            case 'rp':
                return $noMoreContent || $nextTagName === 'rp' || $nextTagName === 'rt';

            case 'tr':
                return $noMoreContent || $nextTagName === 'tr';

            case 'source':
                // only inside one of these parents, and only when last or followed by another <source>
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($noMoreContent || $nextTagName === 'source');

            case 'td':
            case 'th':
                return $noMoreContent || $nextTagName === 'td' || $nextTagName === 'th';

            case 'dd':
                return $noMoreContent || $nextTagName === 'dd' || $nextTagName === 'dt';

            case 'dt':
                // unlike <dd>, a trailing <dt> has to keep its closing tag
                return $nextTagName === 'dd' || $nextTagName === 'dt';

            case 'option':
                return $noMoreContent || $nextTagName === 'option' || $nextTagName === 'optgroup';

            case 'p':
                if ($noMoreContent) {
                    return $parent_tag_name !== null
                           &&
                           // a hyphen in the name marks a custom element, which the rule excludes
                           \strpos($parent_tag_name, '-') === false
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                return \in_array(
                    $nextTagName,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
905
906 51
    /**
     * Serialize the child nodes of the given node into an HTML string.
     *
     * Handles doc-types, elements (recursively), text nodes and comments;
     * every other node type is ignored. Whitespace-only text between two
     * siblings is collapsed into at most one space (and dropped completely
     * directly inside <head>).
     *
     * @param \DOMNode $node the node whose children are serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';

        // Small state machine for whitespace gaps between siblings:
        // "is_empty" is set in the iteration that handled a gap, and it
        // becomes "last_was_empty" for exactly the next iteration.
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if ($this->withDocType) {
                    $html .= $this->domDocumentTypeToString($child);
                }

                continue;
            }

            if ($child instanceof \DOMElement) {
                // rtrim() removes the separator space again if the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (
                    !$this->doRemoveWhitespaceAroundTags
                    &&
                    $child->nextSibling instanceof \DOMText
                    &&
                    $child->nextSibling->wholeText === ' '
                ) {
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html = $this->rtrimAndAppendSpaceOutsideOfHead($html, $child);
                    }
                    $emptyStringTmp = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $html .= $child->wholeText;

                    continue;
                }

                // whitespace-only text is only relevant between two siblings
                if (
                    $child->previousSibling !== null
                    &&
                    $child->nextSibling !== null
                ) {
                    if (
                        (
                            $child->wholeText
                            &&
                            \strpos($child->wholeText, ' ') !== false
                        )
                        ||
                        (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        )
                    ) {
                        $html = $this->rtrimAndAppendSpaceOutsideOfHead($html, $child);
                    }
                    $emptyStringTmp = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }

    /**
     * Build the "<!DOCTYPE ...>" string for a doc-type node, or an empty string if the node has no name.
     *
     * @param \DOMDocumentType $docType
     *
     * @return string
     */
    private function domDocumentTypeToString(\DOMDocumentType $docType): string
    {
        if (!$docType->name) {
            return '';
        }

        if (!$docType->publicId && $docType->systemId) {
            $tmpTypeSystem = 'SYSTEM';
            $tmpTypePublic = '';
        } else {
            $tmpTypeSystem = '';
            $tmpTypePublic = 'PUBLIC';
        }

        // NOTE: if both ids are set, the empty keyword in front of the system-id results
        //       in two spaces between the ids; this is kept as-is so the output stays unchanged.
        return '<!DOCTYPE ' . $docType->name
               . ($docType->publicId ? ' ' . $tmpTypePublic . ' "' . $docType->publicId . '"' : '')
               . ($docType->systemId ? ' ' . $tmpTypeSystem . ' "' . $docType->systemId . '"' : '')
               . '>';
    }

    /**
     * Strip trailing whitespace from the html and append one space, unless the node's parent is <head>.
     *
     * @param string   $html
     * @param \DOMNode $child the node whose parent decides if a space is appended
     *
     * @return string
     */
    private function rtrimAndAppendSpaceOutsideOfHead(string $html, \DOMNode $child): string
    {
        $html = \rtrim($html);

        if (
            $child->parentNode
            &&
            $child->parentNode->nodeName !== 'head'
        ) {
            $html .= ' ';
        }

        return $html;
    }
1018
1019
    /**
     * Get the configured list of domains for the "remove http prefix from attributes" feature.
     *
     * @return array the current value of $this->domainsToRemoveHttpPrefixFromAttributes
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1026
1027
    /**
     * Report the current state of the "doOptimizeAttributes" option.
     *
     * @return bool the value of $this->doOptimizeAttributes
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1034
1035
    /**
     * Report the current state of the "doOptimizeViaHtmlDomParser" option.
     *
     * @return bool the value of $this->doOptimizeViaHtmlDomParser
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1042
1043
    /**
     * Report the current state of the "doRemoveComments" option.
     *
     * @return bool the value of $this->doRemoveComments
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1050
1051
    /**
     * Report the current state of the "doRemoveDefaultAttributes" option.
     *
     * @return bool the value of $this->doRemoveDefaultAttributes
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1058
1059
    /**
     * Report the current state of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool the value of $this->doRemoveDeprecatedAnchorName
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1066
1067
    /**
     * Report the current state of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool the value of $this->doRemoveDeprecatedScriptCharsetAttribute
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1074
1075
    /**
     * Report the current state of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool the value of $this->doRemoveDeprecatedTypeFromScriptTag
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1082
1083
    /**
     * Report the current state of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool the value of $this->doRemoveDeprecatedTypeFromStylesheetLink
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1090
1091
    /**
     * Report the current state of the "doRemoveEmptyAttributes" option.
     *
     * @return bool the value of $this->doRemoveEmptyAttributes
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1098
1099
    /**
     * Report the current state of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool the value of $this->doRemoveHttpPrefixFromAttributes
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1106
1107
    /**
     * Report the current state of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool the value of $this->doRemoveHttpsPrefixFromAttributes
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1114
1115
    /**
     * Report the current state of the "keepPrefixOnExternalAttributes" option.
     *
     * @return bool the value of $this->keepPrefixOnExternalAttributes
     */
    public function isKeepPrefixOnExternalAttributes(): bool
    {
        return $this->keepPrefixOnExternalAttributes;
    }
1122
1123
    /**
     * Report the current state of the "doMakeSameDomainLinksRelative" option.
     *
     * @return bool the value of $this->doMakeSameDomainLinksRelative
     */
    public function isDoMakeSameDomainLinksRelative(): bool
    {
        return $this->doMakeSameDomainLinksRelative;
    }
1130
1131
    /**
     * Check whether a local domain was configured.
     *
     * @return bool true if $this->localDomain is anything other than an empty string
     */
    public function isLocalDomainSet(): bool
    {
        return $this->localDomain !== '';
    }
1138
1139
    /**
     * Report the current state of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool the value of $this->doRemoveOmittedHtmlTags
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1146
1147
    /**
     * Report the current state of the "doRemoveOmittedQuotes" option.
     *
     * @return bool the value of $this->doRemoveOmittedQuotes
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1154
1155
    /**
     * Report the current state of the "doRemoveSpacesBetweenTags" option.
     *
     * @return bool the value of $this->doRemoveSpacesBetweenTags
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1162
1163
    /**
     * Report the current state of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool the value of $this->doRemoveValueFromEmptyInput
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1170
1171
    /**
     * Report the current state of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool the value of $this->doRemoveWhitespaceAroundTags
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1178
1179
    /**
     * Report the current state of the "doSortCssClassNames" option.
     *
     * @return bool the value of $this->doSortCssClassNames
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1186
1187
    /**
     * Report the current state of the "doSortHtmlAttributes" option.
     *
     * @return bool the value of $this->doSortHtmlAttributes
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1194
1195
    /**
     * Report the current state of the "doSumUpWhitespace" option.
     *
     * @return bool the value of $this->doSumUpWhitespace
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1202
1203
    /**
     * Minify the given HTML.
     *
     * Steps: optional DOM based minification, normalization of whitespace
     * between attributes, optional removal of single whitespace between tags,
     * restoring of protected html / html-entities and a final string clean-up.
     * If the result is longer than the (trimmed) input, the input is returned.
     *
     * @param string $html                     the html to minify; it is cast to string first
     * @param bool   $multiDecodeNewHtmlEntity passed through to the DOM based minification
     *
     * @return string the minified html, or an empty string for empty / whitespace-only input
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        $html = (string) $html;
        if (!isset($html[0])) {
            return '';
        }

        $html = \trim($html);
        // strict check: the string "0" is falsy in PHP, but it is not an empty document
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // PCRE error (e.g. invalid UTF-8): keep the tag untouched instead of dropping its attributes
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            // "null" signals a PCRE error: keep the current html instead of an empty string
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
                if ($htmlTmp !== null) {
                    $html = $htmlTmp;
                }
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            // the helper is used as a literal tag-name, so it must not be interpreted as regex
            $helperQuoted = \preg_quote($this->protectedChildNodesHelper, '/');

            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $helperQuoted . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $helperQuoted . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1334
1335
    /**
     * Find the first following sibling of the given node that is a \DOMElement.
     *
     * @param \DOMNode $node the node whose following siblings are scanned
     *
     * @return \DOMNode|null the next element sibling, or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        for ($sibling = $node->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
            if ($sibling instanceof \DOMElement) {
                return $sibling;
            }
        }

        return null;
    }
1349
1350
    /**
     * Check if the given comment text is (part of) a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional if it starts with "[if ...]" or ends with "[endif]".
     *
     * @param string $comment the comment text
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // cheap strpos() pre-check first, the regex only runs if the marker exists at all
        /** @noinspection RegExpRedundantEscape */
        $startsWithCondition = \strpos($comment, '[if ') !== false
                               &&
                               \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        $endsWithEndif = \strpos($comment, '[endif]') !== false
                         &&
                         \preg_match('/\[endif\]$/', $comment);

        return $endsWithEndif ? true : false;
    }
1380
1381
    /**
     * Minify the html via the "HtmlDomParser" and return the serialized result.
     *
     * Order of operations: load the html, protect <nocompress>, notify the observers
     * (before), protect tags / conditional comments, optionally remove comments,
     * optionally sum-up whitespace, then per element optionally trim whitespace
     * around tags and notify the observers (after), finally serialize the dom.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // init dom
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $document = $dom->getDocument();
        $document->preserveWhiteSpace = false; // remove redundant white space
        $document->formatOutput = false; // do not formats output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember if the input itself starts with a doc-type
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // Protect <nocompress> HTML tags first.
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // Notify the Observer before the minification.
        $elementsBefore = $dom->find('*');
        foreach ($elementsBefore as $elementBefore) {
            $this->notifyObserversAboutDomElementBeforeMinification($elementBefore);
        }

        // Protect HTML tags and conditional comments.
        $dom = $this->protectTags($dom);

        // Remove default HTML comments. [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        $elementsAfter = $dom->find('*');
        foreach ($elementsAfter as $elementAfter) {
            // Remove whitespace around tags. [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($elementAfter);
            }

            // Notify the Observer after the minification.
            $this->notifyObserversAboutDomElementAfterMinification($elementAfter);
        }

        // Convert the Dom into a string.
        $htmlFromDom = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($htmlFromDom, $multiDecodeNewHtmlEntity);
    }
1465
1466
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1477
1478
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1489
1490
    /**
     * Replace the content around every element matching the selector with a placeholder.
     *
     * For each match (that is not removed) the inner html of the element's *parent*
     * is stored in $this->protectedChildNodes and the parent's node value is replaced
     * by a placeholder tag that carries the storage index.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector selector of the elements to protect
     *
     * @return HtmlDomParser the given dom instance
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $matchedElements = $dom->find($selector);

        foreach ($matchedElements as $matchedElement) {
            if ($matchedElement->isRemoved()) {
                continue;
            }

            $index = $this->protected_tags_counter;
            $helperTag = $this->protectedChildNodesHelper;

            // keep the original html first, the parent content is overwritten below
            $this->protectedChildNodes[$index] = $matchedElement->parentNode()->innerHtml();

            $parent = $matchedElement->getNode()->parentNode;
            if ($parent !== null) {
                $parent->nodeValue = \sprintf('<%1$s data-%1$s="%2$s"></%1$s>', $helperTag, $index);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1514
1515
    /**
     * Prevent changes of "code" tags, inline "styles" / "scripts" and conditional comments.
     *
     * The protected content is stored in $this->protectedChildNodes and replaced
     * by a placeholder tag that carries the storage index.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given dom instance
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            $isScriptOrStyle = $inlineElement->tag === 'script' || $inlineElement->tag === 'style';
            if ($isScriptOrStyle) {
                $attributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = \sprintf('<%1$s data-%1$s="%2$s"></%1$s>', $helperTag, $index);

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that contains the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText(\sprintf('<%1$s data-%1$s="%2$s"></%1$s>', $helperTag, $index));
            $parent = $commentNode->parentNode;
            if ($parent !== null) {
                $parent->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1572
1573
    /**
     * Remove comments from the dom.
     *
     * Comments that contain a "[" are kept; afterwards the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given dom instance
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            // keep every comment with a "[" in it
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $parent = $commentNode->parentNode;
            if ($parent === null) {
                continue;
            }

            $parent->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1597
1598
    /**
     * Trim tags in the dom.
     *
     * If the tag of the element is a key of self::$trimWhitespaceFromTags and the
     * element has child nodes, then whitespace (see self::$regExSpace) is replaced
     * by a single space in the first / last child and in the previous / next
     * sibling, as far as these nodes are text nodes.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if (!($node->childNodes->length > 0)) {
            return;
        }

        /** @var \DOMNode[]|null[] $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // No by-reference iteration needed: the nodes are objects, so the
        // change of "nodeValue" is applied to the dom node itself.
        foreach ($candidates as $candidate) {
            if ($candidate === null) {
                continue;
            }

            if ($candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            // "null" signals a PCRE error, keep the current value in that case
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1634
1635
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the storage index from the quoted attribute value of the placeholder
     * tag and returns the html that was stored in $this->protectedChildNodes.
     *
     * @param array $matches PREG matches, the named group "attributes" is used
     *
     * @return string the protected html, or an empty string if no index / no stored html was found
     */
    private function restoreProtectedHtml($matches): string
    {
        // Guard the lookup: "??" does not protect the offset expression itself,
        // so a failed match would trigger an "undefined index" notice for "id".
        if (
            !isset($matches['attributes'])
            ||
            \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
        ) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1648
1649
    /**
     * Set the list of domains for the "remove http prefix from attributes" feature.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes stored as-is, replaces the current list
     *
     * @return $this
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1660
1661
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Whitespace (see self::$regExSpace) in every text node is replaced by a single
     * space, except for text nodes whose node path contains "/<tag>" for one of the
     * entries of self::$skipTagsForRemoveWhitespace. Afterwards the document is
     * normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given dom instance
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        $text_nodes = $dom->find('//text()');
        foreach ($text_nodes as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();
            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: "${var}" string interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            // "null" signals a PCRE error, keep the current value in that case
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1701
1702
    /**
     * Set the "keepBrokenHtml" option; the value is handed to the HtmlDomParser on minification.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1715
}
1716