Completed
Push — master ( 760841...0bddd7 )
by Lars
01:21
created

HtmlMin::getLocalDomains()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainsLinksRelative = false;
199
200
    /**
201
     * @var string[]
202
     */
203
    private $localDomains = [];
204
205
    /**
206
     * @var array
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromScriptTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveValueFromEmptyInput = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveEmptyAttributes = true;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doSumUpWhitespace = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveSpacesBetweenTags = false;
267
268
    /**
269
     * @var bool
270
     */
271
    private $keepBrokenHtml = false;
272
273
    /**
274
     * @var bool
275
     */
276
    private $withDocType = false;
277
278
    /**
279
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
280
     */
281
    private $domLoopObservers;
282
283
    /**
284
     * @var int
285
     */
286
    private $protected_tags_counter = 0;
287
288
    /**
     * Creates the minifier.
     *
     * Initializes the DOM-loop observer storage with an empty \SplObjectStorage
     * and registers a HtmlMinDomObserverOptimizeAttributes instance as the
     * first observer.
     */
    public function __construct()
    {
        // The storage must exist before any observer can be attached.
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
297
298
    /**
     * Registers an observer in the DOM-loop observer storage.
     *
     * The observer is added via \SplObjectStorage::attach(), so attaching the
     * same object twice keeps a single entry.
     *
     * @param HtmlMinDomObserverInterface $observer observer to register
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
307
308
    /**
     * Switches attribute optimization on or off (property default: true).
     *
     * Within this class the flag gates the shortening of boolean attributes
     * and the whitespace collapsing of "srcset" / "sizes" values in
     * domNodeAttributesToString().
     *
     * @param bool $doOptimizeAttributes new value of the option
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
319
320
    /**
     * Stores the "optimize via HTML DOM parser" option (property default: true).
     *
     * NOTE(review): the code that consumes this flag is not part of this
     * section; presumably it enables the DOM-based minification pass.
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
331
332
    /**
     * Stores the "remove comments" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * controls whether HTML comments are stripped from the output.
     *
     * @param bool $doRemoveComments new value of the option
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
343
344
    /**
     * Stores the "remove default attributes" option (property default: false).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * drops attributes whose value equals the HTML default.
     *
     * @param bool $doRemoveDefaultAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
355
356
    /**
     * Stores the "remove deprecated anchor name" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * concerns the deprecated "name" attribute on anchor elements.
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
367
368
    /**
     * Stores the "remove deprecated script charset attribute" option
     * (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * concerns the "charset" attribute on script elements.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
379
380
    /**
     * Stores the "remove deprecated type from script tag" option
     * (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * concerns the "type" attribute on script elements.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
391
392
    /**
     * Stores the "remove deprecated type from stylesheet link" option
     * (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * concerns the "type" attribute on stylesheet link elements.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
403
404
    /**
     * Stores the "remove empty attributes" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * drops attributes whose value is empty.
     *
     * @param bool $doRemoveEmptyAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
415
416
    /**
     * Stores the "remove http prefix from attributes" option
     * (property default: false).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * strips a leading "http:" from URL-like attribute values.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
427
428
    /**
     * Stores the "remove https prefix from attributes" option
     * (property default: false).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * strips a leading "https:" from URL-like attribute values.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
439
440
    /**
     * Stores the "keep http and https prefix on external attributes" option
     * (property default: false).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * exempts external URLs from the http/https prefix removal.
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new value of the option
     *
     * @return $this
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
451
452
    /**
     * Sets the list of local domains and enables / disables the
     * "make same-domain links relative" option accordingly.
     *
     * Every entry is normalized before it is stored: each occurrence of
     * "//", optionally preceded by "http:" or "https:" (case-insensitive), is
     * removed, and trailing slashes are trimmed. Array keys are preserved.
     * The option is switched on when the resulting list is non-empty and
     * switched off when it is empty.
     *
     * @param string[] $localDomains domains (with or without scheme) that count as local
     *
     * @return $this
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() with a single array keeps the keys and avoids a
        // by-reference foreach, which would leave a dangling reference behind.
        $this->localDomains = \array_map(
            static function ($localDomain): string {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
469
470
    /**
     * Returns the local domains as stored by doMakeSameDomainsLinksRelative()
     * (an empty array until that method has been called).
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        return $this->localDomains;
    }
477
478
    /**
     * Switches the omission of optional closing tags on or off
     * (property default: true).
     *
     * When enabled, domNodeToString() skips the closing tag of every element
     * for which domNodeClosingTagOptional() returns true.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
489
490
    /**
     * Switches the removal of optional attribute quotes on or off
     * (property default: true).
     *
     * When enabled, domNodeAttributesToString() writes attribute values
     * without quotes wherever the value allows it.
     *
     * @param bool $doRemoveOmittedQuotes new value of the option
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
501
502
    /**
     * Stores the "remove spaces between tags" option (property default: false).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * removes whitespace that sits between two tags.
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
513
514
    /**
     * Stores the "remove value from empty input" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * drops an empty "value" attribute from input elements.
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
525
526
    /**
     * Switches the removal of whitespace around tags on or off
     * (property default: false).
     *
     * domNodeToString() only runs its handling of a single-space text node
     * following an element while this option is disabled.
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
537
538
    /**
     * Stores the "sort CSS class names" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * sorts the names inside "class" attributes.
     *
     * @param bool $doSortCssClassNames new value of the option
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
549
550
    /**
     * Stores the "sort HTML attributes" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * sorts the attributes of each element.
     *
     * @param bool $doSortHtmlAttributes new value of the option
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
561
562
    /**
     * Stores the "sum up whitespace" option (property default: true).
     *
     * NOTE(review): the consuming code is not visible here; presumably it
     * collapses runs of whitespace into a single space.
     *
     * @param bool $doSumUpWhitespace new value of the option
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
573
574 51
    /**
     * Serializes the attributes of a DOM node into an HTML attribute string.
     *
     * - With attribute optimization enabled, attributes listed in
     *   self::$booleanAttributes are written as a bare name (no value), and
     *   whitespace in "srcset" / "sizes" values is collapsed via
     *   self::$regExSpace.
     * - With quote removal enabled, quotes are dropped where allowed
     *   (<p class="foo"> → <p class=foo>): the value must be non-empty and
     *   free of quotes, "=", "<", ">", backticks and whitespace.
     * - Otherwise the value is wrapped in double quotes, or in single quotes
     *   when it contains a double quote. A value containing both quote
     *   characters is wrapped in double quotes with its double quotes written
     *   as "&quot;", so the attribute can never be terminated early.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string space-separated attributes without leading / trailing whitespace;
     *                an empty string when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the bare name is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on failure (e.g. invalid UTF-8 with
                // the "u" modifier): keep the untouched value instead of losing it
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            } else {
                $attr_val = $attribute->value;
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attribute->value, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attribute->value, "'") === false) {
                $quoteTmp = "'";
            } else {
                // both quote characters occur: neither delimiter is safe as-is,
                // so escape the double quotes and delimit with them
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
635
636
    /**
     * Decides whether the closing tag of the given node may be left out.
     *
     * Based on the tag-omission rules of the HTML syntax specification:
     * https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * "Last" below means that getNextSiblingOfTypeDOMElement() returned null
     * for the node; "followed by" refers to the element it returned.
     *
     * Implemented:
     * - every tag listed in self::$optional_end_tags: always optional
     * - <li>: last, or followed by <li>
     * - <optgroup>: last, or followed by <optgroup>
     * - <rp>: last, or followed by <rp> / <rt>
     * - <tr>: last, or followed by <tr>
     * - <source> inside <audio>, <video>, <picture> or <source>: last, or followed by <source>
     * - <td> / <th>: last, or followed by <td> / <th>
     * - <dd>: last, or followed by <dd> / <dt>
     * - <dt>: followed by <dd> / <dt>
     * - <option>: last, or followed by <option> / <optgroup>
     * - <p>: followed by an address, article, aside, blockquote, details, dir, div, dl,
     *   fieldset, figcaption, figure, footer, form, h1-h6, header, hgroup, hr, main,
     *   menu, nav, ol, p, pre, section, table or ul element; or last, when the parent is
     *   not an a, audio, del, ins, map, noscript or video element and not an autonomous
     *   custom element (recognized by a hyphen in the tag name)
     *
     * @noinspection TodoComment
     *
     * TODO: Not Implemented
     * - <html> may be omitted if first thing inside is not comment
     * - <head> may be omitted if first thing inside is an element
     * - <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
     * - <colgroup> may be omitted if first thing inside is <col>
     * - <tbody> may be omitted if first thing inside is <tr>
     * - A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
     * - A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
     * - A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
     * - A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
     * - A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
     * - A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
     * - A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
     * - However, a start tag must never be omitted if it has any attributes.
     *
     * @param \DOMNode $node
     *
     * @return bool
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $isLast = $nextSibling === null;
        // stays null when there is no following sibling or it is not a DOMElement
        $nextTag = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                return $isLast || $nextTag === 'li';

            case 'optgroup':
                return $isLast || $nextTag === 'optgroup';

            case 'rp':
                return $isLast || $nextTag === 'rp' || $nextTag === 'rt';

            case 'tr':
                return $isLast || $nextTag === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($isLast || $nextTag === 'source');

            case 'td':
            case 'th':
                return $isLast || $nextTag === 'td' || $nextTag === 'th';

            case 'dd':
                return $isLast || $nextTag === 'dd' || $nextTag === 'dt';

            case 'dt':
                // unlike <dd>, a trailing <dt> must keep its end tag
                return $nextTag === 'dd' || $nextTag === 'dt';

            case 'option':
                return $isLast || $nextTag === 'option' || $nextTag === 'optgroup';

            case 'p':
                if ($isLast) {
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           )
                           &&
                           // autonomous custom elements always contain a hyphen in their name
                           \strpos($parent_tag_name, '-') === false;
                }

                return $nextTag !== null
                       &&
                       \in_array(
                           $nextTag,
                           [
                               'address',
                               'article',
                               'aside',
                               'blockquote',
                               'details',
                               'dir',
                               'div',
                               'dl',
                               'fieldset',
                               'figcaption',
                               'figure',
                               'footer',
                               'form',
                               'h1',
                               'h2',
                               'h3',
                               'h4',
                               'h5',
                               'h6',
                               'header',
                               'hgroup',
                               'hr',
                               'main',
                               'menu',
                               'nav',
                               'ol',
                               'p',
                               'pre',
                               'section',
                               'table',
                               'ul',
                           ],
                           true
                       );

            default:
                return false;
        }
    }
911
912 51
    /**
     * Serialize the child nodes of the given DOM node into an HTML string.
     *
     * Doc-type, element, text and comment children are handled; element
     * children are serialized recursively. Children of any other node type
     * produce no output.
     *
     * @param \DOMNode $node the node whose children are serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';

        // Small state machine over the loop iterations:
        // '' -> 'is_empty' (set in this iteration) -> 'last_was_empty' (set in the previous iteration)
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            if ($emptyStringTmp === 'is_empty') {
                $emptyStringTmp = 'last_was_empty';
            } else {
                $emptyStringTmp = '';
            }

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';
                    }

                    if ($child->systemId) {
                        // The "SYSTEM" keyword is only emitted when there is no public identifier;
                        // otherwise the system identifier follows the public one, separated by
                        // exactly one space.
                        $html .= ($child->publicId ? '' : ' SYSTEM') . ' "' . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() drops the separator space if the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (!$this->doRemoveWhitespaceAroundTags) {
                    /** @noinspection NestedPositiveIfStatementsInspection */
                    if (
                        $child->nextSibling instanceof \DOMText
                        &&
                        $child->nextSibling->wholeText === ' '
                    ) {
                        if (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        ) {
                            $html = \rtrim($html);

                            // no separator space is added for direct children of <head>
                            if (
                                $child->parentNode
                                &&
                                $child->parentNode->nodeName !== 'head'
                            ) {
                                $html .= ' ';
                            }
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                }
            } elseif ($child instanceof \DOMText) {
                if ($child->isElementContentWhitespace()) {
                    // whitespace-only text is only considered between two sibling nodes
                    if (
                        $child->previousSibling !== null
                        &&
                        $child->nextSibling !== null
                    ) {
                        if (
                            (
                                $child->wholeText
                                &&
                                \strpos($child->wholeText, ' ') !== false
                            )
                            ||
                            (
                                $emptyStringTmp !== 'last_was_empty'
                                &&
                                \substr($html, -1) !== ' '
                            )
                        ) {
                            $html = \rtrim($html);

                            // no separator space is added for direct children of <head>
                            if (
                                $child->parentNode
                                &&
                                $child->parentNode->nodeName !== 'head'
                            ) {
                                $html .= ' ';
                            }
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                } else {
                    $html .= $child->wholeText;
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1024
1025
    /**
     * Get the current value of the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * @return array
     */
1028
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
1029
    {
1030
        return $this->domainsToRemoveHttpPrefixFromAttributes;
1031
    }
1032
1033
    /**
     * Get the current value of the "doOptimizeAttributes" setting.
     *
     * @return bool
     */
1036
    public function isDoOptimizeAttributes(): bool
1037
    {
1038
        return $this->doOptimizeAttributes;
1039
    }
1040
1041
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" setting.
     *
     * minify() only runs the DOM based minification step if this setting is enabled.
     *
     * @return bool
     */
1044
    public function isDoOptimizeViaHtmlDomParser(): bool
1045
    {
1046
        return $this->doOptimizeViaHtmlDomParser;
1047
    }
1048
1049
    /**
     * Get the current value of the "doRemoveComments" setting.
     *
     * The DOM based minification only removes comments if this setting is enabled.
     *
     * @return bool
     */
1052
    public function isDoRemoveComments(): bool
1053
    {
1054
        return $this->doRemoveComments;
1055
    }
1056
1057
    /**
     * Get the current value of the "doRemoveDefaultAttributes" setting.
     *
     * @return bool
     */
1060 34
    public function isDoRemoveDefaultAttributes(): bool
1061
    {
1062 34
        return $this->doRemoveDefaultAttributes;
1063
    }
1064
1065
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" setting.
     *
     * @return bool
     */
1068 34
    public function isDoRemoveDeprecatedAnchorName(): bool
1069
    {
1070 34
        return $this->doRemoveDeprecatedAnchorName;
1071
    }
1072
1073
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" setting.
     *
     * @return bool
     */
1076 34
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
1077
    {
1078 34
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
1079
    }
1080
1081
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" setting.
     *
     * @return bool
     */
1084 34
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
1085
    {
1086 34
        return $this->doRemoveDeprecatedTypeFromScriptTag;
1087
    }
1088
1089
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" setting.
     *
     * @return bool
     */
1092 34
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
1093
    {
1094 34
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
1095
    }
1096
1097
    /**
     * Get the current value of the "doRemoveEmptyAttributes" setting.
     *
     * @return bool
     */
1100 34
    public function isDoRemoveEmptyAttributes(): bool
1101
    {
1102 34
        return $this->doRemoveEmptyAttributes;
1103
    }
1104
1105
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" setting.
     *
     * @return bool
     */
1108 34
    public function isDoRemoveHttpPrefixFromAttributes(): bool
1109
    {
1110 34
        return $this->doRemoveHttpPrefixFromAttributes;
1111
    }
1112
1113
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" setting.
     *
     * @return bool
     */
1116 34
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
1117
    {
1118 34
        return $this->doRemoveHttpsPrefixFromAttributes;
1119
    }
1120
1121
    /**
     * Get the current value of the "doKeepHttpAndHttpsPrefixOnExternalAttributes" setting.
     *
     * NOTE: unlike the sibling getters, this method name starts with "isdo" (lower-case "d").
     * PHP resolves method names case-insensitively, so calls spelled "isDo..." reach this method too.
     *
     * @return bool
     */
1124 4
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
1125
    {
1126 4
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
1127
    }
1128
1129
    /**
     * Get the current value of the "doMakeSameDomainsLinksRelative" setting.
     *
     * @return bool
     */
1132 34
    public function isDoMakeSameDomainsLinksRelative(): bool
1133
    {
1134 34
        return $this->doMakeSameDomainsLinksRelative;
1135
    }
1136
1137
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" setting.
     *
     * When the setting is disabled, domNodeToString() always writes the closing tag of an element.
     *
     * @return bool
     */
1140
    public function isDoRemoveOmittedHtmlTags(): bool
1141
    {
1142
        return $this->doRemoveOmittedHtmlTags;
1143
    }
1144
1145
    /**
     * Get the current value of the "doRemoveOmittedQuotes" setting.
     *
     * @return bool
     */
1148
    public function isDoRemoveOmittedQuotes(): bool
1149
    {
1150
        return $this->doRemoveOmittedQuotes;
1151
    }
1152
1153
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" setting.
     *
     * When enabled, minify() removes a single whitespace character that sits between ">" and "<".
     *
     * @return bool
     */
1156
    public function isDoRemoveSpacesBetweenTags(): bool
1157
    {
1158
        return $this->doRemoveSpacesBetweenTags;
1159
    }
1160
1161
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" setting.
     *
     * @return bool
     */
1164 34
    public function isDoRemoveValueFromEmptyInput(): bool
1165
    {
1166 34
        return $this->doRemoveValueFromEmptyInput;
1167
    }
1168
1169
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" setting.
     *
     * When enabled, the DOM based minification calls removeWhitespaceAroundTags() for every element.
     *
     * @return bool
     */
1172
    public function isDoRemoveWhitespaceAroundTags(): bool
1173
    {
1174
        return $this->doRemoveWhitespaceAroundTags;
1175
    }
1176
1177
    /**
     * Get the current value of the "doSortCssClassNames" setting.
     *
     * @return bool
     */
1180 34
    public function isDoSortCssClassNames(): bool
1181
    {
1182 34
        return $this->doSortCssClassNames;
1183
    }
1184
1185
    /**
     * Get the current value of the "doSortHtmlAttributes" setting.
     *
     * @return bool
     */
1188 34
    public function isDoSortHtmlAttributes(): bool
1189
    {
1190 34
        return $this->doSortHtmlAttributes;
1191
    }
1192
1193
    /**
     * Get the current value of the "doSumUpWhitespace" setting.
     *
     * When enabled, the DOM based minification calls sumUpWhitespace() on the document.
     *
     * @return bool
     */
1196
    public function isDoSumUpWhitespace(): bool
1197
    {
1198
        return $this->doSumUpWhitespace;
1199
    }
1200
1201
    /**
     * Minify the given HTML.
     *
     * @param string $html                     the HTML to minify (cast to string and trimmed first)
     * @param bool   $multiDecodeNewHtmlEntity passed through to the DOM based minification
     *
     * @return string the minified HTML; an empty string for empty or whitespace-only input;
     *                the trimmed input itself if the processed result would be longer than it
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare with the empty string explicitly: a loose "falsy" check
        // would also throw away the valid input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $html = (string) \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    return '<' . $matches[1] . \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]) . $matches[3] . '>';
                },
                $html
            );
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $html = (string) \preg_replace('#(>)\s(<)#', '>$2', $html);
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $html = (string) \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly before / after the <html> and <head> tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        // fall back to the (trimmed) input if the result got longer
        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1332
1333
    /**
     * Find the closest following sibling that is a DOM element.
     *
     * Siblings of any other node type (text, comments, ...) are skipped.
     *
     * @param \DOMNode $node the node to start from (it is never returned itself)
     *
     * @return \DOMNode|null the next element sibling or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;

        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1347
1348
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional if it starts with "[if ...]" or ends with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap strpos() lookups avoid running the regular expressions on most comments

        /** @noinspection RegExpRedundantEscape */
        $startsWithIf = \strpos($comment, '[if ') !== false
                        && \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithIf) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        $endsWithEndif = \strpos($comment, '[endif]') !== false
                         && \preg_match('/\[endif\]$/', $comment);

        return (bool) $endsWithEndif;
    }
1378
1379
    /**
     * Run the DOM based minification steps and convert the DOM back into a string.
     *
     * Order of the steps: protect <nocompress> content, notify the observers
     * (before), protect further tags and conditional comments, remove comments,
     * sum-up whitespace, per element whitespace handling plus observer
     * notification (after), serialization.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // no redundant white space, no indentation of the output
        $dom->getDocument()->preserveWhiteSpace = false;
        $dom->getDocument()->formatOutput = false;

        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember whether the input itself started with a doc-type
        $this->withDocType = \stripos(\ltrim($html), '<!DOCTYPE') === 0;

        // <nocompress> content is protected before anything else happens
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // observers see every element before the minification ...
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // protect HTML tags and conditional comments
        $dom = $this->protectTags($dom);

        // [protected html is still protected in the following steps]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            // ... and again after the minification
            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // convert the dom into a string
        $serializedDom = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serializedDom, $multiDecodeNewHtmlEntity);
    }
1463
1464
    /**
     * Call domElementAfterMinification() on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            // the observer also receives this HtmlMin instance
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1475
1476
    /**
     * Call domElementBeforeMinification() on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            // the observer also receives this HtmlMin instance
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1487
1488
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match the inner HTML of the element's parent is stored in
     * $this->protectedChildNodes and the parent's node value is replaced by a
     * numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser the given parser instance
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        foreach ($dom->find($selector) as $matchedElement) {
            if ($matchedElement->isRemoved()) {
                continue;
            }

            // keep the original html under the current counter value
            $this->protectedChildNodes[$this->protected_tags_counter] = $matchedElement->parentNode()->innerHtml();

            $parent = $matchedElement->getNode()->parentNode;
            if ($parent !== null) {
                // placeholder: <HELPER data-HELPER="COUNTER"></HELPER>
                $parent->nodeValue = \sprintf(
                    '<%1$s data-%1$s="%2$s"></%1$s>',
                    $this->protectedChildNodesHelper,
                    $this->protected_tags_counter
                );
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1512
1513
    /**
     * Prevent changes of "code" tags, inline "styles" / "scripts" and conditional comments.
     *
     * The protected content is stored in $this->protectedChildNodes and
     * replaced in the DOM by a numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given parser instance
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        // placeholder: <HELPER data-HELPER="COUNTER"></HELPER>
        $placeholderFormat = '<%1$s data-%1$s="%2$s"></%1$s>';

        foreach ($dom->find('script, style') as $scriptOrStyle) {
            if ($scriptOrStyle->isRemoved()) {
                continue;
            }

            if ($scriptOrStyle->tag === 'script' || $scriptOrStyle->tag === 'style') {
                $attributes = $scriptOrStyle->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$this->protected_tags_counter] = $scriptOrStyle->innerhtml;
            $scriptOrStyle->getNode()->nodeValue = \sprintf(
                $placeholderFormat,
                $this->protectedChildNodesHelper,
                $this->protected_tags_counter
            );

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // only conditional comments are protected, normal comments are skipped
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $this->protectedChildNodes[$this->protected_tags_counter] = '<!--' . $commentText . '-->';

            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText(
                \sprintf(
                    $placeholderFormat,
                    $this->protectedChildNodesHelper,
                    $this->protected_tags_counter
                )
            );

            // swap the comment for a text node that carries the placeholder
            $parent = $commentElement->getNode()->parentNode;
            if ($parent !== null) {
                $parent->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1570
1571
    /**
     * Remove comments from the dom.
     *
     * Comments that contain a "[" are kept, all other comments are removed
     * from their parent node. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given parser instance
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $wrappedComment) {
            $commentNode = $wrappedComment->getNode();

            // keep every comment that contains a "["
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $parent = $commentNode->parentNode;
            if ($parent === null) {
                continue;
            }

            $parent->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1595
1596
    /**
     * Collapse whitespace in the text nodes in and around the given element.
     *
     * Only elements whose tag is a key of self::$trimWhitespaceFromTags and
     * which have at least one child node are processed. For those, the first
     * child, the last child, the previous sibling and the next sibling are
     * inspected; in every one of them that is a text node, each match of
     * self::$regExSpace is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        // elements without child nodes are left untouched (including their siblings)
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // DOM nodes are object handles: iterating by value is enough to modify
        // them and avoids a dangling reference after the loop.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            // preg_replace() returns null on error, the node is not changed in that case
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1632
1633
    /**
     * Callback for preg_replace_callback(): map a placeholder tag back to its protected html.
     *
     * The numeric id is read from the last double-quoted value in the
     * "attributes" match and used as key for $this->protectedChildNodes.
     *
     * @param array $matches PREG matches
     *
     * @return string the protected html or an empty string if nothing is stored for the id
     */
    private function restoreProtectedHtml($matches): string
    {
        $idMatch = [];
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatch);

        if (!isset($idMatch['id'])) {
            return '';
        }

        return $this->protectedChildNodes[$idMatch['id']] ?? '';
    }
1646
1647
    /**
     * Set the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * The given value is stored as-is; it is returned unchanged by
     * getDomainsToRemoveHttpPrefixFromAttributes().
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this
     */
1652 2
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
1653
    {
1654 2
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;
1655
1656 2
        return $this;
1657
    }
1658
1659
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node each match of self::$regExSpace is replaced by a
     * single space. Text nodes whose node path contains "/" followed by one of
     * the entries of self::$skipTagsForRemoveWhitespace are left untouched.
     * The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the given parser instance
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        $text_nodes = $dom->find('//text()');
        foreach ($text_nodes as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();
            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: "${var}" string interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            // preg_replace() returns null on error, the node is not changed in that case
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1699
1700
    /**
     * Set the "keepBrokenHtml" setting.
     *
     * The value is handed to HtmlDomParser::useKeepBrokenHtml() when the DOM
     * based minification runs.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin
     */
1707 2
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
1708
    {
1709 2
        $this->keepBrokenHtml = $keepBrokenHtml;
1710
1711 2
        return $this;
1712
    }
1713
}
1714