Completed
Pull Request — master (#47)
by
unknown
01:20
created

HtmlMin::isDoRemoveDeprecatedAnchorName()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $doKeepHttpAndHttpsPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainsLinksRelative = false;
199
200
    /**
201
     * @var string[]
202
     */
203
    private $localDomains = [];
204
205
    /**
206
     * @var array
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromScriptTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveValueFromEmptyInput = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveEmptyAttributes = true;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doSumUpWhitespace = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveSpacesBetweenTags = false;
267
268
    /**
269
     * @var bool
270
     */
271
    private $keepBrokenHtml = false;
272
273
    /**
274
     * @var bool
275
     */
276
    private $withDocType = false;
277
278
    /**
279
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
280
     */
281
    private $domLoopObservers;
282
283
    /**
284
     * @var int
285
     */
286
    private $protected_tags_counter = 0;
287
288
    /**
     * HtmlMin constructor.
     *
     * Creates the observer storage and registers the built-in
     * attribute-optimizing observer on it.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
297
298
    /**
     * Register an observer in the observer storage of this instance.
     *
     * The storage is an \SplObjectStorage, so attaching the same observer
     * object twice keeps a single entry.
     *
     * @param HtmlMinDomObserverInterface $observer observer to register
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
307
308
    /**
     * Toggle the "optimize attributes" option (property default: true).
     *
     * When enabled, domNodeAttributesToString() writes known boolean attributes
     * as bare names and collapses whitespace in "srcset" / "sizes" values.
     *
     * @param bool $doOptimizeAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
319
320
    /**
     * Toggle the "optimize via HTML DOM parser" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; the name suggests it switches the DOM-based pass on or off.
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option
     *
     * @return $this for method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
331
332
    /**
     * Toggle the "remove comments" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it controls stripping of HTML comments.
     *
     * @param bool $doRemoveComments new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
343
344
    /**
     * Toggle the "remove default attributes" option (property default: false).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; which attributes count as "default" must be checked there.
     *
     * @param bool $doRemoveDefaultAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
355
356
    /**
     * Toggle the "remove deprecated anchor name" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it concerns the "name" attribute on <a> elements.
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
367
368
    /**
     * Toggle the "remove deprecated script charset attribute" option
     * (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it concerns the "charset" attribute on <script>.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
379
380
    /**
     * Toggle the "remove deprecated type from script tag" option
     * (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it concerns the "type" attribute on <script>.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
391
392
    /**
     * Toggle the "remove deprecated type from stylesheet link" option
     * (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it concerns the "type" attribute on stylesheet
     * <link> elements.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
403
404
    /**
     * Toggle the "remove empty attributes" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; which empty attributes get removed must be checked there.
     *
     * @param bool $doRemoveEmptyAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
415
416
    /**
     * Toggle the "remove http prefix from attributes" option
     * (property default: false).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it shortens "http://" URLs in attribute values.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
427
428
    /**
     * Toggle the "remove https prefix from attributes" option
     * (property default: false).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it shortens "https://" URLs in attribute values.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
439
440
    /**
     * Toggle the "keep http and https prefix on external attributes" option
     * (property default: false).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; which attributes count as "external" must be checked there.
     *
     * @param bool $doKeepHttpAndHttpsPrefixOnExternalAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doKeepHttpAndHttpsPrefixOnExternalAttributes(bool $doKeepHttpAndHttpsPrefixOnExternalAttributes = true): self
    {
        $this->doKeepHttpAndHttpsPrefixOnExternalAttributes = $doKeepHttpAndHttpsPrefixOnExternalAttributes;

        return $this;
    }
451
452
    /**
     * Store the list of local domains and enable or disable the
     * "make same-domain links relative" option accordingly.
     *
     * Every entry is normalized before it is stored: each "//" sequence
     * (optionally preceded by "http:" or "https:", case-insensitive) is removed
     * and trailing slashes are trimmed, so "https://www.example.com/" becomes
     * "www.example.com". Array keys are preserved.
     *
     * The option is switched on when at least one domain was given and
     * switched off for an empty list.
     *
     * @param string[] $localDomains domains (with or without scheme) treated as local
     *
     * @return $this for method chaining
     */
    public function doMakeSameDomainsLinksRelative(array $localDomains): self
    {
        // array_map() instead of a by-reference foreach: no dangling reference
        // is left behind, and keys are kept because only one array is passed.
        $this->localDomains = \array_map(
            static function ($localDomain) {
                return \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');
            },
            $localDomains
        );
        $this->doMakeSameDomainsLinksRelative = \count($this->localDomains) > 0;

        return $this;
    }
469
470
    /**
     * Return the local domains stored on this instance, in the normalized form
     * produced by doMakeSameDomainsLinksRelative() (empty array by default).
     *
     * @return string[]
     */
    public function getLocalDomains(): array
    {
        return $this->localDomains;
    }
477
478
    /**
     * Toggle the "remove omitted HTML tags" option (property default: true).
     *
     * When enabled, domNodeToString() leaves out a closing tag whenever
     * domNodeClosingTagOptional() reports it as optional.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
489
490
    /**
     * Toggle the "remove omitted quotes" option (property default: true).
     *
     * When enabled, domNodeAttributesToString() writes attribute values without
     * quotes wherever the value allows it (<p class="foo"> becomes <p class=foo>).
     *
     * @param bool $doRemoveOmittedQuotes new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
501
502
    /**
     * Toggle the "remove spaces between tags" option (property default: false).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; its exact effect on whitespace must be checked there.
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
513
514
    /**
     * Toggle the "remove value from empty input" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it concerns empty "value" attributes on <input>.
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
525
526
    /**
     * Toggle the "remove whitespace around tags" option (property default: false).
     *
     * domNodeToString() consults this flag after writing each element; while it
     * is disabled, the single-space text node following an element gets special
     * handling there.
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option
     *
     * @return $this for method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
537
538
    /**
     * Toggle the "sort CSS class names" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; the sort order applied must be checked there.
     *
     * @param bool $doSortCssClassNames new value of the option
     *
     * @return $this for method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
549
550
    /**
     * Toggle the "sort HTML attributes" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; the sort order applied must be checked there.
     *
     * @param bool $doSortHtmlAttributes new value of the option
     *
     * @return $this for method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
561
562
    /**
     * Toggle the "sum up whitespace" option (property default: true).
     *
     * NOTE(review): the code that reads this flag is outside the visible part
     * of the file; presumably it collapses runs of whitespace.
     *
     * @param bool $doSumUpWhitespace new value of the option
     *
     * @return $this for method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
573
574 52
    /**
     * Serialize the attributes of a DOM node into a space-separated string
     * suitable for placing inside a start tag.
     *
     * - With attribute optimization enabled, names listed in
     *   self::$booleanAttributes are written as bare names (no value).
     * - With attribute optimization enabled, whitespace runs and line breaks in
     *   "srcset" / "sizes" values are collapsed to a single space.
     * - Quotes are dropped when allowed (<p class="foo"> → <p class=foo>);
     *   otherwise the value is wrapped in double quotes, or in single quotes
     *   when it contains a double quote. A value containing both quote kinds is
     *   wrapped in double quotes with its double quotes written as &quot;.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string the attribute string without surrounding whitespace;
     *                empty when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the name alone is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error; keep the raw
                // value in that case instead of silently emitting an empty one.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            } else {
                $attr_val = $attribute->value;
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quoteTmp = "'";
            } else {
                // Both quote characters occur in the value: neither can delimit
                // it unescaped, so escape the double quotes and use them.
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
635
636
    /**
     * Decide whether the closing tag of the given node may be left out.
     *
     * Rules: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * "Last" below means getNextSiblingOfTypeDOMElement() returned null for the node.
     *
     * Implemented:
     * - <html>, <head>, <body>: always (see self::$optional_end_tags).
     * - <li>: last, or followed by <li>.
     * - <optgroup>: last, or followed by <optgroup>.
     * - <rp>: last, or followed by <rp> / <rt>.
     * - <tr>: last, or followed by <tr>.
     * - <source>: parent is <audio>, <video>, <picture> or <source>, and the
     *   node is last or followed by <source>.
     * - <td>, <th>: last, or followed by <td> / <th>.
     * - <dd>: last, or followed by <dd> / <dt>.
     * - <dt>: followed by <dd> / <dt> (never when last).
     * - <option>: last, or followed by <option> / <optgroup>.
     * - <p>: followed by one of the block-level elements listed in the code, or
     *   last inside a parent that is not <a>, <audio>, <del>, <ins>, <map>,
     *   <noscript>, <video> or an autonomous custom element.
     *
     * @noinspection TodoComment
     *
     * TODO: Not implemented
     * - <html> start tag: may be omitted if the first thing inside is not a comment.
     * - <head> start tag: may be omitted if the first thing inside is an element.
     * - <body> start tag: may be omitted if the first thing inside is not space,
     *   a comment, <meta>, <link>, <script>, <style> or <template>.
     * - <colgroup> start tag: may be omitted if the first thing inside is <col>
     *   and it is not preceded by a <colgroup> whose end tag was omitted
     *   (never if the element is empty).
     * - <colgroup> / <caption> end tag: may be omitted if not immediately
     *   followed by ASCII whitespace or a comment.
     * - <thead> end tag: may be omitted if followed by <tbody> or <tfoot>.
     * - <tbody> start tag: may be omitted if the first thing inside is <tr> and
     *   it is not preceded by a <tbody>, <thead> or <tfoot> whose end tag was
     *   omitted (never if the element is empty).
     * - <tbody> end tag: may be omitted if followed by <tbody> or <tfoot>, or
     *   if there is no more content in the parent element.
     * - <tfoot> end tag: may be omitted if there is no more content in the parent.
     * - However, a start tag must never be omitted if it has any attributes.
     *
     * @param \DOMNode $node
     *
     * @return bool true when the closing tag can be dropped
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
            case 'optgroup':
            case 'tr':
                // last, or followed by an element of the same kind
                return $is_last || $next_tag_name === $tag_name;

            case 'rp':
                return $is_last || $next_tag_name === 'rp' || $next_tag_name === 'rt';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($is_last || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $is_last || $next_tag_name === 'td' || $next_tag_name === 'th';

            case 'dd':
            case 'dt':
                // a trailing <dt> must keep its end tag, a trailing <dd> need not
                return ($is_last && $tag_name === 'dd')
                       ||
                       $next_tag_name === 'dd'
                       ||
                       $next_tag_name === 'dt';

            case 'option':
                return $is_last || $next_tag_name === 'option' || $next_tag_name === 'optgroup';

            case 'p':
                if ($is_last) {
                    // Autonomous custom elements always carry a hyphen in their
                    // name; the spec excludes them as parents because their end
                    // tag does not implicitly close an open <p>.
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           )
                           &&
                           \strpos($parent_tag_name, '-') === false;
                }

                // NOTE(review): the spec list also names details, figcaption,
                // figure and main; they are not handled here yet.
                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
911
912 52
    /**
     * Serialize all child nodes of the given DOM node into one HTML string.
     *
     * Doctype, element, text and comment children are rendered; every other
     * node type is skipped. Element children are rendered recursively through
     * this method, their attributes through domNodeAttributesToString().
     *
     * @param \DOMNode $node the node whose children are serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';

        // Small state flag used to avoid emitting two separator spaces in a row:
        // it is set to 'is_empty' by the child that handled a whitespace
        // separator and is seen as 'last_was_empty' by the following child.
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            if ($emptyStringTmp === 'is_empty') {
                $emptyStringTmp = 'last_was_empty';
            } else {
                $emptyStringTmp = '';
            }

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';

                        // The system identifier follows the public identifier
                        // without a keyword, separated by exactly one space.
                        if ($child->systemId) {
                            $html .= ' "' . $child->systemId . '"';
                        }
                    } elseif ($child->systemId) {
                        $html .= ' SYSTEM "' . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() drops the separator space when the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (!$this->doRemoveWhitespaceAroundTags) {
                    /** @noinspection NestedPositiveIfStatementsInspection */
                    if (
                        $child->nextSibling instanceof \DOMText
                        &&
                        $child->nextSibling->wholeText === ' '
                    ) {
                        if (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        ) {
                            $html = \rtrim($html);

                            // no separator space is written for direct children of <head>
                            if (
                                $child->parentNode
                                &&
                                $child->parentNode->nodeName !== 'head'
                            ) {
                                $html .= ' ';
                            }
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                }
            } elseif ($child instanceof \DOMText) {
                if ($child->isElementContentWhitespace()) {
                    // whitespace-only text is only considered between two siblings
                    if (
                        $child->previousSibling !== null
                        &&
                        $child->nextSibling !== null
                    ) {
                        if (
                            (
                                $child->wholeText
                                &&
                                \strpos($child->wholeText, ' ') !== false
                            )
                            ||
                            (
                                $emptyStringTmp !== 'last_was_empty'
                                &&
                                \substr($html, -1) !== ' '
                            )
                        ) {
                            $html = \rtrim($html);

                            // no separator space is written for direct children of <head>
                            if (
                                $child->parentNode
                                &&
                                $child->parentNode->nodeName !== 'head'
                            ) {
                                $html .= ' ';
                            }
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                } else {
                    // raw text, appended without any escaping
                    $html .= $child->wholeText;
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1024
1025
    /**
     * Get the domain list stored by setDomainsToRemoveHttpPrefixFromAttributes().
     *
     * @return array the current value of $domainsToRemoveHttpPrefixFromAttributes
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1032
1033
    /**
     * Report whether the "doOptimizeAttributes" option is enabled.
     *
     * @return bool the current value of $doOptimizeAttributes
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1040
1041
    /**
     * Report whether the "doOptimizeViaHtmlDomParser" option is enabled.
     *
     * minify() only runs the DOM based minification (minifyHtmlDom()) and the
     * restore of preserved HTML entities when this option is set.
     *
     * @return bool the current value of $doOptimizeViaHtmlDomParser
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1048
1049
    /**
     * Report whether the "doRemoveComments" option is enabled.
     *
     * minifyHtmlDom() calls removeComments() only when this option is set.
     *
     * @return bool the current value of $doRemoveComments
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1056
1057
    /**
     * Report whether the "doRemoveDefaultAttributes" option is enabled.
     *
     * @return bool the current value of $doRemoveDefaultAttributes
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1064
1065
    /**
     * Report whether the "doRemoveDeprecatedAnchorName" option is enabled.
     *
     * @return bool the current value of $doRemoveDeprecatedAnchorName
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1072
1073
    /**
     * Report whether the "doRemoveDeprecatedScriptCharsetAttribute" option is enabled.
     *
     * @return bool the current value of $doRemoveDeprecatedScriptCharsetAttribute
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1080
1081
    /**
     * Report whether the "doRemoveDeprecatedTypeFromScriptTag" option is enabled.
     *
     * @return bool the current value of $doRemoveDeprecatedTypeFromScriptTag
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1088
1089
    /**
     * Report whether the "doRemoveDeprecatedTypeFromStylesheetLink" option is enabled.
     *
     * @return bool the current value of $doRemoveDeprecatedTypeFromStylesheetLink
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1096
1097
    /**
     * Report whether the "doRemoveEmptyAttributes" option is enabled.
     *
     * @return bool the current value of $doRemoveEmptyAttributes
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1104
1105
    /**
     * Report whether the "doRemoveHttpPrefixFromAttributes" option is enabled.
     *
     * @return bool the current value of $doRemoveHttpPrefixFromAttributes
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1112
1113
    /**
     * Report whether the "doRemoveHttpsPrefixFromAttributes" option is enabled.
     *
     * @return bool the current value of $doRemoveHttpsPrefixFromAttributes
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1120
1121
    /**
     * Report whether the "doKeepHttpAndHttpsPrefixOnExternalAttributes" option is enabled.
     *
     * The lower-case "do" in the method name differs from the sibling getters;
     * the existing spelling is kept as-is.
     *
     * @return bool the current value of $doKeepHttpAndHttpsPrefixOnExternalAttributes
     */
    public function isdoKeepHttpAndHttpsPrefixOnExternalAttributes(): bool
    {
        return $this->doKeepHttpAndHttpsPrefixOnExternalAttributes;
    }
1128
1129
    /**
     * Report whether the "doMakeSameDomainsLinksRelative" option is enabled.
     *
     * @return bool the current value of $doMakeSameDomainsLinksRelative
     */
    public function isDoMakeSameDomainsLinksRelative(): bool
    {
        return $this->doMakeSameDomainsLinksRelative;
    }
1136
1137
    /**
     * Report whether the "doRemoveOmittedHtmlTags" option is enabled.
     *
     * When set, domNodeToString() leaves out the closing tag of every element
     * for which domNodeClosingTagOptional() returns true.
     *
     * @return bool the current value of $doRemoveOmittedHtmlTags
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1144
1145
    /**
     * Report whether the "doRemoveOmittedQuotes" option is enabled.
     *
     * @return bool the current value of $doRemoveOmittedQuotes
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1152
1153
    /**
     * Report whether the "doRemoveSpacesBetweenTags" option is enabled.
     *
     * When set, minify() removes a single whitespace character that sits
     * directly between a ">" and a "<".
     *
     * @return bool the current value of $doRemoveSpacesBetweenTags
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1160
1161
    /**
     * Report whether the "doRemoveValueFromEmptyInput" option is enabled.
     *
     * @return bool the current value of $doRemoveValueFromEmptyInput
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1168
1169
    /**
     * Report whether the "doRemoveWhitespaceAroundTags" option is enabled.
     *
     * When set, minifyHtmlDom() calls removeWhitespaceAroundTags() for every
     * element and domNodeToString() skips its own handling of a single space
     * that follows an element.
     *
     * @return bool the current value of $doRemoveWhitespaceAroundTags
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1176
1177
    /**
     * Report whether the "doSortCssClassNames" option is enabled.
     *
     * @return bool the current value of $doSortCssClassNames
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1184
1185
    /**
     * Report whether the "doSortHtmlAttributes" option is enabled.
     *
     * @return bool the current value of $doSortHtmlAttributes
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1192
1193
    /**
     * Report whether the "doSumUpWhitespace" option is enabled.
     *
     * minifyHtmlDom() calls sumUpWhitespace() only when this option is set.
     *
     * @return bool the current value of $doSumUpWhitespace
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1200
1201
    /**
     * Minify an HTML string.
     *
     * Steps, in order: optional DOM based minification (minifyHtmlDom()),
     * whitespace clean-up inside tags, optional removal of single whitespace
     * between tags, restore of protected HTML and of preserved HTML entities,
     * and a final string clean-up (line breaks around html/head tags, trailing
     * slashes / closing tags of self-closing elements).
     *
     * The trimmed input is returned unchanged when the result would be longer
     * than the input or when one of the regular expressions fails.
     *
     * @param string $html                     the HTML to minify (cast to string)
     * @param bool   $multiDecodeNewHtmlEntity passed through to minifyHtmlDom()
     *
     * @return string the minified HTML, or an empty string for empty / whitespace-only input
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare against '' explicitly: a loose check would treat "0" as empty.
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // PCRE error (e.g. invalid UTF-8): keep the tag as it was
                        // instead of silently dropping all of its attributes.
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            if ($htmlTmp === null) {
                // PCRE error: returning the input is safer than an empty document
                return $origHtml;
            }
            $html = $htmlTmp;
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
                if ($htmlTmp === null) {
                    return $origHtml;
                }
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            // quote the helper tag name so it is always matched literally
            $helperTag = \preg_quote($this->protectedChildNodesHelper, '/');

            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $helperTag . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $helperTag . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp === null) {
                // the placeholders could not be replaced, so the result is unusable
                return $origHtml;
            }
            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1332
1333
    /**
     * Find the first following sibling of a node that is an element.
     *
     * Text, comment and other non-element siblings are skipped.
     *
     * @param \DOMNode $node the node to start from (it is not returned itself)
     *
     * @return \DOMNode|null the next \DOMElement sibling, or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        for ($sibling = $node->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
            if ($sibling instanceof \DOMElement) {
                return $sibling;
            }
        }

        return null;
    }
1347
1348
    /**
     * Check whether a comment text belongs to a conditional comment.
     *
     * INFO: conditional comments are no longer supported since IE 10.
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * A text counts as conditional when it starts with "[if ...]" or when it
     * ends with "[endif]".
     *
     * @param string $comment the comment text without the comment delimiters
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // The cheap strpos() checks avoid running the regex on ordinary comments.
        /** @noinspection RegExpRedundantEscape */
        $startsWithIf = \strpos($comment, '[if ') !== false
                        && \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithIf) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        $endsWithEndIf = \strpos($comment, '[endif]') !== false
                         && \preg_match('/\[endif\]$/', $comment);

        return (bool) $endsWithEndIf;
    }
1378
1379
    /**
     * Run the DOM based part of the minification and return the new HTML.
     *
     * The HTML is loaded into a HtmlDomParser, content that must stay
     * untouched is swapped for placeholders, the enabled DOM optimizations
     * are applied and the document is serialized via domNodeToString().
     * Registered observers are notified for every element before and after
     * the optimizations.
     *
     * @param string $html                     the HTML to minify
     * @param bool   $multiDecodeNewHtmlEntity passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $document = $dom->getDocument();
        $document->preserveWhiteSpace = false; // drop redundant white space
        $document->formatOutput = false; // no indentation in the output

        // parse the input
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember if the input itself started with a doctype (read by domNodeToString())
        $this->withDocType = \stripos(\ltrim($html), '<!DOCTYPE') === 0;

        // 1. protect <nocompress> first, before observers can change anything
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // 2. tell the observers about every element before the minification
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // 3. protect the remaining tags and the conditional comments
        $dom = $this->protectTags($dom);

        // 4. remove normal comments (protected html is still protected)
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // 5. collapse extra whitespace in text nodes (protected html is still protected)
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        // 6. per element: optional whitespace clean-up, then notify the observers
        $cleanUpAroundTags = $this->doRemoveWhitespaceAroundTags;
        foreach ($dom->find('*') as $domElement) {
            if ($cleanUpAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // 7. serialize the document
        $serialized = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serialized, $multiDecodeNewHtmlEntity);
    }
1463
1464
    /**
     * Call domElementAfterMinification() on every registered DOM-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that was just processed;
     *                                           handed to each observer together with this instance
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1475
1476
    /**
     * Call domElementBeforeMinification() on every registered DOM-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is about to be processed;
     *                                           handed to each observer together with this instance
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1487
1488
    /**
     * Swap the content around every element matching the selector for a placeholder.
     *
     * For each match the inner HTML of the match's PARENT is stored in
     * $protectedChildNodes and the parent's content is replaced by a
     * placeholder tag that carries the storage index. minify() swaps the
     * placeholders back via restoreProtectedHtml().
     *
     * @param HtmlDomParser $dom      the parsed document (modified in place)
     * @param string        $selector selector of the elements to protect
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $index = $this->protected_tags_counter;

            // must be read before the parent's content is overwritten below
            $this->protectedChildNodes[$index] = $match->parentNode()->innerHtml();

            $domParent = $match->getNode()->parentNode;
            if ($domParent !== null) {
                $domParent->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1512
1513
    /**
     * Protect content that must not be changed by the minification.
     *
     * Covers "code" elements (via protectTagHelper()), inline "script" and
     * "style" elements (those with a "src" attribute are skipped) and
     * conditional comments. The protected content is stored in
     * $protectedChildNodes and replaced by a placeholder tag in the document.
     *
     * @param HtmlDomParser $dom the parsed document (modified in place)
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        $helperTag = $this->protectedChildNodesHelper;

        // inline scripts and styles
        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            if ($inlineElement->tag === 'script' || $inlineElement->tag === 'style') {
                $elementAttributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($elementAttributes['src'])) {
                    continue;
                }
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        // conditional comments
        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

            // the comment node is replaced by a text node that holds the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholder = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>');
            $domParent = $commentNode->parentNode;
            if ($domParent !== null) {
                $domParent->replaceChild($placeholder, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1570
1571
    /**
     * Remove comment nodes from the document.
     *
     * Comments whose text contains a "[" are kept; all others are detached
     * from their parent. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom the parsed document (modified in place)
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            // keep every comment that contains a "["
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner !== null) {
                $owner->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1595
1596
    /**
     * Collapse whitespace in the text nodes at the edges of an element.
     *
     * Only elements whose tag is a key of self::$trimWhitespaceFromTags and
     * that have at least one child node are processed. For those, the first
     * child, the last child, the previous sibling and the next sibling are
     * checked; in each one that is a text node, every match of
     * self::$regExSpace is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element the element to process
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length < 1) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // Iterate by value: the entries are objects, so changing nodeValue
        // needs no reference and no dangling reference is left behind.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1632
1633
    /**
     * Callback for preg_replace_callback(): map a placeholder tag back to the
     * protected HTML it stands for.
     *
     * The storage index is read from the quoted value inside the named
     * "attributes" match and looked up in $protectedChildNodes.
     *
     * @param array $matches PREG matches of one placeholder tag
     *
     * @return string the protected HTML, or an empty string when the index
     *                cannot be read or nothing is stored under it
     */
    private function restoreProtectedHtml($matches): string
    {
        $attributes = $matches['attributes'] ?? '';

        // Without a match there is no "id" key to read, so stop here instead
        // of triggering an "undefined index" notice/warning.
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1646
1647
    /**
     * Store the domain list returned by getDomainsToRemoveHttpPrefixFromAttributes().
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes the new list; stored as given
     *
     * @return $this
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1658
1659
    /**
     * Sum-up extra whitespace in the text nodes of the document.
     *
     * In every text node, each match of self::$regExSpace is replaced by a
     * single space. Text nodes are left untouched when their node path
     * contains "/" followed by one of the entries of
     * self::$skipTagsForRemoveWhitespace (plain substring check). The document
     * is normalized afterwards.
     *
     * @param HtmlDomParser $dom the parsed document (modified in place)
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $textNodeWrapper) {
            /* @var $textNode \DOMNode */
            $textNode = $textNodeWrapper->getNode();

            $nodePath = $textNode->getNodePath();
            if ($nodePath === null) {
                continue;
            }

            // skip text that lives below one of the whitespace-sensitive tags
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                if (\strpos($nodePath, '/' . $pattern) !== false) {
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
            if ($nodeValueTmp !== null) {
                $textNode->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1699
1700
    /**
     * Set the "keepBrokenHtml" option, which minifyHtmlDom() forwards to
     * HtmlDomParser::useKeepBrokenHtml().
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml the new value of the option
     *
     * @return $this
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1713
}
1714