Completed
Push — master ( d27882...fd8f2b )
by Lars
14s queued 13s
created

HtmlMin::isLocalDomainSet()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
     * Whitespace pattern: a run of two or more whitespace characters, or a single CR / LF.
     *
     * Used by domNodeAttributesToString() to collapse whitespace inside "srcset" / "sizes" values.
     *
     * @var string
     */
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";

    /**
     * Tag names whose closing tag domNodeClosingTagOptional() always reports as optional.
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $optional_end_tags = [
        'html',
        'head',
        'body',
    ];

    /**
     * Tag names of self-closing elements (consumer not visible in this part of the file).
     *
     * @var string[]
     *
     * @psalm-var list<string>
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Map keyed by tag name (values are unused empty strings), so membership can be
     * tested via isset(). Consumer not visible in this part of the file.
     *
     * @var string[]
     *
     * @psalm-var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];

    /**
     * Boolean attribute names, stored as keys for isset() lookups.
     *
     * domNodeAttributesToString() prints these attributes without a value
     * while attribute optimization is enabled.
     *
     * @var array
     */
    private static $booleanAttributes = [
        'allowfullscreen' => '',
        'async'           => '',
        'autofocus'       => '',
        'autoplay'        => '',
        'checked'         => '',
        'compact'         => '',
        'controls'        => '',
        'declare'         => '',
        'default'         => '',
        'defaultchecked'  => '',
        'defaultmuted'    => '',
        'defaultselected' => '',
        'defer'           => '',
        'disabled'        => '',
        'enabled'         => '',
        'formnovalidate'  => '',
        'hidden'          => '',
        'indeterminate'   => '',
        'inert'           => '',
        'ismap'           => '',
        'itemscope'       => '',
        'loop'            => '',
        'multiple'        => '',
        'muted'           => '',
        'nohref'          => '',
        'noresize'        => '',
        'noshade'         => '',
        'novalidate'      => '',
        'nowrap'          => '',
        'open'            => '',
        'pauseonexit'     => '',
        'readonly'        => '',
        'required'        => '',
        'reversed'        => '',
        'scoped'          => '',
        'seamless'        => '',
        'selected'        => '',
        'sortable'        => '',
        'truespeed'       => '',
        'typemustmatch'   => '',
        'visible'         => '',
    ];

    /**
     * Tag names to skip when removing whitespace (consumer not visible in this part of the file).
     *
     * @var array
     */
    private static $skipTagsForRemoveWhitespace = [
        'code',
        'pre',
        'script',
        'style',
        'textarea',
    ];
139
140
    /**
     * Storage for protected child nodes (populated outside this part of the file).
     *
     * @var array
     */
    private $protectedChildNodes = [];

    /**
     * Helper identifier used together with the protected child nodes.
     *
     * @var string
     */
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';

    /**
     * Option set via doOptimizeViaHtmlDomParser(); enabled by default.
     *
     * @var bool
     */
    private $doOptimizeViaHtmlDomParser = true;

    /**
     * Option set via doOptimizeAttributes(); enabled by default.
     * Read by domNodeAttributesToString().
     *
     * @var bool
     */
    private $doOptimizeAttributes = true;

    /**
     * Option set via doRemoveComments(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveComments = true;

    /**
     * Option set via doRemoveWhitespaceAroundTags(); disabled by default.
     * Read by domNodeToString().
     *
     * @var bool
     */
    private $doRemoveWhitespaceAroundTags = false;

    /**
     * Option set via doRemoveOmittedQuotes(); enabled by default.
     * Read by domNodeAttributesToString().
     *
     * @var bool
     */
    private $doRemoveOmittedQuotes = true;

    /**
     * Option set via doRemoveOmittedHtmlTags(); enabled by default.
     * Read by domNodeToString().
     *
     * @var bool
     */
    private $doRemoveOmittedHtmlTags = true;

    /**
     * Option set via doRemoveHttpPrefixFromAttributes(); disabled by default.
     *
     * @var bool
     */
    private $doRemoveHttpPrefixFromAttributes = false;

    /**
     * Option set via doRemoveHttpsPrefixFromAttributes(); disabled by default.
     *
     * @var bool
     */
    private $doRemoveHttpsPrefixFromAttributes = false;

    /**
     * Option set via keepPrefixOnExternalAttributes(); disabled by default.
     *
     * @var bool
     */
    private $keepPrefixOnExternalAttributes = false;

    /**
     * Derived by doMakeSameDomainLinksRelative(): true once a non-empty local domain is stored.
     *
     * @var bool
     */
    private $doMakeSameDomainLinksRelative = false;

    /**
     * Local domain as normalized by doMakeSameDomainLinksRelative(); empty when not configured.
     *
     * @var string
     */
    private $localDomain = '';

    /**
     * Domain names associated with the "remove http prefix" option
     * (consumer not visible in this part of the file).
     *
     * @var array
     */
    private $domainsToRemoveHttpPrefixFromAttributes = [
        'google.com',
        'google.de',
    ];

    /**
     * Option set via doSortCssClassNames(); enabled by default.
     *
     * @var bool
     */
    private $doSortCssClassNames = true;

    /**
     * Option set via doSortHtmlAttributes(); enabled by default.
     *
     * @var bool
     */
    private $doSortHtmlAttributes = true;

    /**
     * Option set via doRemoveDeprecatedScriptCharsetAttribute(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveDeprecatedScriptCharsetAttribute = true;

    /**
     * Option set via doRemoveDefaultAttributes(); disabled by default.
     *
     * @var bool
     */
    private $doRemoveDefaultAttributes = false;

    /**
     * Option set via doRemoveDeprecatedAnchorName(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveDeprecatedAnchorName = true;

    /**
     * Option set via doRemoveDeprecatedTypeFromStylesheetLink(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;

    /**
     * Option set via doRemoveDeprecatedTypeFromScriptTag(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveDeprecatedTypeFromScriptTag = true;

    /**
     * Option set via doRemoveValueFromEmptyInput(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveValueFromEmptyInput = true;

    /**
     * Option set via doRemoveEmptyAttributes(); enabled by default.
     *
     * @var bool
     */
    private $doRemoveEmptyAttributes = true;

    /**
     * Option set via doSumUpWhitespace(); enabled by default.
     *
     * @var bool
     */
    private $doSumUpWhitespace = true;

    /**
     * Option set via doRemoveSpacesBetweenTags(); disabled by default.
     *
     * @var bool
     */
    private $doRemoveSpacesBetweenTags = false;

    /**
     * Internal state flag; disabled by default (writer not visible in this part of the file).
     *
     * @var bool
     */
    private $keepBrokenHtml = false;

    /**
     * Internal state flag; when false, domNodeToString() skips doctype nodes.
     *
     * @var bool
     */
    private $withDocType = false;

    /**
     * Registered DOM-loop observers; initialized in the constructor.
     *
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
     */
    private $domLoopObservers;

    /**
     * Counter for protected tags; starts at zero.
     *
     * @var int
     */
    private $protected_tags_counter = 0;
287
288
    /**
     * Set up an empty observer registry and register the default
     * attribute-optimizing DOM observer.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        // registered through the public method so the registration path stays in one place
        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
297
298
    /**
     * Register an observer in the DOM-loop observer storage.
     *
     * @param HtmlMinDomObserverInterface $observer observer instance to add
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
307
308
    /**
     * Fluent setter for the "optimize attributes" option.
     *
     * While enabled, domNodeAttributesToString() prints boolean attributes without
     * a value and collapses whitespace inside "srcset" / "sizes" values.
     *
     * @param bool $doOptimizeAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
319
320
    /**
     * Fluent setter for the "optimize via HTML DOM parser" option.
     *
     * @param bool $doOptimizeViaHtmlDomParser new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
331
332
    /**
     * Fluent setter for the "remove comments" option.
     *
     * @param bool $doRemoveComments new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
343
344
    /**
     * Fluent setter for the "remove default attributes" option.
     *
     * @param bool $doRemoveDefaultAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
355
356
    /**
     * Fluent setter for the "remove deprecated anchor name" option.
     *
     * @param bool $doRemoveDeprecatedAnchorName new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
367
368
    /**
     * Fluent setter for the "remove deprecated script charset attribute" option.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
379
380
    /**
     * Fluent setter for the "remove deprecated type from script tag" option.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
391
392
    /**
     * Fluent setter for the "remove deprecated type from stylesheet link" option.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
403
404
    /**
     * Fluent setter for the "remove empty attributes" option.
     *
     * @param bool $doRemoveEmptyAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
415
416
    /**
     * Fluent setter for the "remove http prefix from attributes" option.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
427
428
    /**
     * Fluent setter for the "remove https prefix from attributes" option.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
439
440
    /**
     * Fluent setter for the "keep prefix on external attributes" option.
     *
     * @param bool $keepPrefixOnExternalAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function keepPrefixOnExternalAttributes(bool $keepPrefixOnExternalAttributes = true): self
    {
        $this->keepPrefixOnExternalAttributes = $keepPrefixOnExternalAttributes;

        return $this;
    }
451
452
    /**
     * Store the local domain and derive the "make same-domain links relative" option from it.
     *
     * Before the value is stored, every "http://", "https://" (case-insensitive) or "//"
     * sequence is removed and trailing slashes are trimmed. The option is switched on when
     * the normalized domain is non-empty and switched off otherwise, so calling this method
     * without an argument disables it.
     *
     * @param string $localDomain domain of the current site, e.g. "https://example.com/"
     *
     * @return $this
     */
    public function doMakeSameDomainLinksRelative(string $localDomain = ''): self
    {
        // NOTE(review): the pattern is not anchored, so "//" is stripped wherever it occurs,
        // not only at the start of the value - confirm this is intended.
        $this->localDomain = \rtrim((string) \preg_replace('/(?:https?:)?\/\//i', '', $localDomain), '/');

        $this->doMakeSameDomainLinksRelative = ($this->localDomain !== '');

        return $this;
    }
465
466
    /**
     * Return the local domain in the normalized form stored by doMakeSameDomainLinksRelative().
     *
     * @return string empty string when no local domain has been configured
     */
    public function getLocalDomain(): string
    {
        return $this->localDomain;
    }
473
474
    /**
     * Fluent setter for the "remove omitted html tags" option.
     *
     * While enabled, domNodeToString() leaves out closing tags that
     * domNodeClosingTagOptional() reports as optional.
     *
     * @param bool $doRemoveOmittedHtmlTags new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
485
486
    /**
     * Fluent setter for the "remove omitted quotes" option.
     *
     * While enabled, domNodeAttributesToString() prints attribute values without
     * surrounding quotes whenever the value allows it.
     *
     * @param bool $doRemoveOmittedQuotes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
497
498
    /**
     * Fluent setter for the "remove spaces between tags" option.
     *
     * @param bool $doRemoveSpacesBetweenTags new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
509
510
    /**
     * Fluent setter for the "remove value from empty input" option.
     *
     * @param bool $doRemoveValueFromEmptyInput new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
521
522
    /**
     * Fluent setter for the "remove whitespace around tags" option.
     *
     * The flag is read by domNodeToString() when it handles whitespace that follows an element.
     *
     * @param bool $doRemoveWhitespaceAroundTags new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
533
534
    /**
     * Fluent setter for the "sort css class names" option.
     *
     * @param bool $doSortCssClassNames new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
545
546
    /**
     * Fluent setter for the "sort html attributes" option.
     *
     * @param bool $doSortHtmlAttributes new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
557
558
    /**
     * Fluent setter for the "sum up whitespace" option.
     *
     * @param bool $doSumUpWhitespace new state of the option (enabled when omitted)
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
569
570 50
    /**
     * Serialize the attributes of a DOM node into a space-separated HTML attribute string.
     *
     * - With attribute optimization enabled, boolean attributes are printed as the bare name
     *   and whitespace inside "srcset" / "sizes" values is collapsed.
     * - With quote removal enabled, quotes are dropped where the value permits it
     *   (<p class="foo"> → <p class=foo>).
     * - Otherwise the value is wrapped in double quotes, or in single quotes when it contains
     *   a double quote. A value containing both quote characters is wrapped in double quotes
     *   with its double quotes written as "&quot;", so the output stays well-formed.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string the attribute string without leading / trailing whitespace; empty when
     *                the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the name alone is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                // preg_replace() yields null on a PCRE error; keep the untouched value
                // in that case instead of silently emitting an empty attribute
                $attr_val = \preg_replace(self::$regExSpace, ' ', $attribute->value) ?? $attribute->value;
            } else {
                $attr_val = $attribute->value;
            }

            $quoteTmp = '';
            if (!$omit_quotes) {
                $quoteTmp = '"';

                if (\strpos($attribute->value, '"') !== false) {
                    if (\strpos($attribute->value, "'") === false) {
                        // only double quotes inside the value: single quotes are a safe delimiter
                        $quoteTmp = "'";
                    } else {
                        // both quote styles inside the value: neither delimiter is safe as-is,
                        // so keep double quotes and escape the embedded ones
                        $attr_val = \str_replace('"', '&quot;', $attr_val);
                    }
                }
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
631
632
    /**
     * Tell whether the closing tag of the given node may be left out of the output.
     *
     * The decision is based on the node's tag name, its parent's tag name and the
     * sibling returned by getNextSiblingOfTypeDOMElement().
     *
     * Rules: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * @param \DOMNode $node
     *
     * @return bool true when the closing tag is optional
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tagName = $node->nodeName;

        /** @var \DOMNode|null $parentNode - false-positive error from phpstan */
        $parentNode = $node->parentNode;
        $parentTagName = $parentNode ? $parentNode->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $hasNoNextSibling = $nextSibling === null;
        $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        // The closing tags of these elements are always treated as optional.
        if (\in_array($tagName, self::$optional_end_tags, true)) {
            return true;
        }

        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        if ($tagName === 'li') {
            return $hasNoNextSibling || $nextTagName === 'li';
        }

        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
        if ($tagName === 'rp') {
            return $hasNoNextSibling || $nextTagName === 'rp' || $nextTagName === 'rt';
        }

        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        if ($tagName === 'tr') {
            return $hasNoNextSibling || $nextTagName === 'tr';
        }

        // <source> inside a media container: optional when it is the last element
        // or when another <source> follows.
        if ($tagName === 'source') {
            return \in_array($parentTagName, ['audio', 'video', 'picture', 'source'], true)
                   &&
                   ($hasNoNextSibling || $nextTagName === 'source');
        }

        // A <td> / <th> element's end tag may be omitted if the element is immediately followed by a td or th element, or if there is no more content in the parent element.
        if ($tagName === 'td' || $tagName === 'th') {
            return $hasNoNextSibling || $nextTagName === 'td' || $nextTagName === 'th';
        }

        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        if ($tagName === 'dd') {
            return $hasNoNextSibling || $nextTagName === 'dd' || $nextTagName === 'dt';
        }

        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        if ($tagName === 'dt') {
            return $nextTagName === 'dd' || $nextTagName === 'dt';
        }

        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        if ($tagName === 'option') {
            return $hasNoNextSibling || $nextTagName === 'option' || $nextTagName === 'optgroup';
        }

        if ($tagName !== 'p') {
            return false;
        }

        // A <p> element's end tag may be omitted if the p element is immediately followed by one of
        // the block-level elements listed below, or if there is no more content in the parent element
        // and the parent element is not an a, audio, del, ins, map, noscript, or video element.
        if ($hasNoNextSibling) {
            return $parentNode !== null
                   &&
                   !\in_array(
                       $parentNode->nodeName,
                       [
                           'a',
                           'audio',
                           'del',
                           'ins',
                           'map',
                           'noscript',
                           'video',
                       ],
                       true
                   );
        }

        // Compared with the list in the HTML specification, this list omits "details",
        // "figcaption", "figure" and "main" and additionally contains "dir".
        return \in_array(
            $nextTagName,
            [
                'address',
                'article',
                'aside',
                'blockquote',
                'dir',
                'div',
                'dl',
                'fieldset',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
            true
        );
    }
893
894 50
    /**
     * Serialize the child nodes of the given node into an HTML string.
     *
     * Only the children are rendered (element children recursively); the
     * given node itself is not part of the output. Doc-types, elements, text
     * nodes and comments are handled, any other node type is ignored.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';

        // Small state machine for whitespace handling:
        // ''               -> nothing special happened for the previous child
        // 'is_empty'       -> the current child was (or is followed by) whitespace
        // 'last_was_empty' -> the previous child was in the "is_empty" state
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = ($emptyStringTmp === 'is_empty') ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';

                        // the system identifier directly follows the public one,
                        // separated by exactly one space
                        if ($child->systemId) {
                            $html .= ' "' . $child->systemId . '"';
                        }
                    } elseif ($child->systemId) {
                        $html .= ' SYSTEM "' . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() drops the separator space if the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (!$this->doRemoveWhitespaceAroundTags) {
                    /** @noinspection NestedPositiveIfStatementsInspection */
                    if (
                        $child->nextSibling instanceof \DOMText
                        &&
                        $child->nextSibling->wholeText === ' '
                    ) {
                        if (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        ) {
                            $html = \rtrim($html);

                            // no separator space is written for direct children of "head"
                            if (
                                $child->parentNode
                                &&
                                $child->parentNode->nodeName !== 'head'
                            ) {
                                $html .= ' ';
                            }
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                }
            } elseif ($child instanceof \DOMText) {
                if ($child->isElementContentWhitespace()) {
                    // whitespace-only nodes at the start or the end of the parent are dropped
                    if (
                        $child->previousSibling !== null
                        &&
                        $child->nextSibling !== null
                    ) {
                        if (
                            (
                                $child->wholeText
                                &&
                                \strpos($child->wholeText, ' ') !== false
                            )
                            ||
                            (
                                $emptyStringTmp !== 'last_was_empty'
                                &&
                                \substr($html, -1) !== ' '
                            )
                        ) {
                            $html = \rtrim($html);

                            // no separator space is written for direct children of "head"
                            if (
                                $child->parentNode
                                &&
                                $child->parentNode->nodeName !== 'head'
                            ) {
                                $html .= ' ';
                            }
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                } else {
                    // Append only this node's own text: "wholeText" also contains the
                    // text of all adjacent text nodes, which would be written again
                    // when those siblings are visited by this loop.
                    $html .= $child->data;
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1006
1007
    /**
     * Get the currently configured "domainsToRemoveHttpPrefixFromAttributes" list.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1014
1015
    /**
     * Get the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1022
1023
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1030
1031
    /**
     * Get the current value of the "doRemoveComments" option.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1038
1039
    /**
     * Get the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1046
1047
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1054
1055
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1062
1063
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1070
1071
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1078
1079
    /**
     * Get the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1086
1087
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1094
1095
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1102
1103
    /**
     * Get the current value of the "keepPrefixOnExternalAttributes" option.
     *
     * @return bool
     */
    public function isKeepPrefixOnExternalAttributes(): bool
    {
        return $this->keepPrefixOnExternalAttributes;
    }
1110
1111
    /**
     * Get the current value of the "doMakeSameDomainLinksRelative" option.
     *
     * @return bool
     */
    public function isDoMakeSameDomainLinksRelative(): bool
    {
        return $this->doMakeSameDomainLinksRelative;
    }
1118
1119
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1126
1127
    /**
     * Get the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1134
1135
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1142
1143
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1150
1151
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1158
1159
    /**
     * Get the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1166
1167
    /**
     * Get the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1174
1175
    /**
     * Get the current value of the "doSumUpWhitespace" option.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1182
1183
    /**
     * Minify an HTML string.
     *
     * Steps, in this order:
     * - DOM based minification (only if "doOptimizeViaHtmlDomParser" is enabled)
     * - clean-up of the whitespace between the attributes of each tag
     * - removal of a single whitespace character between ">" and "<"
     *   (only if "doRemoveSpacesBetweenTags" is enabled)
     * - restoring of the protected html fragments and html entities
     * - removal of line breaks next to html / head tags and of the trailing
     *   slash / closing tag of self closing elements
     *
     * If the result is longer than the trimmed input, the trimmed input is
     * returned instead.
     *
     * @param string $html                     the input is cast to string and trimmed,
     *                                         empty input results in an empty string
     * @param bool   $multiDecodeNewHtmlEntity passed through to the DOM based minification
     *
     * @return string
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Strict comparison on purpose: a loose check would treat the
        // non-empty input "0" as empty and silently drop it.
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // regex error: keep the tag as it was instead of dropping its attributes
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );

            // null signals a regex error, in that case this optional step is skipped
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if ($this->doRemoveSpacesBetweenTags) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (\strpos($html, ' ') !== false) {
                // Remove spaces that are between > and <
                $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
                if ($htmlTmp !== null) {
                    $html = $htmlTmp;
                }
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );

            if ($htmlTmp === null) {
                // The placeholders could not be replaced, so the processed
                // markup is not usable: fall back to the untouched input.
                return $origHtml;
            }

            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1314
1315
    /**
     * Find the first following sibling of the given node that is a DOMElement.
     *
     * Siblings of any other node type are skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null the next element sibling, or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        for ($sibling = $node->nextSibling; $sibling !== null; $sibling = $sibling->nextSibling) {
            if ($sibling instanceof \DOMElement) {
                return $sibling;
            }
        }

        return null;
    }
1329
1330
    /**
     * Check if the given comment text is a conditional comment.
     *
     * The text counts as conditional if it starts with "[if ...]" or if it
     * ends with "[endif]".
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // strpos() is only a cheap pre-check before the regex is used

        /** @noinspection RegExpRedundantEscape */
        $hasOpeningCondition = \strpos($comment, '[if ') !== false
                               &&
                               \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($hasOpeningCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               &&
               \preg_match('/\[endif\]$/', $comment);
    }
1360
1361
    /**
     * Run the DOM based part of the minification.
     *
     * The html is loaded into a new HtmlDomParser, protected fragments are
     * swapped for placeholders, the enabled optimizations are applied, the
     * registered observers are notified before and after that, and finally
     * the DOM is converted back into a string.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity passed through to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // remove redundant white space
        $dom->getDocument()->preserveWhiteSpace = false;
        // do not format the output with indentation
        $dom->getDocument()->formatOutput = false;

        // parse the input
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember if the input itself started with a doc-type
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // 1. protect <nocompress> HTML tags first
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // 2. notify the observers before the minification
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // 3. protect HTML tags and conditional comments
        $dom = $this->protectTags($dom);

        // 4. remove default HTML comments [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // 5. sum-up extra whitespace from the dom [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        // 6. per element: optional whitespace clean-up around the tag, then
        //    notify the observers after the minification
        foreach ($dom->find('*') as $domElement) {
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // 7. convert the dom into a string
        $serializedHtml = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serializedHtml, $multiDecodeNewHtmlEntity);
    }
1445
1446
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1457
1458
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement the element that is handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1469
1470
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match, the inner html of the PARENT of the matched element is
     * stored in "$this->protectedChildNodes", and the content of that parent
     * is replaced by a placeholder tag carrying the storage index.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $matchedElement) {
            if (!$matchedElement->isRemoved()) {
                $index = $this->protected_tags_counter;

                // save the original markup ...
                $this->protectedChildNodes[$index] = $matchedElement->parentNode()->innerHtml();

                // ... and put the placeholder in its place
                $domParent = $matchedElement->getNode()->parentNode;
                if ($domParent !== null) {
                    $domParent->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';
                }

                ++$this->protected_tags_counter;
            }
        }

        return $dom;
    }
1494
1495
    /**
     * Prevent changes of "code" tags, inline "styles" / "scripts" and
     * conditional comments.
     *
     * The protected content is stored in "$this->protectedChildNodes" and
     * replaced in the dom by a placeholder tag carrying the storage index.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        // inline scripts and styles
        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            if ($inlineElement->tag === 'script' || $inlineElement->tag === 'style') {
                $elementAttributes = $inlineElement->getAllAttributes();
                // skip external links
                if (isset($elementAttributes['src'])) {
                    continue;
                }
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        // conditional comments
        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $index = $this->protected_tags_counter;
            $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that holds the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $index . '"></' . $helperTag . '>');
            $domParent = $commentNode->parentNode;
            if ($domParent !== null) {
                $domParent->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1552
1553
    /**
     * Remove comments from the dom.
     *
     * Comments whose text contains a "[" character are kept, every other
     * comment is removed from its parent. Afterwards the document is
     * normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentElement) {
            $commentNode = $commentElement->getNode();

            // keep everything that contains a "["
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $owner = $commentNode->parentNode;
            if ($owner === null) {
                continue;
            }

            $owner->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1577
1578
    /**
     * Collapse whitespace in the text nodes at the edges of an element.
     *
     * Only elements whose tag is a key of "self::$trimWhitespaceFromTags" are
     * processed. For those, the first child, the last child, the previous
     * sibling and the next sibling are inspected, and in every one that is a
     * text node each match of "self::$regExSpace" is replaced by one space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();

        // NOTE(review): the siblings are only collected when the element has
        // at least one child, so elements without children are never
        // processed here - confirm that this is intended.
        /** @var \DOMNode[] $candidates */
        $candidates = [];
        if ($node->childNodes->length > 0) {
            $candidates = [
                $node->firstChild,
                $node->lastChild,
                $node->previousSibling,
                $node->nextSibling,
            ];
        }

        // The candidates are objects, so they can be modified without
        // iterating by reference (which would leave a dangling reference).
        /** @var mixed $candidate - false-positive error from phpstan */
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            // null signals a regex error, the text is left untouched then
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1614
1615
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the quoted numeric id from the "attributes" group of the matched
     * placeholder tag and returns the html that was stored under that id.
     *
     * @param array $matches PREG matches
     *
     * @return string the stored html, or an empty string if nothing is stored for the id
     */
    private function restoreProtectedHtml($matches): string
    {
        $idMatch = [];
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatch);

        if (isset($idMatch['id'], $this->protectedChildNodes[$idMatch['id']])) {
            return $this->protectedChildNodes[$idMatch['id']];
        }

        return '';
    }
1628
1629
    /**
     * Replace the "domainsToRemoveHttpPrefixFromAttributes" list.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1640
1641
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node each match of "self::$regExSpace" is replaced by one
     * space. Text nodes whose node path contains "/" followed by one of the
     * entries of "self::$skipTagsForRemoveWhitespace" are left untouched.
     * Afterwards the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same parser instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        $text_nodes = $dom->find('//text()');
        foreach ($text_nodes as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();
            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: "${var}" string interpolation is deprecated as of PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            // null signals a regex error, the text is left untouched then
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1681
1682
    /**
     * Set the "keepBrokenHtml" option.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return $this
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1695
}
1696