Completed
Push — master ( 61ae49...b42d4b )
by Lars
11:14 queued 10:00
created

HtmlMin::isDoSortHtmlAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = true;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var array
192
     */
193
    private $domainsToRemoveHttpPrefixFromAttributes = [
194
        'google.com',
195
        'google.de',
196
    ];
197
198
    /**
199
     * @var bool
200
     */
201
    private $doSortCssClassNames = true;
202
203
    /**
204
     * @var bool
205
     */
206
    private $doSortHtmlAttributes = true;
207
208
    /**
209
     * @var bool
210
     */
211
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
212
213
    /**
214
     * @var bool
215
     */
216
    private $doRemoveDefaultAttributes = false;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doRemoveDeprecatedAnchorName = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDeprecatedTypeFromScriptTag = true;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveValueFromEmptyInput = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveEmptyAttributes = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doSumUpWhitespace = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveSpacesBetweenTags = false;
252
253
    /**
254
     * @var bool
255
     */
256
    private $keepBrokenHtml = false;
257
258
    /**
259
     * @var bool
260
     */
261
    private $withDocType = false;
262
263
    /**
264
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
265
     */
266
    private $domLoopObservers;
267
268
    /**
269
     * @var int
270
     */
271
    private $protected_tags_counter = 0;
272
273
    /**
     * Create a minifier with its default DOM-loop observer registered.
     *
     * The observer storage is created first, then one HtmlMinDomObserverOptimizeAttributes
     * instance is attached through attachObserverToTheDomLoop().
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
282
283
    /**
     * Register an observer in the internal observer storage ($this->domLoopObservers).
     *
     * @param HtmlMinDomObserverInterface $observer observer to add
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
292
293
    /**
     * Fluent setter for the "doOptimizeAttributes" option (on for a fresh instance).
     *
     * domNodeAttributesToString() reads this flag: when it is on, boolean attributes are
     * written without a value and whitespace inside "srcset" / "sizes" values is collapsed.
     *
     * @param bool $doOptimizeAttributes new value of the option
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
304
305
    /**
     * Fluent setter for the "doOptimizeViaHtmlDomParser" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
316
317
    /**
     * Fluent setter for the "doRemoveComments" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveComments new value of the option
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
328
329
    /**
     * Fluent setter for the "doRemoveDefaultAttributes" option (off for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveDefaultAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
340
341
    /**
     * Fluent setter for the "doRemoveDeprecatedAnchorName" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
352
353
    /**
     * Fluent setter for the "doRemoveDeprecatedScriptCharsetAttribute" option
     * (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
364
365
    /**
     * Fluent setter for the "doRemoveDeprecatedTypeFromScriptTag" option
     * (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
376
377
    /**
     * Fluent setter for the "doRemoveDeprecatedTypeFromStylesheetLink" option
     * (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
388
389
    /**
     * Fluent setter for the "doRemoveEmptyAttributes" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveEmptyAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
400
401
    /**
     * Fluent setter for the "doRemoveHttpPrefixFromAttributes" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
412
413
    /**
     * Fluent setter for the "doRemoveHttpsPrefixFromAttributes" option (off for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes new value of the option
     *
     * @return $this
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
424
425
    /**
     * Fluent setter for the "doRemoveOmittedHtmlTags" option (on for a fresh instance).
     *
     * domNodeToString() reads this flag: when it is on, a closing tag is left out whenever
     * domNodeClosingTagOptional() reports it as optional.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
436
437
    /**
     * Fluent setter for the "doRemoveOmittedQuotes" option (on for a fresh instance).
     *
     * domNodeAttributesToString() reads this flag: when it is on, quotes around attribute
     * values are dropped where the value allows it (<p class="foo"> → <p class=foo>).
     *
     * @param bool $doRemoveOmittedQuotes new value of the option
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
448
449
    /**
     * Fluent setter for the "doRemoveSpacesBetweenTags" option (off for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
460
461
    /**
     * Fluent setter for the "doRemoveValueFromEmptyInput" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
472
473
    /**
     * Fluent setter for the "doRemoveWhitespaceAroundTags" option (off for a fresh instance).
     *
     * domNodeToString() reads this flag: while it is off, a single-space text node that
     * directly follows an element is taken into account when the output is assembled.
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
484
485
    /**
     * Fluent setter for the "doSortCssClassNames" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doSortCssClassNames new value of the option
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
496
497
    /**
     * Fluent setter for the "doSortHtmlAttributes" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doSortHtmlAttributes new value of the option
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
508
509
    /**
     * Fluent setter for the "doSumUpWhitespace" option (on for a fresh instance).
     *
     * This method only stores the flag; the code that evaluates it lives elsewhere in the class.
     *
     * @param bool $doSumUpWhitespace new value of the option
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
520
521 48
    /**
     * Build the attribute part of an opening tag for the given node.
     *
     * Attributes are written in the order the DOM reports them, separated by single spaces.
     * - With attribute optimization enabled, names listed in self::$booleanAttributes are
     *   written without a value, and whitespace runs inside "srcset" / "sizes" values are
     *   collapsed to one space.
     * - With quote removal enabled, quotes are dropped around values where HTML allows it
     *   (<p class="foo"> → <p class=foo>).
     * - A quoted value containing '"' is wrapped in single quotes; if it contains both quote
     *   characters it stays in double quotes and every '"' is written as "&quot;".
     *
     * @param \DOMNode $node
     *
     * @return string attribute string without surrounding whitespace, '' if there is nothing to write
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the bare name is sufficient
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                \in_array($attribute->name, ['srcset', 'sizes'], true)
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 with the "u"
                // modifier); keep the untouched value in that case instead of emptying it.
                $collapsed = \preg_replace(self::$regExSpace, ' ', $attr_val);
                if ($collapsed !== null) {
                    $attr_val = $collapsed;
                }
            }

            $quote = '';
            if (!$omit_quotes) {
                $quote = '"';

                if (\strpos($attr_val, '"') !== false) {
                    if (\strpos($attr_val, "'") === false) {
                        // only double quotes inside: single-quote the value
                        $quote = "'";
                    } else {
                        // both quote characters inside: neither delimiter is safe as-is,
                        // so keep double quotes and escape the embedded ones
                        $attr_val = \str_replace('"', '&quot;', $attr_val);
                    }
                }
            }

            $attr_str .= $quote . $attr_val . $quote . ' ';
        }

        return \trim($attr_str);
    }
582
583
    /**
     * Decide whether the closing tag of the given node may be left out of the output.
     *
     * Implements a subset of the tag-omission rules from
     * https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * "Next sibling" below is whatever getNextSiblingOfTypeDOMElement() returns for the node;
     * a null result is handled as the "no more content in the parent element" case.
     *
     * @param \DOMNode $node
     *
     * @return bool true if the end tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        // End tags listed in self::$optional_end_tags are always treated as optional.
        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                // followed by another <li>, or no more content in the parent
                return $is_last || $next_tag_name === 'li';

            case 'rp':
                // followed by <rt> or <rp>, or no more content in the parent
                return $is_last || \in_array($next_tag_name, ['rp', 'rt'], true);

            case 'tr':
                // followed by another <tr>, or no more content in the parent
                return $is_last || $next_tag_name === 'tr';

            case 'source':
                // only inside the listed parents, and only when followed by another <source> or nothing
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($is_last || $next_tag_name === 'source');

            case 'td':
            case 'th':
                // followed by <td> or <th>, or no more content in the parent
                return $is_last || \in_array($next_tag_name, ['td', 'th'], true);

            case 'dd':
            case 'dt':
                // followed by <dd> or <dt>; "no more content in the parent" is valid for <dd> only
                return ($is_last && $tag_name === 'dd')
                       ||
                       \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'option':
                // followed by <option> or <optgroup>, or no more content in the parent
                return $is_last || \in_array($next_tag_name, ['option', 'optgroup'], true);

            case 'p':
                if ($is_last) {
                    // No more content in the parent: allowed unless the parent is one of the
                    // excluded elements or an autonomous custom element (hyphenated tag name).
                    $parent_is_custom_element = $parent_node instanceof \DOMElement
                                                &&
                                                \strpos((string) $parent_tag_name, '-') !== false;

                    return $parent_tag_name !== null
                           &&
                           !$parent_is_custom_element
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           );
                }

                // Otherwise the <p> must be immediately followed by one of these elements.
                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
843
844 48
    protected function domNodeToString(\DOMNode $node): string
845
    {
846
        // init
847 48
        $html = '';
848 48
        $emptyStringTmp = '';
849
850 48
        foreach ($node->childNodes as $child) {
851 48
            if ($emptyStringTmp === 'is_empty') {
852 27
                $emptyStringTmp = 'last_was_empty';
853
            } else {
854 48
                $emptyStringTmp = '';
855
            }
856
857 48
            if ($child instanceof \DOMDocumentType) {
858
                // add the doc-type only if it wasn't generated by DomDocument
859 12
                if (!$this->withDocType) {
860
                    continue;
861
                }
862
863 12
                if ($child->name) {
864 12
                    if (!$child->publicId && $child->systemId) {
865
                        $tmpTypeSystem = 'SYSTEM';
866
                        $tmpTypePublic = '';
867
                    } else {
868 12
                        $tmpTypeSystem = '';
869 12
                        $tmpTypePublic = 'PUBLIC';
870
                    }
871
872 12
                    $html .= '<!DOCTYPE ' . $child->name . ''
873 12
                             . ($child->publicId ? ' ' . $tmpTypePublic . ' "' . $child->publicId . '"' : '')
874 12
                             . ($child->systemId ? ' ' . $tmpTypeSystem . ' "' . $child->systemId . '"' : '')
875 12
                             . '>';
876
                }
877 48
            } elseif ($child instanceof \DOMElement) {
878 48
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
879 48
                $html .= '>' . $this->domNodeToString($child);
880
881
                if (
882 48
                    !$this->doRemoveOmittedHtmlTags
883
                    ||
884 48
                    !$this->domNodeClosingTagOptional($child)
885
                ) {
886 42
                    $html .= '</' . $child->tagName . '>';
887
                }
888
889 48
                if (!$this->doRemoveWhitespaceAroundTags) {
890
                    /** @noinspection NestedPositiveIfStatementsInspection */
891
                    if (
892 47
                        $child->nextSibling instanceof \DOMText
893
                        &&
894 47
                        $child->nextSibling->wholeText === ' '
895
                    ) {
896
                        if (
897 26
                            $emptyStringTmp !== 'last_was_empty'
898
                            &&
899 26
                            \substr($html, -1) !== ' '
900
                        ) {
901 26
                            $html = \rtrim($html);
902
903
                            if (
904 26
                                $child->parentNode
905
                                &&
906 26
                                $child->parentNode->nodeName !== 'head'
907
                            ) {
908 26
                                $html .= ' ';
909
                            }
910
                        }
911 48
                        $emptyStringTmp = 'is_empty';
912
                    }
913
                }
914 44
            } elseif ($child instanceof \DOMText) {
915 44
                if ($child->isElementContentWhitespace()) {
916
                    if (
917 30
                        $child->previousSibling !== null
918
                        &&
919 30
                        $child->nextSibling !== null
920
                    ) {
921
                        if (
922
                            (
923 21
                                $child->wholeText
924
                                &&
925 21
                                \strpos($child->wholeText, ' ') !== false
926
                            )
927
                            ||
928
                            (
929
                                $emptyStringTmp !== 'last_was_empty'
930
                                &&
931 21
                                \substr($html, -1) !== ' '
932
                            )
933
                        ) {
934 21
                            $html = \rtrim($html);
935
936
                            if (
937 21
                                $child->parentNode
938
                                &&
939 21
                                $child->parentNode->nodeName !== 'head'
940
                            ) {
941 21
                                $html .= ' ';
942
                            }
943
                        }
944 30
                        $emptyStringTmp = 'is_empty';
945
                    }
946
                } else {
947 44
                    $html .= $child->wholeText;
948
                }
949 1
            } elseif ($child instanceof \DOMComment) {
950 1
                $html .= '<!--' . $child->textContent . '-->';
951
            }
952
        }
953
954 48
        return $html;
955
    }
956
957
    /**
     * Get the configured value of the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * The value can be changed via setDomainsToRemoveHttpPrefixFromAttributes().
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
964
965
    /**
     * Report the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
972
973
    /**
     * Report the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * When enabled, minify() runs the DOM-based minification step and restores
     * the protected HTML-entities afterwards.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
980
981
    /**
     * Report the current value of the "doRemoveComments" option.
     *
     * When enabled, the DOM-based minification calls removeComments().
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
988
989
    /**
     * Report the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
996
997
    /**
     * Report the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1004
1005
    /**
     * Report the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1012
1013
    /**
     * Report the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1020
1021
    /**
     * Report the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1028
1029
    /**
     * Report the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1036
1037
    /**
     * Report the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1044
1045
    /**
     * Report the current value of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1052
1053
    /**
     * Report the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1060
1061
    /**
     * Report the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1068
1069
    /**
     * Report the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * When enabled, minify() removes a single whitespace character that sits
     * directly between ">" and "<".
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1076
1077
    /**
     * Report the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1084
1085
    /**
     * Report the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * When enabled, the DOM-based minification calls removeWhitespaceAroundTags()
     * for every element.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1092
1093
    /**
     * Report the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1100
1101
    /**
     * Report the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1108
1109
    /**
     * Report the current value of the "doSumUpWhitespace" option.
     *
     * When enabled, the DOM-based minification calls sumUpWhitespace().
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1116
1117
    /**
     * Minify the given HTML.
     *
     * The input is cast to string and trimmed; empty or whitespace-only input yields ''.
     * Afterwards the following steps run in this order: the optional DOM-based
     * minification, whitespace normalization between HTML attributes, the optional
     * removal of a single whitespace character between ">" and "<", the restoring of
     * protected HTML and of protected HTML-entities, and a final textual clean-up.
     *
     * If the result is longer than the trimmed input, the trimmed input is returned.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded to the DOM-based minification
     *
     * @return string
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        $html = (string) $html;
        if (!isset($html[0])) {
            return '';
        }

        $html = \trim($html);
        // Compare with the empty string explicitly: a loose check would also
        // throw away the valid input "0".
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // the regex engine failed (e.g. invalid UTF-8): keep the attributes as they are
                        $attributes = $matches[2] !== '' ? ' ' . $matches[2] : '';
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            // "null" signals a PCRE error: keep the current html instead of emptying it
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if (
            $this->doRemoveSpacesBetweenTags
            &&
            \strpos($html, ' ') !== false
        ) {
            // Remove spaces that are between > and <
            $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly around the "html" and "head" tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1248
1249
    /**
     * Walk the "nextSibling" chain of the given node and return the first
     * sibling that is a \DOMElement.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null <p>"null" when no following sibling is a \DOMElement</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        /** @var \DOMNode|null $sibling - false-positive error from phpstan */
        $sibling = $node->nextSibling;

        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1263
1264
    /**
     * Decide whether the given comment text belongs to a conditional comment.
     *
     * The text counts as conditional when it starts with "[if ...]" or ends with "[endif]".
     *
     * INFO: conditional comments are not supported anymore since IE 10
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // cheap substring check first, the regex only verifies the position

        /** @noinspection RegExpRedundantEscape */
        $hasOpeningCondition = \strpos($comment, '[if ') !== false
                               &&
                               \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($hasOpeningCondition) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        $hasClosingCondition = \strpos($comment, '[endif]') !== false
                               &&
                               \preg_match('/\[endif\]$/', $comment);

        return $hasClosingCondition;
    }
1294
1295
    /**
     * Run the DOM-based part of the minification and return the resulting HTML string.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity <p>forwarded to HtmlDomParser::fixHtmlOutput()</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // --- set up the parser ---

        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // no redundant white space, no indentation in the output
        $dom->getDocument()->preserveWhiteSpace = false;
        $dom->getDocument()->formatOutput = false;

        // --- load the html ---

        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember whether the input starts with a doctype
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // --- 1. protect <nocompress> tags before anything else ---

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // --- 2. tell the observers about every element, before the minification ---

        foreach ($dom->find('*') as $element) {
            $this->notifyObserversAboutDomElementBeforeMinification($element);
        }

        // --- 3. protect html tags and conditional comments ---

        $dom = $this->protectTags($dom);

        // --- 4. remove default html comments [protected html is still protected] ---

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // --- 5. sum-up extra whitespace [protected html is still protected] ---

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        // --- 6. per element: whitespace around tags, then the "after" observers ---

        foreach ($dom->find('*') as $element) {
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($element);
            }

            $this->notifyObserversAboutDomElementAfterMinification($element);
        }

        // --- 7. convert the dom into a string ---

        $minifiedHtml = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($minifiedHtml, $multiDecodeNewHtmlEntity);
    }
1379
1380
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement <p>the element that is passed to the observers</p>
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1391
1392
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement <p>the element that is passed to the observers</p>
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1403
1404
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match that is not removed, the inner html of its parent is saved under
     * the current counter, and the value of the parent node is replaced by a
     * placeholder tag that carries this counter.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $matchedElements = $dom->find($selector);

        foreach ($matchedElements as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            // save the original html
            $this->protectedChildNodes[$this->protected_tags_counter] = $element->parentNode()->innerHtml();

            // put the placeholder into the parent node, if there is one
            $parentNode = $element->getNode()->parentNode;
            if ($parentNode !== null) {
                $placeholder = '<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $this->protected_tags_counter . '"></' . $this->protectedChildNodesHelper . '>';
                $parentNode->nodeValue = $placeholder;
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1428
1429
    /**
     * Protect "code" tags, inline "script" / "style" content and conditional comments
     * against changes by replacing them with placeholders.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $this->protectTagHelper($dom, 'code');

        // inline scripts and styles
        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            if (\in_array($element->tag, ['script', 'style'], true)) {
                $attributes = $element->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$this->protected_tags_counter] = $element->innerhtml;

            $placeholder = '<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $this->protected_tags_counter . '"></' . $this->protectedChildNodesHelper . '>';
            $element->getNode()->nodeValue = $placeholder;

            ++$this->protected_tags_counter;
        }

        // conditional comments
        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $text = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($text)) {
                continue;
            }

            $this->protectedChildNodes[$this->protected_tags_counter] = '<!--' . $text . '-->';

            /* @var $node \DOMComment */
            $node = $element->getNode();
            $placeholder = new \DOMText('<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $this->protected_tags_counter . '"></' . $this->protectedChildNodesHelper . '>');

            // swap the comment for the placeholder text, if the comment has a parent
            $parentNode = $element->getNode()->parentNode;
            if ($parentNode !== null) {
                $parentNode->replaceChild($placeholder, $node);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1486
1487
    /**
     * Remove every comment from the dom whose value does not contain "[",
     * and normalize the document afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $comment = $commentWrapper->getNode();

            // comments with a "[" are kept
            if (\strpos($comment->nodeValue, '[') !== false) {
                continue;
            }

            $parentNode = $comment->parentNode;
            if ($parentNode === null) {
                continue;
            }

            $parentNode->removeChild($comment);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1511
1512
    /**
     * Collapse whitespace in the text nodes in and around the given element.
     *
     * Only elements whose tag is a key of self::$trimWhitespaceFromTags and that have
     * at least one child node are processed. For those, the first child, the last
     * child, the previous sibling and the next sibling are checked, and every text
     * node among them gets the matches of self::$regExSpace replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // The nodes are objects, so iterating by value is enough to modify them;
        // this also avoids a dangling reference after the loop.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            // "null" signals a regex error: keep the old value in that case
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1548
1549
    /**
     * Callback for "preg_replace_callback()": look up the protected html for a placeholder.
     *
     * The id is read from the quoted number in the "attributes" match and is used
     * as key for the saved, protected html.
     *
     * @param array $matches PREG matches
     *
     * @return string <p>the saved html or an empty string, if nothing was saved for the id</p>
     */
    private function restoreProtectedHtml($matches): string
    {
        $idMatches = [];
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatches);

        if (!isset($idMatches['id'], $this->protectedChildNodes[$idMatches['id']])) {
            return '';
        }

        return $this->protectedChildNodes[$idMatches['id']];
    }
1562
1563
    /**
     * Replace the value of the "domainsToRemoveHttpPrefixFromAttributes" property.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>for method chaining</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1574
1575
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Every text node gets the matches of self::$regExSpace replaced by a single
     * space, unless its node path contains "/" followed by one of the entries of
     * self::$skipTagsForRemoveWhitespace. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        $text_nodes = $dom->find('//text()');
        foreach ($text_nodes as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();
            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            // skip text that lives inside of a tag that must keep its whitespace
            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: the "${var}" string interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            // "null" signals a regex error: keep the old value in that case
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1615
1616
    /**
     * Set the "keepBrokenHtml" flag, which is handed to the HtmlDomParser
     * during the DOM-based minification.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return $this <p>for method chaining</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1629
}
1630