Completed
Push — master ( 48460b...554df0 )
by Lars
01:59
created

HtmlMin::removeComments()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 15
ccs 8
cts 8
cp 1
rs 9.7666
c 0
b 0
f 0
cc 3
nc 3
nop 1
crap 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    private static $selfClosingTags = [
38
        'area',
39
        'base',
40
        'basefont',
41
        'br',
42
        'col',
43
        'command',
44
        'embed',
45
        'frame',
46
        'hr',
47
        'img',
48
        'input',
49
        'isindex',
50
        'keygen',
51
        'link',
52
        'meta',
53
        'param',
54
        'source',
55
        'track',
56
        'wbr',
57
    ];
58
59
    private static $trimWhitespaceFromTags = [
60
        'article' => '',
61
        'br'      => '',
62
        'div'     => '',
63
        'footer'  => '',
64
        'hr'      => '',
65
        'nav'     => '',
66
        'p'       => '',
67
        'script'  => '',
68
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
248
     */
249
    private $domLoopObservers;
250
251
    /**
252
     * @var int
253
     */
254
    private $protected_tags_counter = 0;
255
256
    /**
     * Create a minifier with an empty observer storage and register the
     * default attribute-optimizing DOM observer.
     */
    public function __construct()
    {
        // The storage must exist before the first observer gets attached.
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
265
266
    /**
     * Register an observer in the internal observer storage.
     *
     * @param HtmlMinDomObserverInterface $observer the observer to add to the storage
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $storage = $this->domLoopObservers;
        $storage->attach($observer);
    }
275
276
    /**
     * Enable or disable attribute optimization.
     *
     * When enabled, the attribute serializer of this class emits known
     * boolean attributes without a value and collapses whitespace inside
     * "srcset" / "sizes" values.
     *
     * @param bool $doOptimizeAttributes new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
287
288
    /**
     * Store the "optimize via HTML DOM parser" flag.
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
299
300
    /**
     * Store the "remove comments" flag.
     *
     * @param bool $doRemoveComments new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
311
312
    /**
     * Store the "remove default attributes" flag.
     *
     * @param bool $doRemoveDefaultAttributes new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
323
324
    /**
     * Store the "remove deprecated anchor name" flag.
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
335
336
    /**
     * Store the "remove deprecated script charset attribute" flag.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
347
348
    /**
     * Store the "remove deprecated type from script tag" flag.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
359
360
    /**
     * Store the "remove deprecated type from stylesheet link" flag.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
371
372
    /**
     * Store the "remove empty attributes" flag.
     *
     * @param bool $doRemoveEmptyAttributes new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
383
384
    /**
     * Store the "remove http prefix from attributes" flag.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
395
396
    /**
     * Enable or disable the removal of optional closing tags.
     *
     * When enabled, the node serializer of this class skips a closing tag
     * whenever domNodeClosingTagOptional() reports it as optional.
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
407
408
    /**
     * Enable or disable the removal of quotes around attribute values.
     *
     * When enabled, the attribute serializer of this class drops the quotes
     * for values that are safe to write unquoted.
     *
     * @param bool $doRemoveOmittedQuotes new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
419
420
    /**
     * Store the "remove spaces between tags" flag.
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
431
432
    /**
     * Store the "remove value from empty input" flag.
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
443
444
    /**
     * Enable or disable the removal of whitespace around tags.
     *
     * While disabled, the node serializer of this class keeps a single space
     * after an element that is directly followed by a one-space text node.
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
455
456
    /**
     * Store the "sort CSS class names" flag.
     *
     * @param bool $doSortCssClassNames new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
467
468
    /**
     * Store the "sort HTML attributes" flag.
     *
     * @param bool $doSortHtmlAttributes new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
479
480
    /**
     * Store the "sum up whitespace" flag.
     *
     * @param bool $doSumUpWhitespace new value of the flag (enabled by default)
     *
     * @return $this fluent interface
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
491
492 46
    /**
     * Serialize all attributes of a DOM node into one minified attribute string.
     *
     * - Known boolean attributes are written without a value when attribute
     *   optimization is enabled.
     * - Whitespace inside "srcset" / "sizes" values is collapsed when attribute
     *   optimization is enabled.
     * - Quotes are dropped when allowed (<p class="foo"> → <p class=foo>).
     * - A quoted value that contains a double quote is wrapped in single quotes;
     *   if it contains both quote characters, double quotes are kept and the
     *   embedded double quotes are written as "&quot;" so the value cannot
     *   terminate the attribute early.
     *
     * @param \DOMNode $node the node whose attributes are serialized
     *
     * @return string the attributes separated by single spaces, without
     *                leading or trailing whitespace ('' if there are none)
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $attr_str .= $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                // boolean attribute: the name alone is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            $attr_val = $attribute->value;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $attribute->name === 'srcset'
                    ||
                    $attribute->name === 'sizes'
                )
            ) {
                $collapsed = \preg_replace(self::$regExSpace, ' ', $attr_val);
                // preg_replace() returns null on failure (e.g. invalid UTF-8 with
                // the "u" modifier); keep the untouched value instead of dropping it.
                if ($collapsed !== null) {
                    $attr_val = $collapsed;
                }
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $attribute->value !== ''
                           &&
                           \strpos($attribute->name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($attribute->name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]/', $attribute->value) === 0;

            if ($omit_quotes) {
                $attr_str .= $attr_val . ' ';

                continue;
            }

            $quoteTmp = '"';
            if (\strpos($attr_val, '"') !== false) {
                if (\strpos($attr_val, "'") === false) {
                    // only double quotes inside: single quotes are a safe delimiter
                    $quoteTmp = "'";
                } else {
                    // both quote kinds inside: no delimiter is safe as-is, so
                    // keep double quotes and escape the embedded ones
                    $attr_val = \str_replace('"', '&quot;', $attr_val);
                }
            }

            $attr_str .= $quoteTmp . $attr_val . $quoteTmp . ' ';
        }

        return \trim($attr_str);
    }
553
554
    /**
     * Decide whether the closing tag of the given node may be left out.
     *
     * Rules are taken from
     * https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * Implemented:
     *
     * - The tags listed in self::$optional_end_tags are always reported as optional.
     * - A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
     * - An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
     * - A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
     * - An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
     * - A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
     * - A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
     * - A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
     * - A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
     * - An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.
     * - A <source> end tag inside audio, video, picture or source is reported as optional when it is the last element or is followed by another source element.
     *
     * "Immediately followed" / "no more content" are evaluated via
     * getNextSiblingOfTypeDOMElement().
     *
     * @param \DOMNode $node the element whose closing tag is examined
     *
     * @return bool true if the closing tag may be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $is_last = $nextSibling === null;
        $next_tag_name = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        switch ($tag_name) {
            case 'li':
                return $is_last || $next_tag_name === 'li';

            case 'rp':
                return $is_last || \in_array($next_tag_name, ['rp', 'rt'], true);

            case 'tr':
                return $is_last || $next_tag_name === 'tr';

            case 'source':
                return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                       &&
                       ($is_last || $next_tag_name === 'source');

            case 'td':
            case 'th':
                return $is_last || \in_array($next_tag_name, ['td', 'th'], true);

            case 'dd':
                return $is_last || \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'dt':
                // unlike <dd>, a trailing <dt> must keep its end tag
                return \in_array($next_tag_name, ['dd', 'dt'], true);

            case 'option':
                return $is_last || \in_array($next_tag_name, ['option', 'optgroup'], true);

            case 'p':
                if ($is_last) {
                    return $parent_tag_name !== null
                           &&
                           !\in_array(
                               $parent_tag_name,
                               [
                                   'a',
                                   'audio',
                                   'del',
                                   'ins',
                                   'map',
                                   'noscript',
                                   'video',
                               ],
                               true
                           )
                           &&
                           // autonomous custom elements always carry a hyphen in their name
                           \strpos($parent_tag_name, '-') === false;
                }

                return \in_array(
                    $next_tag_name,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'details',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'figcaption',
                        'figure',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'main',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
814
815 46
    /**
     * Serialize the children of a DOM node (recursively) into an HTML string.
     *
     * Handles doctype, element, text and comment nodes; other node types are
     * skipped. Consecutive "whitespace only" positions are tracked so that at
     * most one space is written between neighbouring nodes.
     *
     * @param \DOMNode $node the node whose child nodes are serialized
     *
     * @return string the HTML of all child nodes
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';
        // '' | 'is_empty' | 'last_was_empty': whether the previous iteration
        // ended on a whitespace position
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';

                        // after a public identifier the system identifier follows
                        // without a keyword, separated by exactly one space
                        if ($child->systemId) {
                            $html .= ' "' . $child->systemId . '"';
                        }
                    } elseif ($child->systemId) {
                        $html .= ' SYSTEM "' . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() removes the separator again if there are no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (
                    !$this->doRemoveWhitespaceAroundTags
                    &&
                    $child->nextSibling instanceof \DOMText
                    &&
                    $child->nextSibling->wholeText === ' '
                ) {
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html .= ' ';
                    }
                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $html .= $child->wholeText;
                } elseif (
                    $child->previousSibling !== null
                    &&
                    $child->nextSibling !== null
                ) {
                    // whitespace between two siblings: keep at most one space
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html .= ' ';
                    }
                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
903
904
    /**
     * Get the currently configured value of the
     * "domainsToRemoveHttpPrefixFromAttributes" option.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
911
912
    /**
     * Tell whether the "doOptimizeAttributes" option is currently enabled.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
919
920
    /**
     * Tell whether the "doOptimizeViaHtmlDomParser" option is currently enabled.
     *
     * When enabled, minify() runs the DOM based minification step.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
927
928
    /**
     * Tell whether the "doRemoveComments" option is currently enabled.
     *
     * When enabled, the DOM based minification removes comment nodes.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
935
936
    /**
     * Tell whether the "doRemoveDefaultAttributes" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
943
944
    /**
     * Tell whether the "doRemoveDeprecatedAnchorName" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
951
952
    /**
     * Tell whether the "doRemoveDeprecatedScriptCharsetAttribute" option is
     * currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
959
960
    /**
     * Tell whether the "doRemoveDeprecatedTypeFromScriptTag" option is
     * currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
967
968
    /**
     * Tell whether the "doRemoveDeprecatedTypeFromStylesheetLink" option is
     * currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
975
976
    /**
     * Tell whether the "doRemoveEmptyAttributes" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
983
984
    /**
     * Tell whether the "doRemoveHttpPrefixFromAttributes" option is currently
     * enabled.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
991
992
    /**
     * Tell whether the "doRemoveOmittedHtmlTags" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
999
1000
    /**
     * Tell whether the "doRemoveOmittedQuotes" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1007
1008
    /**
     * Tell whether the "doRemoveSpacesBetweenTags" option is currently enabled.
     *
     * When enabled, minify() removes a single whitespace character that sits
     * directly between ">" and "<".
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1015
1016
    /**
     * Tell whether the "doRemoveValueFromEmptyInput" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1023
1024
    /**
     * Tell whether the "doRemoveWhitespaceAroundTags" option is currently enabled.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1031
1032
    /**
     * Tell whether the "doSortCssClassNames" option is currently enabled.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1039
1040
    /**
     * Tell whether the "doSortHtmlAttributes" option is currently enabled.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1047
1048
    /**
     * Tell whether the "doSumUpWhitespace" option is currently enabled.
     *
     * When enabled, the DOM based minification collapses whitespace in text nodes.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1055
1056
    /**
     * Minify the given HTML.
     *
     * Steps, in order:
     * 1. optional DOM based minification (if "doOptimizeViaHtmlDomParser" is enabled)
     * 2. normalize the whitespace between the attributes of each tag
     * 3. optionally remove a whitespace character between ">" and "<"
     * 4. restore the protected HTML snippets and the protected HTML entities
     * 5. final string clean-up (line breaks around html/head tags, trailing
     *    slashes / closing tags of self-closing elements)
     *
     * If the result is longer than the (trimmed) input, the trimmed input is
     * returned unchanged. If one of the regex based steps fails (PCRE error),
     * that step is skipped instead of discarding the document.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
     *
     * @return string
     */
    public function minify($html, $decodeUtf8Specials = false): string
    {
        $html = \trim((string) $html);
        // Compare against the empty string explicitly: "0" is falsy in PHP
        // but it is still valid content that must not be dropped.
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);
                    if ($attributes === null) {
                        // PCRE error (e.g. invalid UTF-8): keep the tag untouched
                        // instead of silently dropping all of its attributes
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );
            // null signals a PCRE error, keep the current html in that case
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if (
            $this->doRemoveSpacesBetweenTags
            &&
            \strpos($html, ' ') !== false
        ) {
            // Remove spaces that are between > and <
            $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            // the helper is used as a literal tag name, so quote it for the regex
            $helperQuoted = \preg_quote($this->protectedChildNodesHelper, '/');
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $helperQuoted . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $helperQuoted . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );
            if ($htmlTmp === null) {
                // the placeholders could not be restored, so the minified
                // output is unusable: fall back to the original input
                return $origHtml;
            }
            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1187
1188
    /**
     * Find the closest following sibling of the given node that is a \DOMElement.
     *
     * Siblings of any other node type (text, comment, ...) are skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null <p>The next element sibling, or null if there is none.</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1202
1203
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or when
     * it ends with "[endif]".
     *
     * @param string $comment <p>The comment text, without the comment delimiters.</p>
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap substring lookup runs first, the regex only confirms the position
        /** @noinspection RegExpRedundantEscape */
        $startsWithIf = \strpos($comment, '[if ') !== false
                        &&
                        \preg_match('/^\[if [^\]]+\]/', $comment);
        if ($startsWithIf) {
            return true;
        }

        /** @noinspection RegExpRedundantEscape */
        return \strpos($comment, '[endif]') !== false
               &&
               \preg_match('/\[endif\]$/', $comment);
    }
1233
1234
    /**
     * Run the DOM based part of the minification and return the result as string.
     *
     * Order of the steps: load the html, protect "nocompress" tags, notify
     * the observers (before), protect further tags and conditional comments,
     * optionally remove comments, optionally sum-up whitespace, then, per
     * element, optionally clean whitespace around tags and notify the
     * observers (after), and finally serialize the document.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Passed through to the output fix of the dom parser.</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $dom->getDocument()->preserveWhiteSpace = false; // drop redundant white space
        $dom->getDocument()->formatOutput = false; // no indentation in the output

        // parse the html
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember whether the input starts with a doctype
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // <nocompress> tags are protected before anything else happens
        $dom = $this->protectTagHelper($dom, 'nocompress');

        // observers see every element before the minification ...
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // protect html tags and conditional comments
        $dom = $this->protectTags($dom);

        // remove default html comments [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // sum-up extra whitespace [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            // whitespace around tags [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            // ... and again after the minification
            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // convert the dom into a string
        $minifiedHtml = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($minifiedHtml, $decodeUtf8Specials);
    }
1318
1319
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement <p>The element that was processed.</p>
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1330
1331
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement <p>The element that is about to be processed.</p>
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1342
1343
    /**
     * Protect the content around every element that matches the selector.
     *
     * For each match, the inner html of the element's parent is stored in
     * "$this->protectedChildNodes" and the node value of that parent is
     * replaced with a numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helper = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $id = $this->protected_tags_counter;

            // remember the original html, then swap it for the placeholder
            $this->protectedChildNodes[$id] = $element->parentNode()->innerHtml();
            $element->getNode()->parentNode->nodeValue = '<' . $helper . ' data-' . $helper . '="' . $id . '"></' . $helper . '>';

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1364
1365
    /**
     * Prevent changes of "code" tags, inline "styles" and "scripts" and of
     * conditional comments.
     *
     * The protected content is stored in "$this->protectedChildNodes" and
     * replaced in the dom with a numbered placeholder tag. "script" / "style"
     * elements that have a "src" attribute are left untouched.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helper = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            if (\in_array($element->tag, ['script', 'style'], true)) {
                $attributes = $element->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = $element->innerhtml;
            $element->getNode()->nodeValue = '<' . $helper . ' data-' . $helper . '="' . $id . '"></' . $helper . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $text = $element->text();

            // normal comments are not protected
            if (!$this->isConditionalComment($text)) {
                continue;
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = '<!--' . $text . '-->';

            // swap the comment node for a text node that carries the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            $placeholder = new \DOMText('<' . $helper . ' data-' . $helper . '="' . $id . '"></' . $helper . '>');
            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->replaceChild($placeholder, $commentNode);

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1420
1421
    /**
     * Remove comments from the dom.
     *
     * Comments whose text contains a "[" are kept, every other comment node
     * is removed. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            // keep everything that contains a "["
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1443
1444
    /**
     * Collapse whitespace in the text nodes in and around the given element.
     *
     * Only elements whose tag is listed in "self::$trimWhitespaceFromTags"
     * and that have at least one child node are processed. For those, the
     * first child, the last child, the previous sibling and the next sibling
     * are inspected; in each of them that is a text node, every match of
     * "self::$regExSpace" is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // Iterate by value: the candidates are objects, so the nodes are still
        // modified in place and no dangling reference is left behind.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            // null signals a PCRE error, keep the old value in that case
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1480
1481
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the numeric id from the quoted attribute value of a placeholder
     * tag and returns the protected html that was stored under this id.
     *
     * @param array $matches PREG matches, the named group "attributes" is used
     *
     * @return string <p>The protected html, or an empty string if the
     *                placeholder has no id or nothing is stored for it.</p>
     */
    private function restoreProtectedHtml($matches): string
    {
        $attributes = $matches['attributes'] ?? '';

        // Bail out before the lookup: reading a missing "id" key as array
        // index would raise an "undefined index" notice / warning.
        if (\preg_match('/.*"(?<id>\d+)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1494
1495
    /**
     * Set the value of the "domainsToRemoveHttpPrefixFromAttributes" option.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>For a fluent interface.</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1506
1507
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node, each match of "self::$regExSpace" is replaced by a
     * single space. Text nodes whose node path contains "/" followed by one
     * of the entries of "self::$skipTagsForRemoveWhitespace" are left
     * untouched. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser <p>The same parser instance that was passed in.</p>
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $textNodeWrapper) {
            /* @var $textNode \DOMNode */
            $textNode = $textNodeWrapper->getNode();
            $nodePath = $textNode->getNodePath();
            if ($nodePath === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation instead of "${pattern}" interpolation,
                // which is deprecated since PHP 8.2
                if (\strpos($nodePath, '/' . $pattern) !== false) {
                    // this text node lives inside a skipped tag
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
            // null signals a PCRE error, keep the old value in that case
            if ($nodeValueTmp !== null) {
                $textNode->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1547
1548
    /**
     * Set the "keepBrokenHtml" option, which is handed over to the dom parser
     * before the html is loaded.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin <p>For a fluent interface.</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1561
}
1562