Completed
Push — master ( d92977...48460b )
by Lars
01:39
created

HtmlMin::sumUpWhitespace()   B

Complexity

Conditions 7
Paths 11

Size

Total Lines 33

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 16
CRAP Score 7.0099

Importance

Changes 0
Metric Value
dl 0
loc 33
ccs 16
cts 17
cp 0.9412
rs 8.4586
c 0
b 0
f 0
cc 7
nc 11
nop 1
crap 7.0099
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    private static $selfClosingTags = [
38
        'area',
39
        'base',
40
        'basefont',
41
        'br',
42
        'col',
43
        'command',
44
        'embed',
45
        'frame',
46
        'hr',
47
        'img',
48
        'input',
49
        'isindex',
50
        'keygen',
51
        'link',
52
        'meta',
53
        'param',
54
        'source',
55
        'track',
56
        'wbr',
57
    ];
58
59
    private static $trimWhitespaceFromTags = [
60
        'article' => '',
61
        'br'      => '',
62
        'div'     => '',
63
        'footer'  => '',
64
        'hr'      => '',
65
        'nav'     => '',
66
        'p'       => '',
67
        'script'  => '',
68
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
248
     */
249
    private $domLoopObservers;
250
251
    /**
252
     * @var int
253
     */
254
    private $protected_tags_counter = 0;
255
256
    /**
     * HtmlMin constructor.
     *
     * Creates the observer storage and registers the built-in
     * HtmlMinDomObserverOptimizeAttributes observer on it.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
265
266
    /**
     * Register an observer in the DOM-loop observer storage.
     *
     * The storage is an \SplObjectStorage, so attaching the same observer
     * instance twice keeps a single entry.
     *
     * @param HtmlMinDomObserverInterface $observer
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
275
276
    /**
     * Enable or disable the attribute optimizations (initially enabled).
     *
     * While enabled, domNodeAttributesToString() emits known boolean attributes
     * without a value and collapses whitespace inside "srcset" / "sizes" values.
     *
     * @param bool $doOptimizeAttributes
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
287
288
    /**
     * Set the "doOptimizeViaHtmlDomParser" option (initially enabled).
     *
     * @param bool $doOptimizeViaHtmlDomParser
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
299
300
    /**
     * Set the "doRemoveComments" option (initially enabled).
     *
     * @param bool $doRemoveComments
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
311
312
    /**
     * Set the "doRemoveDefaultAttributes" option.
     *
     * The option is initially disabled; calling this method without an
     * argument enables it.
     *
     * @param bool $doRemoveDefaultAttributes
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
323
324
    /**
     * Set the "doRemoveDeprecatedAnchorName" option (initially enabled).
     *
     * @param bool $doRemoveDeprecatedAnchorName
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
335
336
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option (initially enabled).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
347
348
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option (initially enabled).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
359
360
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option (initially enabled).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
371
372
    /**
     * Set the "doRemoveEmptyAttributes" option (initially enabled).
     *
     * @param bool $doRemoveEmptyAttributes
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
383
384
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option.
     *
     * The option is initially disabled; calling this method without an
     * argument enables it.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
395
396
    /**
     * Enable or disable the omission of optional closing tags (initially enabled).
     *
     * While enabled, domNodeToString() skips the closing tag of every element
     * for which domNodeClosingTagOptional() returns true.
     *
     * @param bool $doRemoveOmittedHtmlTags
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
407
408
    /**
     * Enable or disable the removal of optional attribute quotes (initially enabled).
     *
     * While enabled, domNodeAttributesToString() writes attribute values
     * unquoted whenever the value is non-empty and contains no character that
     * requires quoting (<p class="foo"> becomes <p class=foo>).
     *
     * @param bool $doRemoveOmittedQuotes
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
419
420
    /**
     * Set the "doRemoveSpacesBetweenTags" option.
     *
     * The option is initially disabled; calling this method without an
     * argument enables it.
     *
     * @param bool $doRemoveSpacesBetweenTags
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
431
432
    /**
     * Set the "doRemoveValueFromEmptyInput" option (initially enabled).
     *
     * @param bool $doRemoveValueFromEmptyInput
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
443
444
    /**
     * Enable or disable the removal of whitespace around tags.
     *
     * The option is initially disabled; calling this method without an
     * argument enables it. While it is disabled, domNodeToString() keeps a
     * single space after an element that is followed by a one-space text node.
     *
     * @param bool $doRemoveWhitespaceAroundTags
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
455
456
    /**
     * Set the "doSortCssClassNames" option (initially enabled).
     *
     * @param bool $doSortCssClassNames
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
467
468
    /**
     * Set the "doSortHtmlAttributes" option (initially enabled).
     *
     * @param bool $doSortHtmlAttributes
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
479
480
    /**
     * Set the "doSumUpWhitespace" option (initially enabled).
     *
     * @param bool $doSumUpWhitespace
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
491
492 45
    /**
     * Serialize the attributes of a DOM node into an HTML attribute string.
     *
     * - With "doOptimizeAttributes" enabled, attributes listed in
     *   self::$booleanAttributes are written as the bare name (no value), and
     *   whitespace inside "srcset" / "sizes" values is collapsed.
     * - With "doRemoveOmittedQuotes" enabled, quotes are dropped when allowed
     *   (<p class="foo"> becomes <p class=foo>).
     * - Quoted values use double quotes, or single quotes when the value
     *   contains a double quote. When the value contains both quote types it is
     *   written in double quotes with every '"' encoded as "&quot;".
     *
     * @param \DOMNode $node
     *
     * @return string the attributes separated by single spaces, without
     *                surrounding whitespace; an empty string when the node
     *                has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $name = $attribute->name;
            $value = $attribute->value;

            $attr_str .= $name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$name])
            ) {
                // boolean attribute: the bare name is enough
                $attr_str .= ' ';

                continue;
            }

            $attr_str .= '=';

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            // (a failing preg_match() returns false, which keeps the quotes)
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $value !== ''
                           &&
                           \strpos($name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]+/', $value) === 0;

            $attr_val = $value;
            if (
                $this->doOptimizeAttributes
                &&
                ($name === 'srcset' || $name === 'sizes')
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8
                // with the "u" modifier); keep the untouched value in that case
                // instead of silently emitting an empty attribute
                $collapsed = \preg_replace(self::$regExSpace, ' ', $value);
                if ($collapsed !== null) {
                    $attr_val = $collapsed;
                }
            }

            if ($omit_quotes) {
                $quote = '';
            } elseif (\strpos($value, '"') === false) {
                $quote = '"';
            } elseif (\strpos($value, "'") === false) {
                $quote = "'";
            } else {
                // both quote characters occur: neither can delimit the raw value,
                // so encode the double quotes and delimit with double quotes
                $quote = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attr_str .= $quote . $attr_val . $quote . ' ';
        }

        return \trim($attr_str);
    }
553
554
    /**
     * Decide whether the closing tag of the given node may be left out.
     *
     * Rules follow https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * Implemented ("no following element" means getNextSiblingOfTypeDOMElement() returned null):
     * - <html>, <head>, <body>: always treated as optional (self::$optional_end_tags).
     * - <li>: followed by another <li>, or no following element.
     * - <rp>: followed by <rp> or <rt>, or no following element.
     * - <tr>: followed by another <tr>, or no following element.
     * - <td> / <th>: followed by <td> or <th>, or no following element.
     * - <dd>: followed by <dd> or <dt>, or no following element.
     * - <dt>: followed by <dd> or <dt> (never when nothing follows).
     * - <option>: followed by <option> or <optgroup>, or no following element.
     * - <source>: only inside <audio>, <video>, <picture> or <source>, when followed
     *   by another <source> or by no following element.
     * - <p>: followed by one of the block-level elements listed below, or no following
     *   element while the parent exists and is not an a, audio, del, ins, map,
     *   noscript or video element.
     *
     * @noinspection TodoComment
     *
     * TODO: Not Implemented
     * - <html> may be omitted if first thing inside is not comment
     * - <head> may be omitted if first thing inside is an element
     * - <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
     * - <colgroup> start tag: first thing inside is <col> and not preceded by a <colgroup> whose end tag was omitted
     * - <colgroup> / <caption> end tag: not immediately followed by ASCII whitespace or a comment
     * - <optgroup> end tag: followed by another <optgroup>, or no more content in the parent
     * - <thead> end tag: followed by <tbody> or <tfoot>
     * - <tbody> start tag: first thing inside is <tr> and not preceded by a tbody/thead/tfoot whose end tag was omitted
     * - <tbody> end tag: followed by <tbody> or <tfoot>, or no more content in the parent
     * - <tfoot> end tag: no more content in the parent
     * - <p> inside an autonomous custom element
     * <-- However, a start tag must never be omitted if it has any attributes.
     *
     * @param \DOMNode $node
     *
     * @return bool
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;

        /** @var \DOMNode|null $parent_node - false-positive error from phpstan */
        $parent_node = $node->parentNode;
        $parent_tag_name = $parent_node ? $parent_node->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // tag name => tag names of a following element that make the closing tag optional
        $allowedFollowers = [
            'li'     => ['li'],
            'rp'     => ['rp', 'rt'],
            'tr'     => ['tr'],
            'source' => ['source'],
            'td'     => ['td', 'th'],
            'th'     => ['td', 'th'],
            'dd'     => ['dd', 'dt'],
            'dt'     => ['dd', 'dt'],
            'option' => ['option', 'optgroup'],
            'p'      => [
                'address',
                'article',
                'aside',
                'blockquote',
                'details',
                'dir',
                'div',
                'dl',
                'fieldset',
                'figcaption',
                'figure',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'main',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
        ];

        if (!isset($allowedFollowers[$tag_name])) {
            return false;
        }

        if (
            $tag_name === 'source'
            &&
            !\in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
        ) {
            return false;
        }

        if ($nextSibling === null) {
            // a trailing <dt> must keep its closing tag
            if ($tag_name === 'dt') {
                return false;
            }

            // a trailing <p> may only be left open inside certain parents
            if ($tag_name === 'p') {
                return $parent_tag_name !== null
                       &&
                       !\in_array(
                           $parent_tag_name,
                           ['a', 'audio', 'del', 'ins', 'map', 'noscript', 'video'],
                           true
                       );
            }

            return true;
        }

        return $nextSibling instanceof \DOMElement
               &&
               \in_array($nextSibling->tagName, $allowedFollowers[$tag_name], true);
    }
814
815 45
    /**
     * Serialize the children of a DOM node into a minified HTML string.
     *
     * Handles doctype, element, text and comment nodes; other child node types
     * are skipped. Elements are serialized recursively, their attributes via
     * domNodeAttributesToString().
     *
     * $emptyStringTmp tracks whitespace state across siblings: it is set to
     * 'is_empty' when a whitespace position was handled for the current child
     * and becomes 'last_was_empty' for the next child, which then suppresses a
     * second consecutive space.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';

                        // the system id follows the public id without its own
                        // keyword, separated by exactly one space
                        if ($child->systemId) {
                            $html .= ' "' . $child->systemId . '"';
                        }
                    } elseif ($child->systemId) {
                        $html .= ' SYSTEM "' . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() removes the separator space when there are no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (
                    !$this->doRemoveWhitespaceAroundTags
                    &&
                    $child->nextSibling instanceof \DOMText
                    &&
                    $child->nextSibling->wholeText === ' '
                ) {
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html .= ' ';
                    }
                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $html .= $child->wholeText;
                } elseif (
                    $child->previousSibling !== null
                    &&
                    $child->nextSibling !== null
                ) {
                    // whitespace-only text between two siblings: keep at most one space
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html .= ' ';
                    }
                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
903
904
    /**
     * Get the currently configured value of "domainsToRemoveHttpPrefixFromAttributes".
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
911
912
    /**
     * Get the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
919
920
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
927
928
    /**
     * Get the current value of the "doRemoveComments" option.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
935
936
    /**
     * Get the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
943
944
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
951
952
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
959
960
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
967
968
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
975
976
    /**
     * Get the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
983
984
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
991
992
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
999
1000
    /**
     * Get the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1007
1008
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1015
1016
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1023
1024
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1031
1032
    /**
     * Get the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1039
1040
    /**
     * Get the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1047
1048
    /**
     * Get the current value of the "doSumUpWhitespace" option.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1055
1056
    /**
     * Minify an HTML string.
     *
     * Steps, in order: optional DOM based minification, collapsing of the
     * whitespace between tag attributes, optional removal of a single space
     * between tags, restoring of the protected HTML and HTML-entities and a
     * final clean-up of line breaks around "html" / "head" tags and of the
     * self-closing tag syntax.
     *
     * If the result is longer than the trimmed input, the trimmed input is
     * returned instead.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
     *
     * @return string <p>The minified HTML, or an empty string for empty / whitespace-only input.</p>
     */
    public function minify($html, $decodeUtf8Specials = false): string
    {
        // Compare against '' explicitly: a loose truthiness check would also
        // throw away the valid (non-empty) input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // init
        static $CACHE_SELF_CLOSING_TAGS = null;
        if ($CACHE_SELF_CLOSING_TAGS === null) {
            $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content, so that we can fall back to it at the end
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        $html = (string) \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#u',
            static function ($matches) {
                return '<' . $matches[1] . \preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]) . $matches[3] . '>';
            },
            $html
        );

        if ($this->doRemoveSpacesBetweenTags) {
            // Remove spaces that are between > and <
            $html = (string) \preg_replace('/(>) (<)/', '>$2', $html);
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $html = (string) \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        );

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop the line breaks directly before / after "html" and "head" tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ... and they don't need an explicit closing tag, e.g. "<br></br>" -> "<br>"
        $html = (string) \preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html);

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1186
1187
    /**
     * Walk over the following siblings of the given node and return the
     * first one that is a \DOMElement.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null <p>The next element sibling, or null if there is none.</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1201
1202
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: conditional comments are not working anymore since IE >= 10
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or
     * when it ends with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        return \preg_match('/^\[if [^\]]+\]/', $comment) === 1
               ||
               \preg_match('/\[endif\]$/', $comment) === 1;
    }
1226
1227
    /**
     * Run the DOM based part of the minification and return the result as string.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Passed through to "HtmlDomParser::fixHtmlOutput()".</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // -------------------------------------------------------------------------
        // Create + configure the parser and load the html.
        // -------------------------------------------------------------------------

        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // no redundant white space + no indentation in the output
        $dom->getDocument()->preserveWhiteSpace = false;
        $dom->getDocument()->formatOutput = false;

        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember if the input starts with a doctype
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // -------------------------------------------------------------------------
        // <nocompress> tags are protected before anything else happens.
        // -------------------------------------------------------------------------

        $dom = $this->protectTagHelper($dom, 'nocompress');

        // -------------------------------------------------------------------------
        // Observers see every element before the minification ...
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // -------------------------------------------------------------------------
        // Protect the remaining HTML tags and the conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Drop default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        // -------------------------------------------------------------------------
        // Whitespace around tags [protected html is still protected]
        // ... and observers see every element after the minification.
        // -------------------------------------------------------------------------

        foreach ($dom->find('*') as $domElement) {
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // -------------------------------------------------------------------------
        // Serialize the Dom.
        // -------------------------------------------------------------------------

        $htmlFromDom = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($htmlFromDom, $decodeUtf8Specials);
    }
1311
1312
    /**
     * Call "domElementAfterMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1323
1324
    /**
     * Call "domElementBeforeMinification()" on every registered dom-loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1335
1336
    /**
     * Protect everything that matches the selector: the inner html of the
     * parent of each match is stored in "$this->protectedChildNodes" and the
     * value of that parent node is replaced by a numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $match) {
            if ($match->isRemoved()) {
                continue;
            }

            $id = $this->protected_tags_counter;
            $placeholder = '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';

            // store the original html first, then overwrite it with the placeholder
            $this->protectedChildNodes[$id] = $match->parentNode()->innerHtml();
            $match->getNode()->parentNode->nodeValue = $placeholder;

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1357
1358
    /**
     * Prevent changes of "code" tags, inline "styles" / "scripts" and
     * conditional comments, by replacing them with numbered placeholder tags.
     *
     * The original content is stored in "$this->protectedChildNodes".
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        // --- inline scripts + styles ---

        foreach ($dom->find('script, style') as $tagElement) {
            if ($tagElement->isRemoved()) {
                continue;
            }

            if (\in_array($tagElement->tag, ['script', 'style'], true)) {
                $tagAttributes = $tagElement->getAllAttributes();
                // skip external links
                if (isset($tagAttributes['src'])) {
                    continue;
                }
            }

            $id = $this->protected_tags_counter;

            $this->protectedChildNodes[$id] = $tagElement->innerhtml;
            $tagElement->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        // --- conditional comments ---

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $id = $this->protected_tags_counter;

            $this->protectedChildNodes[$id] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that holds the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>');
            /** @noinspection UnusedFunctionResultInspection */
            $commentElement->getNode()->parentNode->replaceChild($placeholderNode, $commentNode);

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1413
1414
    /**
     * Remove the comments from the dom, except for comments that contain
     * a "[" character, and normalize the document afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            // keep every comment with a "[" in it
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1436
1437
    /**
     * Trim tags in the dom.
     *
     * Only elements whose tag is a key of "self::$trimWhitespaceFromTags" and
     * that have at least one child node are processed. For such an element the
     * first child, the last child, the previous sibling and the next sibling
     * are inspected, and in each of them that is a text node every match of
     * "self::$regExSpace" is replaced by a single space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // DOM nodes are object handles, so iterating by value still updates the
        // real nodes; a by-reference loop would only leave a dangling reference.
        foreach ($candidates as $candidate) {
            if ($candidate === null) {
                continue;
            }

            if ($candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // "preg_replace()" returns null on error, keep the old value in that case
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1473
1474
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the id from the attributes of a placeholder tag and returns the
     * html that was stored for this id in "$this->protectedChildNodes".
     *
     * @param array $matches PREG matches
     *
     * @return string <p>The protected html, or an empty string if no id was found or nothing is stored for it.</p>
     */
    private function restoreProtectedHtml($matches): string
    {
        // Without this guard a placeholder without a (quoted) id would trigger an
        // "undefined array key" warning on "$matchesInner['id']" below.
        if (
            !isset($matches['attributes'])
            ||
            \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
        ) {
            return '';
        }

        return (string) ($this->protectedChildNodes[$matchesInner['id']] ?? '');
    }
1492
1493
    /**
     * Set the value of "domainsToRemoveHttpPrefixFromAttributes".
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>(fluent interface)</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1504
1505
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node, each match of "self::$regExSpace" is replaced by a
     * single space. Text nodes whose node path contains "/<pattern>", for any
     * pattern from "self::$skipTagsForRemoveWhitespace", are left untouched.
     * The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        $text_nodes = $dom->find('//text()');
        foreach ($text_nodes as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();
            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: the "${var}" string interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    // the text node lives inside of a tag that must not be touched
                    continue 2;
                }
            }

            // "preg_replace()" returns null on error, keep the old value in that case
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1545
1546
    /**
     * Set the "keepBrokenHtml" option, the value is handed over to the
     * "HtmlDomParser" on minification.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin <p>(fluent interface)</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1559
}
1560