Completed
Push — master ( 2d5552...d92977 )
by Lars
02:49
created

getDomainsToRemoveHttpPrefixFromAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    private static $selfClosingTags = [
38
        'area',
39
        'base',
40
        'basefont',
41
        'br',
42
        'col',
43
        'command',
44
        'embed',
45
        'frame',
46
        'hr',
47
        'img',
48
        'input',
49
        'isindex',
50
        'keygen',
51
        'link',
52
        'meta',
53
        'param',
54
        'source',
55
        'track',
56
        'wbr',
57
    ];
58
59
    private static $trimWhitespaceFromTags = [
60
        'article' => '',
61
        'br'      => '',
62
        'div'     => '',
63
        'footer'  => '',
64
        'hr'      => '',
65
        'nav'     => '',
66
        'p'       => '',
67
        'script'  => '',
68
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
248
     */
249
    private $domLoopObservers;
250
251
    /**
     * HtmlMin constructor.
     *
     * Initializes the observer storage with an empty \SplObjectStorage and
     * registers one HtmlMinDomObserverOptimizeAttributes instance on it.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
260
261
    /**
     * Register an observer in the internal observer storage.
     *
     * Registered observers are the ones iterated by the
     * notifyObserversAboutDomElement*() methods of this class.
     *
     * @param HtmlMinDomObserverInterface $observer observer to attach
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
270
271
    /**
     * Call domElementBeforeMinification() on every attached observer.
     *
     * Each observer receives the DOM element and this HtmlMin instance.
     *
     * @param SimpleHtmlDomInterface $domElement element handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
282
283
    /**
     * Call domElementAfterMinification() on every attached observer.
     *
     * Each observer receives the DOM element and this HtmlMin instance.
     *
     * @param SimpleHtmlDomInterface $domElement element handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
294
295
    /**
     * Enable or disable the "doOptimizeAttributes" option (enabled by default).
     *
     * Within this class the flag is read by domNodeAttributesToString(), where
     * it controls the bare output of boolean attributes and the whitespace
     * collapsing of "srcset" / "sizes" values.
     *
     * @param bool $doOptimizeAttributes new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
306
307
    /**
     * Enable or disable the "doOptimizeViaHtmlDomParser" option (enabled by default).
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
318
319
    /**
     * Enable or disable the "doRemoveComments" option (enabled by default).
     *
     * @param bool $doRemoveComments new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
330
331
    /**
     * Enable or disable the "doRemoveDefaultAttributes" option (disabled by default).
     *
     * @param bool $doRemoveDefaultAttributes new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
342
343
    /**
     * Enable or disable the "doRemoveDeprecatedAnchorName" option (enabled by default).
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
354
355
    /**
     * Enable or disable the "doRemoveDeprecatedScriptCharsetAttribute" option (enabled by default).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
366
367
    /**
     * Enable or disable the "doRemoveDeprecatedTypeFromScriptTag" option (enabled by default).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
378
379
    /**
     * Enable or disable the "doRemoveDeprecatedTypeFromStylesheetLink" option (enabled by default).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
390
391
    /**
     * Enable or disable the "doRemoveEmptyAttributes" option (enabled by default).
     *
     * @param bool $doRemoveEmptyAttributes new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
402
403
    /**
     * Enable or disable the "doRemoveHttpPrefixFromAttributes" option (disabled by default).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
414
415
    /**
     * Report the current value of the "doSortCssClassNames" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
422
423
    /**
     * Report the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
430
431
    /**
     * Report the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
438
439
    /**
     * Report the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
446
447
    /**
     * Report the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
454
455
    /**
     * Report the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
462
463
    /**
     * Report the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
470
471
    /**
     * Report the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
478
479
    /**
     * Report the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
486
487
    /**
     * Report the current value of the "doSumUpWhitespace" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
494
495
    /**
     * Report the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
502
503
    /**
     * Report the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
510
511
    /**
     * Report the current value of the "doOptimizeAttributes" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
518
519
    /**
     * Report the current value of the "doRemoveComments" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
526
527
    /**
     * Report the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
534
535
    /**
     * Report the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
542
543
    /**
     * Report the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
550
551
    /**
     * Report the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool true when the option is enabled
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
558
559
    /**
     * Return the configured list of domains stored in
     * $domainsToRemoveHttpPrefixFromAttributes.
     *
     * @return array the domain list; the property default holds plain host-name strings
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
566
567
    /**
     * Enable or disable the "doRemoveOmittedHtmlTags" option (enabled by default).
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
578
579
    /**
     * Enable or disable the "doRemoveOmittedQuotes" option (enabled by default).
     *
     * Within this class the flag is read by domNodeAttributesToString(), where
     * it allows attribute values to be written without surrounding quotes.
     *
     * @param bool $doRemoveOmittedQuotes new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
590
591
    /**
     * Enable or disable the "doRemoveSpacesBetweenTags" option (disabled by default).
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
602
603
    /**
     * Enable or disable the "doRemoveValueFromEmptyInput" option (enabled by default).
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
614
615
    /**
     * Enable or disable the "doRemoveWhitespaceAroundTags" option (disabled by default).
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
626
627
    /**
     * Enable or disable the "doSortCssClassNames" option (enabled by default).
     *
     * @param bool $doSortCssClassNames new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
638
639
    /**
     * Enable or disable the "doSortHtmlAttributes" option (enabled by default).
     *
     * @param bool $doSortHtmlAttributes new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
650
651
    /**
     * Enable or disable the "doSumUpWhitespace" option (enabled by default).
     *
     * @param bool $doSumUpWhitespace new value of the option; defaults to true
     *
     * @return $this the same instance, to allow method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
662
663 45
    /**
     * Serialize the attributes of a DOM node into a space-separated string.
     *
     * - With "doOptimizeAttributes" enabled, attributes listed in
     *   self::$booleanAttributes are written as the bare name, and the values
     *   of "srcset" / "sizes" get their whitespace runs collapsed.
     * - With "doRemoveOmittedQuotes" enabled, quotes are dropped when the value
     *   is non-empty and contains none of the characters that force quoting
     *   (<p class="foo"> → <p class=foo>).
     * - A quoted value containing `"` is wrapped in single quotes; when it
     *   contains both quote kinds it stays in double quotes and the embedded
     *   double quotes are written as `&quot;`.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string the trimmed attribute string, '' when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attr_str = '';
        foreach ($node->attributes as $attribute) {
            $name = $attribute->name;
            $rawValue = $attribute->value;

            $attr_str .= $name;

            // boolean attributes need no value at all
            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$name])
            ) {
                $attr_str .= ' ';

                continue;
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $rawValue !== ''
                           &&
                           \strpos($name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]+/', $rawValue) === 0;

            $value = $rawValue;
            if (
                $this->doOptimizeAttributes
                &&
                (
                    $name === 'srcset'
                    ||
                    $name === 'sizes'
                )
            ) {
                // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 with
                // the "u" modifier); keep the untouched value instead of dropping it
                $value = \preg_replace(self::$regExSpace, ' ', $rawValue) ?? $rawValue;
            }

            $quote = '';
            if (!$omit_quotes) {
                $quote = '"';

                if (\strpos($value, '"') !== false) {
                    if (\strpos($value, "'") === false) {
                        $quote = "'";
                    } else {
                        // both quote kinds occur: neither delimiter is safe as-is,
                        // so keep double quotes and encode the embedded ones
                        $value = \str_replace('"', '&quot;', $value);
                    }
                }
            }

            $attr_str .= '=' . $quote . $value . $quote . ' ';
        }

        return \trim($attr_str);
    }
724
725
    /**
     * Decide whether the closing tag of the given node may be left out.
     *
     * The decision is based on the node name, the name of its parent node and
     * the node returned by getNextSiblingOfTypeDOMElement(), following the
     * tag-omission rules of the HTML standard.
     *
     * @param \DOMNode $node node whose end tag is examined
     *
     * @return bool true when the end tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;
        $parent_tag_name = $node->parentNode ? $node->parentNode->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $hasNoNextSibling = $nextSibling === null;
        $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        if ($tag_name === 'li') {
            return $hasNoNextSibling || $nextTagName === 'li';
        }

        if ($tag_name === 'rp') {
            return $hasNoNextSibling || \in_array($nextTagName, ['rp', 'rt'], true);
        }

        if ($tag_name === 'tr') {
            return $hasNoNextSibling || $nextTagName === 'tr';
        }

        if ($tag_name === 'source') {
            return \in_array($parent_tag_name, ['audio', 'video', 'picture', 'source'], true)
                   &&
                   ($hasNoNextSibling || $nextTagName === 'source');
        }

        if ($tag_name === 'td' || $tag_name === 'th') {
            return $hasNoNextSibling || \in_array($nextTagName, ['td', 'th'], true);
        }

        if ($tag_name === 'dd' || $tag_name === 'dt') {
            // only <dd> (not <dt>) may drop its end tag as the last element of the parent
            return ($hasNoNextSibling && $tag_name === 'dd')
                   ||
                   \in_array($nextTagName, ['dd', 'dt'], true);
        }

        if ($tag_name === 'option') {
            return $hasNoNextSibling || \in_array($nextTagName, ['option', 'optgroup'], true);
        }

        if ($tag_name === 'p') {
            if ($hasNoNextSibling) {
                // The parent's end tag only closes the open <p> implicitly for parents
                // outside this exclusion list. Hyphenated element names (autonomous
                // custom elements) are excluded as well, as required by the standard.
                return $parent_tag_name !== null
                       &&
                       !\in_array(
                           $parent_tag_name,
                           [
                               'a',
                               'audio',
                               'del',
                               'ins',
                               'map',
                               'noscript',
                               'video',
                           ],
                           true
                       )
                       &&
                       \preg_match('/^[a-z][^-]*-/i', $parent_tag_name) !== 1;
            }

            return \in_array(
                $nextTagName,
                [
                    'address',
                    'article',
                    'aside',
                    'blockquote',
                    'details',
                    'dir',
                    'div',
                    'dl',
                    'fieldset',
                    'figcaption',
                    'figure',
                    'footer',
                    'form',
                    'h1',
                    'h2',
                    'h3',
                    'h4',
                    'h5',
                    'h6',
                    'header',
                    'hgroup',
                    'hr',
                    'main',
                    'menu',
                    'nav',
                    'ol',
                    'p',
                    'pre',
                    'section',
                    'table',
                    'ul',
                ],
                true
            );
        }

        return false;
    }
982
983 45
    /**
     * Serialize the children of the given DOM node into an HTML string.
     *
     * Doctype, element, text and comment children are handled; element
     * children are serialized recursively. Children of any other node type
     * contribute nothing to the result.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';

        // Whitespace tracker, advanced once per child:
        // - 'is_empty'       => a whitespace gap was handled for the previous child
        // - 'last_was_empty' => that gap was handled one step ago, so no
        //                       additional space is appended for this child
        // - ''               => nothing pending
        $whitespaceState = '';

        foreach ($node->childNodes as $child) {
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    // "SYSTEM" is only used when there is a system id but no public id
                    $systemIdOnly = !$child->publicId && $child->systemId;
                    $publicKeyword = $systemIdOnly ? '' : 'PUBLIC';
                    $systemKeyword = $systemIdOnly ? 'SYSTEM' : '';

                    $publicPart = $child->publicId ? ' ' . $publicKeyword . ' "' . $child->publicId . '"' : '';
                    $systemPart = $child->systemId ? ' ' . $systemKeyword . ' "' . $child->systemId . '"' : '';

                    $output .= '<!DOCTYPE ' . $child->name . $publicPart . $systemPart . '>';
                }

                continue;
            }

            if ($child instanceof \DOMElement) {
                // the rtrim() drops the separator space when there are no attributes
                $openingTag = \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $output .= $openingTag . '>' . $this->domNodeToString($child);

                $omitClosingTag = $this->doRemoveOmittedHtmlTags && $this->domNodeClosingTagOptional($child);
                if (!$omitClosingTag) {
                    $output .= '</' . $child->tagName . '>';
                }

                $followedBySingleSpace = !$this->doRemoveWhitespaceAroundTags
                                         && $child->nextSibling instanceof \DOMText
                                         && $child->nextSibling->wholeText === ' ';
                if ($followedBySingleSpace) {
                    if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output .= ' ';
                    }

                    $whitespaceState = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // whitespace-only text is reduced to (at most) one space, and only
                // when it sits between two siblings
                if ($child->previousSibling !== null && $child->nextSibling !== null) {
                    if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output .= ' ';
                    }

                    $whitespaceState = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';
            }
        }

        return $output;
    }
1071
1072
    /**
     * Find the closest following sibling that is an element node.
     *
     * Siblings that are not a \DOMElement (e.g. text or comment nodes) are skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null <p>The next \DOMElement sibling, or null if there is none.</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1085
1086
    /**
     * Check if the given comment text belongs to a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment <p>The comment text, without the surrounding comment markers.</p>
     *
     * @return bool <p>True if the text starts with "[if ...]" or ends with "[endif]".</p>
     */
    private function isConditionalComment($comment): bool
    {
        return \preg_match('/^\[if [^\]]+\]/', $comment)
               || \preg_match('/\[endif\]$/', $comment);
    }
1110
1111
    /**
     * Minify the given HTML.
     *
     * The input is optionally processed via the DOM parser, then whitespace
     * between attributes is normalized, protected fragments are restored and
     * a final string-based clean-up is applied. If the result is longer than
     * the (trimmed) input, the trimmed input is returned instead.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
     *
     * @return string <p>The minified HTML, or an empty string for empty / whitespace-only input.</p>
     */
    public function minify($html, $decodeUtf8Specials = false): string
    {
        // Compare against '' explicitly: a loose "falsy" check would also
        // discard the valid input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // init
        static $CACHE_SELF_CLOSING_TAGS = null;
        if ($CACHE_SELF_CLOSING_TAGS === null) {
            $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        $html = (string) \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#u',
            static function ($matches) {
                return '<' . $matches[1] . \preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]) . $matches[3] . '>';
            },
            $html
        );

        if ($this->doRemoveSpacesBetweenTags) {
            // Remove spaces that are between > and <
            $html = (string) \preg_replace('/(>) (<)/', '>$2', $html);
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $html = (string) \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        );

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly next to the <html> / <head> tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ... and they don't need an explicit closing tag either, e.g. "<br></br>" => "<br>"
        $html = (string) \preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html);

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        // fall back to the (trimmed) input if the "minified" result got longer
        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1241
1242
    /**
     * Run the DOM-based minification steps and return the resulting HTML string.
     *
     * Steps, in this order: load the HTML, notify the "before" observers,
     * protect tags / conditional comments, optionally remove comments,
     * optionally sum-up whitespace, optionally remove whitespace around tags
     * (per element), notify the "after" observers and finally serialize the
     * document.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Passed through to the output fix-up of the dom parser.</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // init dom
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // remove redundant white space
        $dom->getDocument()->preserveWhiteSpace = false;
        // do not format the output with indentation
        $dom->getDocument()->formatOutput = false;

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember whether the input itself started with a doctype
        $htmlWithoutLeadingWhitespace = \ltrim($html);
        $this->withDocType = (\stripos($htmlWithoutLeadingWhitespace, '<!DOCTYPE') === 0);

        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // Protect HTML tags and conditional comments.
        $dom = $this->protectTags($dom);

        // Remove default HTML comments. [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            // Remove whitespace around tags. [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // Convert the Dom into a string.
        $serializedDocument = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serializedDocument, $decodeUtf8Specials);
    }
1312
1313
    /**
     * Replace content that must not be minified with numbered placeholders.
     *
     * The original content is stored in $this->protectedChildNodes (keyed by
     * the placeholder number). This is done for:
     * - inline "script" and "style" elements (elements with a "src" attribute are skipped)
     * - the parent content of "code" and "nocompress" elements
     * - conditional comments
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        // builds the placeholder markup that carries the given number
        $buildPlaceholder = static function (int $id) use ($helperTag): string {
            return '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';
        };

        // number of the next placeholder
        $nextId = 0;

        foreach ($dom->find('script, style') as $element) {
            if (\in_array($element->tag, ['script', 'style'], true)) {
                $attributes = $element->getAllAttributes();
                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            // keep the original content before it is overwritten by the placeholder
            $this->protectedChildNodes[$nextId] = $element->innerhtml;
            $element->getNode()->nodeValue = $buildPlaceholder($nextId);

            ++$nextId;
        }

        foreach ($dom->find('code, nocompress') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            // here the content of the parent element is protected and replaced
            $this->protectedChildNodes[$nextId] = $element->parentNode()->innerHtml();
            $element->getNode()->parentNode->nodeValue = $buildPlaceholder($nextId);

            ++$nextId;
        }

        foreach ($dom->find('//comment()') as $element) {
            $commentText = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $this->protectedChildNodes[$nextId] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that holds the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            $placeholderNode = new \DOMText($buildPlaceholder($nextId));
            /** @noinspection UnusedFunctionResultInspection */
            $element->getNode()->parentNode->replaceChild($placeholderNode, $commentNode);

            ++$nextId;
        }

        return $dom;
    }
1373
1374
    /**
     * Remove comment nodes from the dom and normalize the document afterwards.
     *
     * Comments whose text contains a "[" are left untouched
     * (NOTE(review): presumably to spare conditional-comment-like markup - confirm).
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1396
1397
    /**
     * Collapse whitespace in the text nodes in and around the given element.
     *
     * Only elements whose tag is listed in self::$trimWhitespaceFromTags and
     * which have at least one child node are processed. For those, the first
     * child, the last child, the previous sibling and the next sibling are
     * checked, and every one of them that is a text node gets its whitespace
     * replaced via self::$regExSpace.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        // NOTE(review): static analysis flags that "tag" is read through the
        // SimpleHtmlDomInterface type, i.e. this relies on the concrete
        // implementation exposing that property; consider an instanceof check
        // or a narrower type hint.
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if (!($node->childNodes->length > 0)) {
            return;
        }

        /** @var \DOMNode[]|null[] $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // no by-reference iteration needed: the nodes are objects
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // preg_replace() returns null on error; keep the original text in
            // that case instead of wiping the node
            $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($collapsed !== null) {
                $candidate->nodeValue = $collapsed;
            }
        }
    }
1429
1430
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the quoted number from the placeholder attributes and returns the
     * protected content stored under that number.
     *
     * @param array $matches PREG matches; the named group "attributes" is used
     *
     * @return string <p>The protected content, or an empty string if no quoted number
     *                was found or nothing is stored for it.</p>
     */
    private function restoreProtectedHtml($matches): string
    {
        // Without a quoted id there is nothing to look up; bail out before
        // touching $matchesInner['id'], which would be undefined in that case.
        $hasId = \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'] ?? '', $matchesInner);
        if ($hasId !== 1) {
            return '';
        }

        return (string) ($this->protectedChildNodes[$matchesInner['id']] ?? '');
    }
1448
1449
    /**
     * Store the given list in $this->domainsToRemoveHttpPrefixFromAttributes.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>For method chaining.</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $domains = $domainsToRemoveHttpPrefixFromAttributes;
        $this->domainsToRemoveHttpPrefixFromAttributes = $domains;

        return $this;
    }
1460
1461
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Every text node gets its whitespace replaced via self::$regExSpace,
     * except for text nodes whose node path contains "/<tag>" for one of the
     * entries in self::$skipTagsForRemoveWhitespace. The document is
     * normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $textNodeWrapper) {
            /* @var $textNode \DOMNode */
            $textNode = $textNodeWrapper->getNode();

            // getNodePath() may return null; treat that as "no path"
            $nodePath = (string) $textNode->getNodePath();

            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                if (\strpos($nodePath, '/' . $pattern) !== false) {
                    // the text lives below a tag that must keep its whitespace
                    continue 2;
                }
            }

            // preg_replace() returns null on error; keep the original text in
            // that case instead of wiping the node
            $collapsed = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
            if ($collapsed !== null) {
                $textNode->nodeValue = $collapsed;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1495
1496
    /**
     * Store the "keep broken html" flag in $this->keepBrokenHtml.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin <p>This instance, for method chaining.</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $flag = $keepBrokenHtml;
        $this->keepBrokenHtml = $flag;

        return $this;
    }
1509
}
1510