Completed
Push — master ( 62a1f2...d70753 )
by Lars
01:36
created

notifyObserversAboutDomElementAfterMinification()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    private static $selfClosingTags = [
38
        'area',
39
        'base',
40
        'basefont',
41
        'br',
42
        'col',
43
        'command',
44
        'embed',
45
        'frame',
46
        'hr',
47
        'img',
48
        'input',
49
        'isindex',
50
        'keygen',
51
        'link',
52
        'meta',
53
        'param',
54
        'source',
55
        'track',
56
        'wbr',
57
    ];
58
59
    private static $trimWhitespaceFromTags = [
60
        'article' => '',
61
        'br'      => '',
62
        'div'     => '',
63
        'footer'  => '',
64
        'hr'      => '',
65
        'nav'     => '',
66
        'p'       => '',
67
        'script'  => '',
68
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
248
     */
249
    private $domLoopObservers;
250
251
    /**
     * HtmlMin constructor.
     *
     * Creates the (initially empty) observer storage and attaches one
     * HtmlMinDomObserverOptimizeAttributes instance to it.
     */
    public function __construct()
    {
        $observerStorage = new \SplObjectStorage();
        $this->domLoopObservers = $observerStorage;

        $this->attachObserverToTheDomLoop(new HtmlMinDomObserverOptimizeAttributes());
    }
260
261
    /**
     * Register an observer in the DOM-loop observer storage.
     *
     * The storage is an \SplObjectStorage, so the observer is handed to its attach() method.
     *
     * @param HtmlMinDomObserverInterface $observer observer to add
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
270
271
    /**
     * Call domElementBeforeMinification() on every attached observer.
     *
     * Each observer receives the element and this HtmlMin instance.
     *
     * @param SimpleHtmlDomInterface $domElement element handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $attachedObserver) {
            $attachedObserver->domElementBeforeMinification($domElement, $this);
        }
    }
282
283
    /**
     * Call domElementAfterMinification() on every attached observer.
     *
     * Each observer receives the element and this HtmlMin instance.
     *
     * @param SimpleHtmlDomInterface $domElement element handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $attachedObserver) {
            $attachedObserver->domElementAfterMinification($domElement, $this);
        }
    }
294
295
    /**
     * Switch the "optimize attributes" option on or off (it is on unless changed here).
     *
     * @param bool $doOptimizeAttributes new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
306
307
    /**
     * Switch the "optimize via HTML DOM parser" option on or off (it is on unless changed here).
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
318
319
    /**
     * Switch the "remove comments" option on or off (it is on unless changed here).
     *
     * @param bool $doRemoveComments new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
330
331
    /**
     * Switch the "remove default attributes" option on or off (it is off unless changed here).
     *
     * @param bool $doRemoveDefaultAttributes new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
342
343
    /**
     * Switch the "remove deprecated anchor name" option on or off (it is on unless changed here).
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
354
355
    /**
     * Switch the "remove deprecated script charset attribute" option on or off
     * (it is on unless changed here).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
366
367
    /**
     * Switch the "remove deprecated type from script tag" option on or off
     * (it is on unless changed here).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
378
379
    /**
     * Switch the "remove deprecated type from stylesheet link" option on or off
     * (it is on unless changed here).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
390
391
    /**
     * Switch the "remove empty attributes" option on or off (it is on unless changed here).
     *
     * @param bool $doRemoveEmptyAttributes new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
402
403
    /**
     * Switch the "remove http prefix from attributes" option on or off
     * (it is off unless changed here).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
414
415
    /**
     * Report whether the "sort CSS class names" option is enabled.
     *
     * @return bool current value of the doSortCssClassNames flag
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
422
423
    /**
     * Report whether the "sort HTML attributes" option is enabled.
     *
     * @return bool current value of the doSortHtmlAttributes flag
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
430
431
    /**
     * Report whether the "remove deprecated script charset attribute" option is enabled.
     *
     * @return bool current value of the doRemoveDeprecatedScriptCharsetAttribute flag
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
438
439
    /**
     * Report whether the "remove default attributes" option is enabled.
     *
     * @return bool current value of the doRemoveDefaultAttributes flag
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
446
447
    /**
     * Report whether the "remove deprecated anchor name" option is enabled.
     *
     * @return bool current value of the doRemoveDeprecatedAnchorName flag
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
454
455
    /**
     * Report whether the "remove deprecated type from stylesheet link" option is enabled.
     *
     * @return bool current value of the doRemoveDeprecatedTypeFromStylesheetLink flag
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
462
463
    /**
     * Report whether the "remove deprecated type from script tag" option is enabled.
     *
     * @return bool current value of the doRemoveDeprecatedTypeFromScriptTag flag
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
470
471
    /**
     * Report whether the "remove value from empty input" option is enabled.
     *
     * @return bool current value of the doRemoveValueFromEmptyInput flag
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
478
479
    /**
     * Report whether the "remove empty attributes" option is enabled.
     *
     * @return bool current value of the doRemoveEmptyAttributes flag
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
486
487
    /**
     * Report whether the "sum up whitespace" option is enabled.
     *
     * @return bool current value of the doSumUpWhitespace flag
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
494
495
    /**
     * Report whether the "remove spaces between tags" option is enabled.
     *
     * @return bool current value of the doRemoveSpacesBetweenTags flag
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
502
503
    /**
     * Report whether the "optimize via HTML DOM parser" option is enabled.
     *
     * @return bool current value of the doOptimizeViaHtmlDomParser flag
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
510
511
    /**
     * Report whether the "optimize attributes" option is enabled.
     *
     * @return bool current value of the doOptimizeAttributes flag
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
518
519
    /**
     * Report whether the "remove comments" option is enabled.
     *
     * @return bool current value of the doRemoveComments flag
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
526
527
    /**
     * Report whether the "remove whitespace around tags" option is enabled.
     *
     * @return bool current value of the doRemoveWhitespaceAroundTags flag
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
534
535
    /**
     * Report whether the "remove omitted quotes" option is enabled.
     *
     * @return bool current value of the doRemoveOmittedQuotes flag
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
542
543
    /**
     * Report whether the "remove omitted HTML tags" option is enabled.
     *
     * @return bool current value of the doRemoveOmittedHtmlTags flag
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
550
551
    /**
     * Report whether the "remove http prefix from attributes" option is enabled.
     *
     * @return bool current value of the doRemoveHttpPrefixFromAttributes flag
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
558
559
    /**
     * Return the configured domain list used by the "remove http prefix from attributes" option.
     *
     * @return array the domainsToRemoveHttpPrefixFromAttributes property as-is
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
566
567
    /**
     * Switch the "remove omitted HTML tags" option on or off (it is on unless changed here).
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
578
579
    /**
     * Switch the "remove omitted quotes" option on or off (it is on unless changed here).
     *
     * @param bool $doRemoveOmittedQuotes new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
590
591
    /**
     * Switch the "remove spaces between tags" option on or off (it is off unless changed here).
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
602
603
    /**
     * Switch the "remove value from empty input" option on or off (it is on unless changed here).
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
614
615
    /**
     * Switch the "remove whitespace around tags" option on or off (it is off unless changed here).
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
626
627
    /**
     * Switch the "sort CSS class names" option on or off (it is on unless changed here).
     *
     * @param bool $doSortCssClassNames new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
638
639
    /**
     * Switch the "sort HTML attributes" option on or off (it is on unless changed here).
     *
     * @param bool $doSortHtmlAttributes new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
650
651
    /**
     * Switch the "sum up whitespace" option on or off (it is on unless changed here).
     *
     * @param bool $doSumUpWhitespace new value of the flag; omit the argument to enable
     *
     * @return $this for method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
662
663 43
    /**
     * Serialize the attributes of a DOM node into the text placed inside its start tag.
     *
     * Attributes are emitted in iteration order, separated by single spaces:
     * - with doOptimizeAttributes on, names listed in self::$booleanAttributes are emitted
     *   as the bare name (the value is dropped);
     * - with doOptimizeAttributes on, whitespace runs inside "srcset" and "sizes" values are
     *   collapsed to one space via self::$regExSpace;
     * - with doRemoveOmittedQuotes on, the quotes are left out when the value allows it;
     * - otherwise the value is wrapped in double quotes, or in single quotes when it contains
     *   a double quote. A value containing both kinds of quote is wrapped in double quotes
     *   with its double quotes written as &quot;, so the attribute cannot be cut short.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string the attribute string without leading/trailing whitespace,
     *                '' when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $parts = [];
        foreach ($node->attributes as $attribute) {
            $name = $attribute->name;
            $value = $attribute->value;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$name])
            ) {
                $parts[] = $name;

                continue;
            }

            // Remove quotes around attribute values, when allowed (<p class="foo"> → <p class=foo>)
            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $value !== ''
                           &&
                           \strpos($name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                           &&
                           \strpos($name, ' ') === false
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]+/', $value) === 0;

            if (
                $this->doOptimizeAttributes
                &&
                ($name === 'srcset' || $name === 'sizes')
            ) {
                // preg_replace() returns null on failure; keep the untouched value in that
                // case instead of silently emitting an empty attribute.
                $attr_val = \preg_replace(self::$regExSpace, ' ', $value) ?? $value;
            } else {
                $attr_val = $value;
            }

            if ($omit_quotes) {
                $quoteTmp = '';
            } elseif (\strpos($attr_val, '"') === false) {
                $quoteTmp = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quoteTmp = "'";
            } else {
                // Both quote characters occur: neither can delimit the raw value safely,
                // so escape the double quotes and delimit with them.
                $quoteTmp = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $parts[] = $name . '=' . $quoteTmp . $attr_val . $quoteTmp;
        }

        return \trim(\implode(' ', $parts));
    }
724
725
    /**
     * Decide whether the end tag of the given node may be left out of the output.
     *
     * Rules follow https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     * and are evaluated against the result of getNextSiblingOfTypeDOMElement($node);
     * "last" below means that helper returned null.
     *
     * Implemented:
     * - html, head, body: always (self::$optional_end_tags)
     * - li: last, or followed by li
     * - rp: last, or followed by rp / rt
     * - tr: last, or followed by tr
     * - td, th: last, or followed by td / th
     * - dd: last, or followed by dd / dt
     * - dt: followed by dd / dt
     * - option: last, or followed by option / optgroup
     * - p: followed by one of the elements listed in the method body, or last inside a
     *   parent that is not a, audio, del, ins, map, noscript, video or an autonomous
     *   custom element (recognized here by a hyphen in the parent's node name)
     *
     * @noinspection TodoComment
     *
     * TODO: Not implemented (these tags are never reported as optional):
     * - start tags: <html> (first thing inside is not a comment), <head> (first thing inside
     *   is an element), <body> (first thing inside is not space, comment, <meta>, <link>,
     *   <script>, <style> or <template>), <colgroup> (first thing inside is <col> and not
     *   preceded by a colgroup whose end tag was omitted), <tbody> (first thing inside is <tr>
     *   and not preceded by a tbody/thead/tfoot whose end tag was omitted);
     *   a start tag must never be omitted if it has any attributes
     * - end tags: optgroup (followed by optgroup, or last), colgroup and caption (not followed
     *   by ASCII whitespace or a comment), thead (followed by tbody / tfoot), tbody (followed
     *   by tbody / tfoot, or last), tfoot (last)
     *
     * NOTE(review): the spec sentence for <p> also names details, figcaption, figure and main
     * as followers; they are not in the list below, so </p> is kept before them (valid, just
     * less compact). 'dir' is in the list although the spec sentence does not name it.
     *
     * @param \DOMNode $node
     *
     * @return bool true when the end tag can be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;
        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        $isLast = $nextSibling === null;
        // Stays null when there is no next element; no rule below matches null.
        $nextTag = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        switch ($tag_name) {
            case 'li':
                return $isLast || $nextTag === 'li';

            case 'rp':
                return $isLast || $nextTag === 'rp' || $nextTag === 'rt';

            case 'tr':
                return $isLast || $nextTag === 'tr';

            case 'td':
            case 'th':
                return $isLast || $nextTag === 'td' || $nextTag === 'th';

            case 'dd':
                return $isLast || $nextTag === 'dd' || $nextTag === 'dt';

            case 'dt':
                // Unlike <dd>, a trailing <dt> keeps its end tag.
                return $nextTag === 'dd' || $nextTag === 'dt';

            case 'option':
                return $isLast || $nextTag === 'option' || $nextTag === 'optgroup';

            case 'p':
                if ($isLast) {
                    $parent = $node->parentNode;
                    if ($parent === null) {
                        return false;
                    }

                    $parentName = $parent->nodeName;

                    return !\in_array(
                        $parentName,
                        [
                            'a',
                            'audio',
                            'del',
                            'ins',
                            'map',
                            'noscript',
                            'video',
                        ],
                        true
                    )
                           &&
                           // Custom element names always contain a hyphen; inside such a
                           // parent the spec does not allow dropping </p>.
                           \strpos($parentName, '-') === false;
                }

                return \in_array(
                    $nextTag,
                    [
                        'address',
                        'article',
                        'aside',
                        'blockquote',
                        'dir',
                        'div',
                        'dl',
                        'fieldset',
                        'footer',
                        'form',
                        'h1',
                        'h2',
                        'h3',
                        'h4',
                        'h5',
                        'h6',
                        'header',
                        'hgroup',
                        'hr',
                        'menu',
                        'nav',
                        'ol',
                        'p',
                        'pre',
                        'section',
                        'table',
                        'ul',
                    ],
                    true
                );

            default:
                return false;
        }
    }
953
954 43
    /**
     * Serialize the child nodes of the given node into an HTML string.
     *
     * Doctype, element, text and comment children are handled; element
     * children are serialized recursively. Other node types are ignored.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        $output = '';

        // Tri-state marker that prevents two consecutive spaces in the output:
        // 'is_empty' is set once a whitespace position was handled and turns
        // into 'last_was_empty' for exactly the following child.
        $spaceState = '';

        foreach ($node->childNodes as $childNode) {
            $spaceState = $spaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($childNode instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType || !$childNode->name) {
                    continue;
                }

                // a doctype that only has a system-id uses the "SYSTEM" keyword,
                // everything else uses "PUBLIC" in front of the public-id
                $isSystemOnly = !$childNode->publicId && $childNode->systemId;
                $publicKeyword = $isSystemOnly ? '' : 'PUBLIC';
                $systemKeyword = $isSystemOnly ? 'SYSTEM' : '';

                $doctype = '<!DOCTYPE ' . $childNode->name;
                if ($childNode->publicId) {
                    $doctype .= ' ' . $publicKeyword . ' "' . $childNode->publicId . '"';
                }
                if ($childNode->systemId) {
                    $doctype .= ' ' . $systemKeyword . ' "' . $childNode->systemId . '"';
                }
                $output .= $doctype . '>';

                continue;
            }

            if ($childNode instanceof \DOMElement) {
                // rtrim() drops the separator space when the element has no attributes
                $openingTag = \rtrim('<' . $childNode->tagName . ' ' . $this->domNodeAttributesToString($childNode));
                $output .= $openingTag;
                $output .= '>' . $this->domNodeToString($childNode);

                $keepClosingTag = !$this->doRemoveOmittedHtmlTags
                                  || !$this->domNodeClosingTagOptional($childNode);
                if ($keepClosingTag) {
                    $output .= '</' . $childNode->tagName . '>';
                }

                if ($this->doRemoveWhitespaceAroundTags) {
                    continue;
                }

                // keep a single space that directly follows the element
                $following = $childNode->nextSibling;
                if ($following instanceof \DOMText && $following->wholeText === ' ') {
                    if ($spaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output .= ' ';
                    }
                    $spaceState = 'is_empty';
                }

                continue;
            }

            if ($childNode instanceof \DOMText) {
                if (!$childNode->isElementContentWhitespace()) {
                    $output .= $childNode->wholeText;

                    continue;
                }

                // whitespace-only text at the very start or end of the parent is dropped
                if ($childNode->previousSibling === null || $childNode->nextSibling === null) {
                    continue;
                }

                if ($spaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                    $output .= ' ';
                }
                $spaceState = 'is_empty';

                continue;
            }

            if ($childNode instanceof \DOMComment) {
                $output .= '<!--' . $childNode->textContent . '-->';
            }
        }

        return $output;
    }
1042
1043
    /**
     * Find the closest following sibling of the given node that is an element.
     *
     * Siblings that are not a \DOMElement (e.g. text or comment nodes) are skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMElement|null <p>The next element sibling or null if there is none.</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1056
1057
    /**
     * Check if the given comment text is (part of) a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional if it starts with "[if ...]" or ends
     * with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        return (bool) \preg_match('/^\[if [^\]]+\]/', $comment)
               || (bool) \preg_match('/\[endif\]$/', $comment);
    }
1081
1082
    /**
     * Minify the given HTML.
     *
     * The input is trimmed, optionally processed via the DOM parser, cleaned up
     * with a set of string / regex replacements and finally compared with the
     * trimmed input: if the result is longer, the trimmed input is returned.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
     *
     * @return string <p>An empty string is returned for empty / whitespace-only input.</p>
     */
    public function minify($html, $decodeUtf8Specials = false): string
    {
        // compare with '' explicitly: a loose check would also discard the valid input "0"
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // init
        static $CACHE_SELF_CLOSING_TAGS = null;
        if ($CACHE_SELF_CLOSING_TAGS === null) {
            $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        //
        // INFO: preg_* functions return null on error, in that case the
        //       previous value is kept instead of wiping the whole document
        $html = \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
            static function ($matches) {
                $attributes = \preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]);
                if ($attributes === null) {
                    // keep the tag untouched if its attributes could not be processed
                    return $matches[0];
                }

                return '<' . $matches[1] . $attributes . $matches[3] . '>';
            },
            $html
        ) ?? $html;

        if ($this->doRemoveSpacesBetweenTags) {
            // Remove spaces that are between > and <
            $html = \preg_replace('/(>) (<)/', '>$2', $html) ?? $html;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $restoredHtml = \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        );
        if ($restoredHtml === null) {
            // the placeholders could not be replaced by the protected content,
            // so the only safe result is the (trimmed) input
            return $origHtml;
        }
        $html = $restoredHtml;

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ... and they don't need a closing tag: "<br></br>" -> "<br>"
        $html = \preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html) ?? $html;

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1212
1213
    /**
     * Run the DOM based part of the minification and return the resulting HTML string.
     *
     * Steps: load the HTML, notify the observers (before), protect tags,
     * optionally remove comments and sum-up whitespace, optionally remove
     * whitespace around tags, notify the observers (after), serialize the DOM.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Passed through to "HtmlDomParser->fixHtmlOutput()".</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // init dom
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        // remove redundant white space
        $dom->getDocument()->preserveWhiteSpace = false;
        // do not format the output with indentation
        $dom->getDocument()->formatOutput = false;

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember if the input itself started with a doc-type
        $this->withDocType = \strncasecmp(\ltrim($html), '<!DOCTYPE', 9) === 0;

        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // Protect HTML tags and conditional comments.
        $dom = $this->protectTags($dom);

        // Remove default HTML comments. [protected html is still protected]
        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            // Remove whitespace around tags. [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // Convert the Dom into a string.
        $serializedHtml = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serializedHtml, $decodeUtf8Specials);
    }
1283
1284
    /**
     * Prevent changes of inline "styles" and "scripts", of "code" / "nocompress"
     * content and of conditional comments.
     *
     * The original content is stored in "$this->protectedChildNodes" and replaced
     * in the DOM by a numbered placeholder tag, see "restoreProtectedHtml()".
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        // build the placeholder markup for the given id
        $placeholderFor = static function (int $id) use ($helperTag): string {
            return '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';
        };

        $nextId = 0;

        foreach ($dom->find('script, style') as $element) {
            // skip external links
            if (\in_array($element->tag, ['script', 'style'], true)) {
                $attributes = $element->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$nextId] = $element->innerhtml;
            $element->getNode()->nodeValue = $placeholderFor($nextId);

            $nextId++;
        }

        foreach ($dom->find('code, nocompress') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            // here the content of the parent element is stored and replaced
            $this->protectedChildNodes[$nextId] = $element->parentNode()->innerHtml();
            $element->getNode()->parentNode->nodeValue = $placeholderFor($nextId);

            $nextId++;
        }

        foreach ($dom->find('//comment()') as $element) {
            $commentText = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $this->protectedChildNodes[$nextId] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that contains the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->replaceChild(new \DOMText($placeholderFor($nextId)), $commentNode);

            $nextId++;
        }

        return $dom;
    }
1344
1345
    /**
     * Remove comments in the dom.
     *
     * Comments that contain a "[" are kept, all other comments are removed.
     * Afterwards the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1367
1368
    /**
     * Trim tags in the dom.
     *
     * Only elements whose tag is listed in "self::$trimWhitespaceFromTags" and
     * which have at least one child node are processed. For those, whitespace
     * matched by "self::$regExSpace" is replaced by a single space in the first
     * child, last child, previous sibling and next sibling, as far as these
     * are text nodes.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var \DOMNode[]|null[] $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // INFO: the nodes are objects, so no by-reference loop is needed to modify them
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            // preg_replace() returns null on error, keep the text untouched in that case
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1400
1401
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the numeric id from the quoted attribute value of a placeholder tag
     * and returns the protected content that was stored under this id.
     *
     * @param array $matches PREG matches, the named group "attributes" is used
     *
     * @return string <p>The protected content or an empty string if no content is stored for the id.</p>
     */
    private function restoreProtectedHtml($matches): string
    {
        $attributes = $matches['attributes'] ?? '';

        // without a match there is no id, so do not touch "$matchesInner['id']"
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        return $this->protectedChildNodes[$matchesInner['id']] ?? '';
    }
1419
1420
    /**
     * Set the list that is stored in "$this->domainsToRemoveHttpPrefixFromAttributes".
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>(fluent interface)</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1431
1432
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * In every text node, whitespace matched by "self::$regExSpace" is replaced
     * by a single space. Text nodes whose node-path contains one of the tags
     * from "self::$skipTagsForRemoveWhitespace" are left untouched. Afterwards
     * the document is normalized.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $textNodeWrapper) {
            /* @var $textNode \DOMNode */
            $textNode = $textNodeWrapper->getNode();

            // without a node-path we can't check the skip-list, so leave the node as it is
            $nodePath = $textNode->getNodePath();
            if (!\is_string($nodePath)) {
                continue;
            }

            foreach (self::$skipTagsForRemoveWhitespace as $skipTag) {
                if (\strpos($nodePath, '/' . $skipTag) !== false) {
                    // the text is inside of a tag that must keep its whitespace
                    continue 2;
                }
            }

            // preg_replace() returns null on error, keep the text untouched in that case
            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $textNode->nodeValue);
            if ($nodeValueTmp !== null) {
                $textNode->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1466
1467
    /**
     * Set the flag that is passed to "HtmlDomParser->useKeepBrokenHtml()" before the HTML is loaded.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return $this <p>(fluent interface)</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1480
}
1481