Completed
Push — master ( f73514...b3b2b5 )
by Lars
17s queued 13s
created

notifyObserversAboutDomElementAfterMinification()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin implements HtmlMinInterface
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]/u";
27
28
    /**
29
     * @var string[]
30
     *
31
     * @psalm-var list<string>
32
     */
33
    private static $optional_end_tags = [
34
        'html',
35
        'head',
36
        'body',
37
    ];
38
39
    /**
40
     * @var string[]
41
     *
42
     * @psalm-var list<string>
43
     */
44
    private static $selfClosingTags = [
45
        'area',
46
        'base',
47
        'basefont',
48
        'br',
49
        'col',
50
        'command',
51
        'embed',
52
        'frame',
53
        'hr',
54
        'img',
55
        'input',
56
        'isindex',
57
        'keygen',
58
        'link',
59
        'meta',
60
        'param',
61
        'source',
62
        'track',
63
        'wbr',
64
    ];
65
66
    /**
67
     * @var string[]
68
     *
69
     * @psalm-var array<string, string>
70
     */
71
    private static $trimWhitespaceFromTags = [
72
        'article' => '',
73
        'br'      => '',
74
        'div'     => '',
75
        'footer'  => '',
76
        'hr'      => '',
77
        'nav'     => '',
78
        'p'       => '',
79
        'script'  => '',
80
    ];
81
82
    /**
83
     * @var array
84
     */
85
    private static $booleanAttributes = [
86
        'allowfullscreen' => '',
87
        'async'           => '',
88
        'autofocus'       => '',
89
        'autoplay'        => '',
90
        'checked'         => '',
91
        'compact'         => '',
92
        'controls'        => '',
93
        'declare'         => '',
94
        'default'         => '',
95
        'defaultchecked'  => '',
96
        'defaultmuted'    => '',
97
        'defaultselected' => '',
98
        'defer'           => '',
99
        'disabled'        => '',
100
        'enabled'         => '',
101
        'formnovalidate'  => '',
102
        'hidden'          => '',
103
        'indeterminate'   => '',
104
        'inert'           => '',
105
        'ismap'           => '',
106
        'itemscope'       => '',
107
        'loop'            => '',
108
        'multiple'        => '',
109
        'muted'           => '',
110
        'nohref'          => '',
111
        'noresize'        => '',
112
        'noshade'         => '',
113
        'novalidate'      => '',
114
        'nowrap'          => '',
115
        'open'            => '',
116
        'pauseonexit'     => '',
117
        'readonly'        => '',
118
        'required'        => '',
119
        'reversed'        => '',
120
        'scoped'          => '',
121
        'seamless'        => '',
122
        'selected'        => '',
123
        'sortable'        => '',
124
        'truespeed'       => '',
125
        'typemustmatch'   => '',
126
        'visible'         => '',
127
    ];
128
129
    /**
130
     * @var array
131
     */
132
    private static $skipTagsForRemoveWhitespace = [
133
        'code',
134
        'pre',
135
        'script',
136
        'style',
137
        'textarea',
138
    ];
139
140
    /**
141
     * @var array
142
     */
143
    private $protectedChildNodes = [];
144
145
    /**
146
     * @var string
147
     */
148
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
149
150
    /**
151
     * @var bool
152
     */
153
    private $doOptimizeViaHtmlDomParser = true;
154
155
    /**
156
     * @var bool
157
     */
158
    private $doOptimizeAttributes = true;
159
160
    /**
161
     * @var bool
162
     */
163
    private $doRemoveComments = true;
164
165
    /**
166
     * @var bool
167
     */
168
    private $doRemoveWhitespaceAroundTags = false;
169
170
    /**
171
     * @var bool
172
     */
173
    private $doRemoveOmittedQuotes = true;
174
175
    /**
176
     * @var bool
177
     */
178
    private $doRemoveOmittedHtmlTags = true;
179
180
    /**
181
     * @var bool
182
     */
183
    private $doRemoveHttpPrefixFromAttributes = false;
184
185
    /**
186
     * @var bool
187
     */
188
    private $doRemoveHttpsPrefixFromAttributes = false;
189
190
    /**
191
     * @var bool
192
     */
193
    private $keepPrefixOnExternalAttributes = false;
194
195
    /**
196
     * @var bool
197
     */
198
    private $doMakeSameDomainLinksRelative = false;
199
200
    /**
201
     * @var string
202
     */
203
    private $localDomain = '';
204
205
    /**
206
     * @var array
207
     */
208
    private $domainsToRemoveHttpPrefixFromAttributes = [
209
        'google.com',
210
        'google.de',
211
    ];
212
213
    /**
214
     * @var bool
215
     */
216
    private $doSortCssClassNames = true;
217
218
    /**
219
     * @var bool
220
     */
221
    private $doSortHtmlAttributes = true;
222
223
    /**
224
     * @var bool
225
     */
226
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
227
228
    /**
229
     * @var bool
230
     */
231
    private $doRemoveDefaultAttributes = false;
232
233
    /**
234
     * @var bool
235
     */
236
    private $doRemoveDeprecatedAnchorName = true;
237
238
    /**
239
     * @var bool
240
     */
241
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
242
243
    /**
244
     * @var bool
245
     */
246
    private $doRemoveDeprecatedTypeFromScriptTag = true;
247
248
    /**
249
     * @var bool
250
     */
251
    private $doRemoveValueFromEmptyInput = true;
252
253
    /**
254
     * @var bool
255
     */
256
    private $doRemoveEmptyAttributes = true;
257
258
    /**
259
     * @var bool
260
     */
261
    private $doSumUpWhitespace = true;
262
263
    /**
264
     * @var bool
265
     */
266
    private $doRemoveSpacesBetweenTags = false;
267
268
    /**
269
     * @var bool
270
     */
271
    private $keepBrokenHtml = false;
272
273
    /**
274
     * @var bool
275
     */
276
    private $withDocType = false;
277
278
    /**
279
     * @var HtmlMinDomObserverInterface[]|\SplObjectStorage
280
     */
281
    private $domLoopObservers;
282
283
    /**
284
     * @var int
285
     */
286
    private $protected_tags_counter = 0;
287
288
    /**
     * Initialise the observer storage and register the default DOM-loop observer.
     *
     * A fresh \SplObjectStorage is assigned to $domLoopObservers, then an
     * HtmlMinDomObserverOptimizeAttributes instance is attached through
     * attachObserverToTheDomLoop().
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
297
298
    /**
     * Add an observer to the internal observer storage ($domLoopObservers).
     *
     * @param HtmlMinDomObserverInterface $observer the observer to attach
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $observerStorage = $this->domLoopObservers;
        $observerStorage->attach($observer);
    }
307
308
    /**
     * Set the "doOptimizeAttributes" option.
     *
     * @param bool $doOptimizeAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
319
320
    /**
     * Set the "doOptimizeViaHtmlDomParser" option.
     *
     * @param bool $doOptimizeViaHtmlDomParser value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
331
332
    /**
     * Set the "doRemoveComments" option.
     *
     * @param bool $doRemoveComments value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
343
344
    /**
     * Set the "doRemoveDefaultAttributes" option.
     *
     * @param bool $doRemoveDefaultAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
355
356
    /**
     * Set the "doRemoveDeprecatedAnchorName" option.
     *
     * @param bool $doRemoveDeprecatedAnchorName value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
367
368
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
379
380
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
391
392
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
403
404
    /**
     * Set the "doRemoveEmptyAttributes" option.
     *
     * @param bool $doRemoveEmptyAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
415
416
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @param bool $doRemoveHttpPrefixFromAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
427
428
    /**
     * Set the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @param bool $doRemoveHttpsPrefixFromAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveHttpsPrefixFromAttributes(bool $doRemoveHttpsPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpsPrefixFromAttributes = $doRemoveHttpsPrefixFromAttributes;

        return $this;
    }
439
	
440
    /**
     * Set the "keepPrefixOnExternalAttributes" option.
     *
     * @param bool $keepPrefixOnExternalAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function keepPrefixOnExternalAttributes(bool $keepPrefixOnExternalAttributes = true): self
    {
        $this->keepPrefixOnExternalAttributes = $keepPrefixOnExternalAttributes;

        return $this;
    }
451
	
452
    /**
     * Set the "doMakeSameDomainLinksRelative" option.
     *
     * @param bool $doMakeSameDomainLinksRelative value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doMakeSameDomainLinksRelative(bool $doMakeSameDomainLinksRelative = true): self
    {
        $this->doMakeSameDomainLinksRelative = $doMakeSameDomainLinksRelative;

        return $this;
    }
463
	
464
    /**
     * Set the value of the "localDomain" property.
     *
     * A non-empty argument is normalised before it is stored: a leading "http://",
     * "https://" or "//" is stripped and trailing slashes are removed. When an empty
     * string is passed, $_SERVER['SERVER_NAME'] is used instead; if that entry is not
     * available (e.g. on the CLI) an empty string is stored.
     *
     * @param string $localDomain domain (optionally with scheme prefix), or '' to use the server name
     *
     * @return $this the current instance, to allow method chaining
     */
    public function setLocalDomain(string $localDomain = ''): self
    {
        if ($localDomain === '') {
            // SERVER_NAME is not populated by every SAPI; fall back to '' so that the
            // property always holds a string and getLocalDomain() can honour its return type
            $this->localDomain = (string) ($_SERVER['SERVER_NAME'] ?? '');
        } else {
            // - the pattern is anchored so that only a leading scheme / "//" is removed
            // - preg_replace() returns null on a PCRE error, hence the cast before rtrim()
            $this->localDomain = \rtrim(
                (string) \preg_replace('/^(https?:)?\/\//', '', $localDomain),
                '/'
            );
        }

        return $this;
    }
479
	
480
	/**
481
     * @param void
482
     *
483
     * @return $this->localDomain
0 ignored issues
show
Documentation introduced by
The doc-type $this->localDomain could not be parsed: Unknown type name "$this-" at position 0. (view supported doc-types)

This check marks PHPDoc comments that could not be parsed by our parser. To see which comment annotations we can parse, please refer to our documentation on supported doc-types.

Loading history...
484
     */
485 2
	public function getLocalDomain(): string
486
	{
487 2
		return $this->localDomain;
488
	}
489
490
    /**
     * Set the "doRemoveOmittedHtmlTags" option.
     *
     * @param bool $doRemoveOmittedHtmlTags value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
501
502
    /**
     * Set the "doRemoveOmittedQuotes" option.
     *
     * @param bool $doRemoveOmittedQuotes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
513
514
    /**
     * Set the "doRemoveSpacesBetweenTags" option.
     *
     * @param bool $doRemoveSpacesBetweenTags value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
525
526
    /**
     * Set the "doRemoveValueFromEmptyInput" option.
     *
     * @param bool $doRemoveValueFromEmptyInput value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
537
538
    /**
     * Set the "doRemoveWhitespaceAroundTags" option.
     *
     * @param bool $doRemoveWhitespaceAroundTags value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
549
550
    /**
     * Set the "doSortCssClassNames" option.
     *
     * @param bool $doSortCssClassNames value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
561
562
    /**
     * Set the "doSortHtmlAttributes" option.
     *
     * @param bool $doSortHtmlAttributes value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
573
574
    /**
     * Set the "doSumUpWhitespace" option.
     *
     * @param bool $doSumUpWhitespace value to store; defaults to true
     *
     * @return $this the current instance, to allow method chaining
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
585
586 51
    /**
     * Build the attribute part of a start tag for the given node.
     *
     * Every attribute becomes either `name` (boolean attributes, when attribute
     * optimization is enabled) or `name=value`, with the value quoted unless the quotes
     * may be left out. The pieces are separated by single spaces.
     *
     * @param \DOMNode $node
     *
     * @return string the serialized attributes, or an empty string if there are none
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $serialized = [];
        foreach ($node->attributes as $attribute) {
            $name = $attribute->name;

            // boolean attributes are written without a value
            if ($this->doOptimizeAttributes && isset(self::$booleanAttributes[$name])) {
                $serialized[] = $name;

                continue;
            }

            $value = $attribute->value;

            // Remove quotes around attribute values, when allowed (<p class="foo"> → <p class=foo>)
            //
            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omitQuotes = $this->doRemoveOmittedQuotes
                          &&
                          $value !== ''
                          &&
                          \strpos($name, '____SIMPLE_HTML_DOM__VOKU') !== 0
                          &&
                          \strpos($name, ' ') === false
                          &&
                          \preg_match('/["\'=<>` \t\r\n\f]/', $value) === 0;

            // pick the delimiter: none, or single quotes when the value itself contains a double quote
            if ($omitQuotes) {
                $quote = '';
            } elseif (\strpos($value, '"') !== false) {
                $quote = "'";
            } else {
                $quote = '"';
            }

            // whitespace inside "srcset" / "sizes" is collapsed when attribute optimization is enabled
            $collapseWhitespace = $this->doOptimizeAttributes
                                  &&
                                  ($name === 'srcset' || $name === 'sizes');
            $outputValue = $collapseWhitespace
                ? \preg_replace(self::$regExSpace, ' ', $value)
                : $value;

            $serialized[] = $name . '=' . $quote . $outputValue . $quote;
        }

        return \trim(\implode(' ', $serialized));
    }
647
648
    /**
     * Decide whether the end tag of the given node can be left out.
     *
     * The decision depends on the node's own tag name, the tag name of its parent and
     * the sibling returned by getNextSiblingOfTypeDOMElement().
     *
     * @param \DOMNode $node
     *
     * @return bool true if the end tag is optional for this node
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tagName = $node->nodeName;

        /** @var \DOMNode|null $parentNode - false-positive error from phpstan */
        $parentNode = $node->parentNode;
        $parentTagName = $parentNode ? $parentNode->nodeName : null;

        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
        $isLastElement = $nextSibling === null;
        // null when there is no next sibling or when it is not a \DOMElement
        $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

        /**
         * @noinspection TodoComment
         *
         * TODO: Not Implemented
         */
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        // tags from the static list are always treated as optional
        if (\in_array($tagName, self::$optional_end_tags, true)) {
            return true;
        }

        if ($tagName === 'li') {
            return $isLastElement || $nextTagName === 'li';
        }

        if ($tagName === 'rp') {
            return $isLastElement || $nextTagName === 'rp' || $nextTagName === 'rt';
        }

        if ($tagName === 'tr') {
            return $isLastElement || $nextTagName === 'tr';
        }

        if ($tagName === 'source') {
            return \in_array($parentTagName, ['audio', 'video', 'picture', 'source'], true)
                   &&
                   ($isLastElement || $nextTagName === 'source');
        }

        if ($tagName === 'td' || $tagName === 'th') {
            return $isLastElement || $nextTagName === 'td' || $nextTagName === 'th';
        }

        if ($tagName === 'dd' || $tagName === 'dt') {
            // only <dd> may drop its end tag when nothing follows
            return ($isLastElement && $tagName === 'dd')
                   ||
                   $nextTagName === 'dd'
                   ||
                   $nextTagName === 'dt';
        }

        if ($tagName === 'option') {
            return $isLastElement || $nextTagName === 'option' || $nextTagName === 'optgroup';
        }

        if ($tagName !== 'p') {
            return false;
        }

        // <p> without a following element: optional unless the parent is one of the listed tags
        if ($isLastElement) {
            return $parentNode !== null
                   &&
                   !\in_array(
                       $parentTagName,
                       [
                           'a',
                           'audio',
                           'del',
                           'ins',
                           'map',
                           'noscript',
                           'video',
                       ],
                       true
                   );
        }

        // <p> followed by an element: optional only in front of the tags below
        // NOTE(review): compared with the spec text quoted above, this list lacks details,
        // figcaption, figure and main, and additionally contains dir - confirm whether intended
        return \in_array(
            $nextTagName,
            [
                'address',
                'article',
                'aside',
                'blockquote',
                'dir',
                'div',
                'dl',
                'fieldset',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
            true
        );
    }
908
909 51
    /**
     * Serialize the child nodes of the given DOM node into an HTML string.
     *
     * Doctype, element, text and comment children are handled; element
     * children are serialized recursively. Any other node type is skipped.
     *
     * @param \DOMNode $node the node whose children should be serialized
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';

        // Whitespace state carried between iterations:
        // - 'is_empty'       => a single-space separator was handled in the previous iteration
        // - 'last_was_empty' => same information, shifted into the current iteration
        // - ''               => nothing to remember
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';
                    }

                    if ($child->systemId) {
                        // The "SYSTEM" keyword is only used when there is no public identifier;
                        // otherwise the system identifier directly follows the public one,
                        // separated by exactly one space.
                        $html .= ($child->publicId ? ' "' : ' SYSTEM "') . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() drops the separator space when the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (
                    !$this->doRemoveWhitespaceAroundTags
                    &&
                    $child->nextSibling instanceof \DOMText
                    &&
                    $child->nextSibling->wholeText === ' '
                ) {
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html = \rtrim($html);

                        // keep one separating space, except directly inside of <head>
                        if (
                            $child->parentNode
                            &&
                            $child->parentNode->nodeName !== 'head'
                        ) {
                            $html .= ' ';
                        }
                    }

                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $html .= $child->wholeText;

                    continue;
                }

                // whitespace-only text is only considered between two sibling nodes
                if (
                    $child->previousSibling !== null
                    &&
                    $child->nextSibling !== null
                ) {
                    if (
                        (
                            $child->wholeText
                            &&
                            \strpos($child->wholeText, ' ') !== false
                        )
                        ||
                        (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        )
                    ) {
                        $html = \rtrim($html);

                        // keep one separating space, except directly inside of <head>
                        if (
                            $child->parentNode
                            &&
                            $child->parentNode->nodeName !== 'head'
                        ) {
                            $html .= ' ';
                        }
                    }

                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1021
1022
    /**
     * Get the current value of the "domainsToRemoveHttpPrefixFromAttributes" setting.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
1029
1030
    /**
     * Get the current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
1037
1038
    /**
     * Get the current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * When enabled, minify() runs the DOM based minification and restores
     * the preserved HTML entities afterwards.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
1045
1046
    /**
     * Get the current value of the "doRemoveComments" option.
     *
     * When enabled, the DOM based minification calls removeComments().
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
1053
1054
    /**
     * Get the current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
1061
1062
    /**
     * Get the current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
1069
1070
    /**
     * Get the current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
1077
1078
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
1085
1086
    /**
     * Get the current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
1093
1094
    /**
     * Get the current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
1101
1102
    /**
     * Get the current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
1109
1110
    /**
     * Get the current value of the "doRemoveHttpsPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpsPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpsPrefixFromAttributes;
    }
1117
1118
    /**
     * Get the current value of the "keepPrefixOnExternalAttributes" option.
     *
     * @return bool
     */
    public function isKeepPrefixOnExternalAttributes(): bool
    {
        return $this->keepPrefixOnExternalAttributes;
    }
1125
1126
    /**
     * Get the current value of the "doMakeSameDomainLinksRelative" option.
     *
     * @return bool
     */
    public function isDoMakeSameDomainLinksRelative(): bool
    {
        return $this->doMakeSameDomainLinksRelative;
    }
1133
	
1134
    /**
     * Check whether a local domain has been configured.
     *
     * Uses PHP's empty() semantics, so null, '' and '0' all count as "not set".
     *
     * @return bool true if the "localDomain" property is non-empty
     */
    public function isLocalDomainSet(): bool
    {
        return !empty($this->localDomain);
    }
1141
1142
    /**
     * Get the current value of the "doRemoveOmittedHtmlTags" option.
     *
     * When enabled, domNodeToString() leaves out closing tags that
     * domNodeClosingTagOptional() reports as optional.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
1149
1150
    /**
     * Get the current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
1157
1158
    /**
     * Get the current value of the "doRemoveSpacesBetweenTags" option.
     *
     * When enabled, minify() removes a single whitespace character that
     * stands between ">" and "<".
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
1165
1166
    /**
     * Get the current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
1173
1174
    /**
     * Get the current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * When enabled, the DOM based minification calls
     * removeWhitespaceAroundTags() for every element.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
1181
1182
    /**
     * Get the current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
1189
1190
    /**
     * Get the current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
1197
1198
    /**
     * Get the current value of the "doSumUpWhitespace" option.
     *
     * When enabled, the DOM based minification calls sumUpWhitespace().
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
1205
1206
    /**
     * Minify the given HTML.
     *
     * Steps: optional DOM based minification, whitespace clean-up between
     * attributes (and optionally between tags), restoring of protected HTML
     * and HTML entities, and a final string based clean-up. If the result is
     * longer than the (trimmed) input, the trimmed input is returned instead.
     *
     * @param string $html                     the HTML to minify (cast to string)
     * @param bool   $multiDecodeNewHtmlEntity forwarded to the DOM based minification
     *
     * @return string the minified HTML, or an empty string for empty / whitespace-only input
     */
    public function minify($html, $multiDecodeNewHtmlEntity = false): string
    {
        // Compare against '' explicitly: a truthiness check would treat the
        // valid input "0" as empty.
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $multiDecodeNewHtmlEntity);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        if (\strpos($html, ' ') !== false) {
            $htmlTmp = \preg_replace_callback(
                '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
                static function ($matches) {
                    $attributes = \preg_replace('#([^\s=]+)(=([\'"]?)(.*?)\3)?(\s+|$)#su', ' $1$2', $matches[2]);

                    // PCRE error (e.g. invalid UTF-8): keep the tag untouched instead of dropping its attributes
                    if ($attributes === null) {
                        return $matches[0];
                    }

                    return '<' . $matches[1] . $attributes . $matches[3] . '>';
                },
                $html
            );

            // PCRE error: skip this optional step instead of wiping the whole document
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        if (
            $this->doRemoveSpacesBetweenTags
            &&
            \strpos($html, ' ') !== false
        ) {
            // Remove spaces that are between > and <
            $htmlTmp = \preg_replace('#(>)\s(<)#', '>$2', $html);
            if ($htmlTmp !== null) {
                $html = $htmlTmp;
            }
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        if (\strpos($html, $this->protectedChildNodesHelper) !== false) {
            $htmlTmp = \preg_replace_callback(
                '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
                [$this, 'restoreProtectedHtml'],
                $html
            );

            // PCRE error: the placeholders can not be restored, so the only
            // safe result is the unmodified input.
            if ($htmlTmp === null) {
                return $origHtml;
            }

            $html = $htmlTmp;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '></' . $selfClosingTag . '>';
            $replacement[] = '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1337
1338
    /**
     * Find the first following sibling of the given node that is a \DOMElement.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null the next element sibling, or null if there is none
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;

        // skip everything that is not an element (text, comments, ...)
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1352
1353
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: since IE >= 10 conditional comments are not working anymore
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * The text counts as conditional when it starts with "[if ...]" or
     * ends with "[endif]".
     *
     * @param string $comment
     *
     * @return bool
     */
    private function isConditionalComment($comment): bool
    {
        // the cheap strpos() checks guard the regular expressions
        return (
                \strpos($comment, '[if ') !== false
                &&
                \preg_match('/^\[if [^\]]+\]/', $comment)
            )
            ||
            (
                \strpos($comment, '[endif]') !== false
                &&
                \preg_match('/\[endif\]$/', $comment)
            );
    }
1383
1384
    /**
     * Run the DOM based part of the minification.
     *
     * Loads the HTML into a HtmlDomParser, protects content that must not be
     * changed, applies the enabled DOM optimizations, notifies the registered
     * observers before and after, and serializes the DOM back into a string.
     *
     * @param string $html
     * @param bool   $multiDecodeNewHtmlEntity forwarded to HtmlDomParser::fixHtmlOutput()
     *
     * @return string
     */
    private function minifyHtmlDom($html, $multiDecodeNewHtmlEntity): string
    {
        // set up the parser
        $dom = new HtmlDomParser();
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not format the output with indentation

        $dom->loadHtml($html);

        // remember whether the input itself started with a doctype
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // Protect <nocompress> HTML tags first.
        $this->protectTagHelper($dom, 'nocompress');

        // Notify the observers before the minification.
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // Protect HTML tags and conditional comments.
        $this->protectTags($dom);

        // Remove default HTML comments. [protected html is still protected]
        if ($this->doRemoveComments) {
            $this->removeComments($dom);
        }

        // Sum-up extra whitespace from the DOM. [protected html is still protected]
        if ($this->doSumUpWhitespace) {
            $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            // Remove whitespace around tags. [protected html is still protected]
            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            // Notify the observers after the minification.
            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // Convert the DOM into a string.
        $htmlString = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($htmlString, $multiDecodeNewHtmlEntity);
    }
1468
1469
    /**
     * Call domElementAfterMinification() on every registered DOM loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
1480
1481
    /**
     * Call domElementBeforeMinification() on every registered DOM loop observer.
     *
     * @param SimpleHtmlDomInterface $domElement
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDomInterface $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
1492
1493
    /**
     * Protect the content around all elements matching the selector.
     *
     * For every matching (not removed) element, the inner HTML of its parent
     * is stored in $this->protectedChildNodes and the parent's node value is
     * replaced by a numbered placeholder tag.
     *
     * @param HtmlDomParser $dom
     * @param string        $selector
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function protectTagHelper(HtmlDomParser $dom, string $selector): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        foreach ($dom->find($selector) as $matchedElement) {
            if ($matchedElement->isRemoved()) {
                continue;
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = $matchedElement->parentNode()->innerHtml();

            $parent = $matchedElement->getNode()->parentNode;
            if ($parent !== null) {
                $parent->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';
            }

            // the counter advances even when there was no parent to rewrite
            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1517
1518
    /**
     * Prevent changes of "code" content, inline "styles" / "scripts" and conditional comments.
     *
     * The protected content is stored in $this->protectedChildNodes and
     * replaced in the DOM by numbered placeholder tags.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        $helperTag = $this->protectedChildNodesHelper;

        $this->protectTagHelper($dom, 'code');

        foreach ($dom->find('script, style') as $inlineElement) {
            if ($inlineElement->isRemoved()) {
                continue;
            }

            if ($inlineElement->tag === 'script' || $inlineElement->tag === 'style') {
                $attributes = $inlineElement->getAllAttributes();

                // skip external links
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = $inlineElement->innerhtml;
            $inlineElement->getNode()->nodeValue = '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';

            ++$this->protected_tags_counter;
        }

        foreach ($dom->find('//comment()') as $commentElement) {
            if ($commentElement->isRemoved()) {
                continue;
            }

            $commentText = $commentElement->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $id = $this->protected_tags_counter;
            $this->protectedChildNodes[$id] = '<!--' . $commentText . '-->';

            // swap the comment node for a text node that carries the placeholder
            /* @var $commentNode \DOMComment */
            $commentNode = $commentElement->getNode();
            $placeholderNode = new \DOMText('<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>');
            $parent = $commentElement->getNode()->parentNode;
            if ($parent !== null) {
                $parent->replaceChild($placeholderNode, $commentNode);
            }

            ++$this->protected_tags_counter;
        }

        return $dom;
    }
1575
1576
    /**
     * Remove comments from the DOM.
     *
     * Comments whose text contains a "[" are kept (presumably to leave
     * conditional comments alone). The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();

            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            $parent = $commentNode->parentNode;
            if ($parent !== null) {
                $parent->removeChild($commentNode);
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1600
1601
    /**
     * Collapse whitespace in the text nodes in and around a tag.
     *
     * Only applies to tags listed in self::$trimWhitespaceFromTags that have
     * at least one child node. For the first child, last child, previous
     * sibling and next sibling of the element, every text node gets its
     * whitespace runs / line breaks (self::$regExSpace) replaced by a single
     * space.
     *
     * @param SimpleHtmlDomInterface $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDomInterface $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length === 0) {
            return;
        }

        /** @var array<int, \DOMNode|null> $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // The candidates are objects, so no by-reference iteration is needed
        // to modify them (and no dangling reference is left behind).
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);

            // preg_replace() returns null on error: keep the old value in that case
            if ($nodeValueTmp !== null) {
                $candidate->nodeValue = $nodeValueTmp;
            }
        }
    }
1637
1638
    /**
     * Callback for preg_replace_callback(): map a placeholder tag back to its protected HTML.
     *
     * The numeric id is read from the quoted value in the "attributes" match
     * and looked up in $this->protectedChildNodes.
     *
     * @param array $matches PREG matches
     *
     * @return string the protected HTML, or an empty string if the id is unknown
     */
    private function restoreProtectedHtml($matches): string
    {
        $idMatch = [];
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $idMatch);

        return $this->protectedChildNodes[$idMatch['id']] ?? '';
    }
1651
1652
    /**
     * Set the "domainsToRemoveHttpPrefixFromAttributes" setting.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this fluent interface
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1663
1664
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Every text node gets its whitespace runs / line breaks
     * (self::$regExSpace) replaced by a single space, unless its node path
     * contains one of the tags from self::$skipTagsForRemoveWhitespace.
     * The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser the same instance that was passed in
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//text()') as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();

            $xp = $text_node->getNodePath();
            if ($xp === null) {
                continue;
            }

            // leave text below one of the "skip" tags untouched
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation instead of the deprecated "${var}" interpolation
                if (\strpos($xp, '/' . $pattern) !== false) {
                    continue 2;
                }
            }

            $nodeValueTmp = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);

            // preg_replace() returns null on error: keep the old value in that case
            if ($nodeValueTmp !== null) {
                $text_node->nodeValue = $nodeValueTmp;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1704
1705
    /**
     * Set the "keepBrokenHtml" flag, which is passed on to the HtmlDomParser
     * during the DOM based minification.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin fluent interface
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1718
}
1719