Completed
Push — master ( 70f0a6...5ae024 )
by Lars
02:07
created

HtmlMin::minify()   C

Complexity

Conditions 9
Paths 66

Size

Total Lines 124

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 46
CRAP Score 9.0007

Importance

Changes 0
Metric Value
dl 0
loc 124
ccs 46
cts 47
cp 0.9787
rs 6.4444
c 0
b 0
f 0
cc 9
nc 66
nop 2
crap 9.0007

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    /**
     * Tag names handled as self-closing elements
     * (the code that consumes this list is outside this excerpt).
     *
     * @var string[]
     */
    private static $selfClosingTags = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'command',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'isindex',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Tag names used as array keys so that membership can be tested with isset();
     * the values are always empty strings.
     *
     * @var array<string, string>
     */
    private static $trimWhitespaceFromTags = [
        'article' => '',
        'br'      => '',
        'div'     => '',
        'footer'  => '',
        'hr'      => '',
        'nav'     => '',
        'p'       => '',
        'script'  => '',
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var \SplObjectStorage|HtmlMinDomObserverInterface[]
248
     */
249
    private $domLoopObservers;
250
251
    /**
     * HtmlMin constructor.
     *
     * Creates the observer storage and registers the default
     * HtmlMinDomObserverOptimizeAttributes observer.
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $defaultObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($defaultObserver);
    }
260
261
    /**
     * Register an observer; it is stored in the internal SplObjectStorage
     * that the notify*() methods iterate over.
     *
     * @param HtmlMinDomObserverInterface $observer
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $this->domLoopObservers->attach($observer);
    }
270
271
    /**
     * Call domElementBeforeMinification() on every attached observer.
     *
     * @param SimpleHtmlDom $domElement the element handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDom $domElement)
    {
        foreach ($this->domLoopObservers as $observer) {
            // each observer also receives this HtmlMin instance
            $observer->domElementBeforeMinification($domElement, $this);
        }
    }
282
283 36
    /**
     * Call domElementAfterMinification() on every attached observer.
     *
     * @param SimpleHtmlDom $domElement the element handed to the observers
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDom $domElement)
    {
        foreach ($this->domLoopObservers as $observer) {
            // each observer also receives this HtmlMin instance
            $observer->domElementAfterMinification($domElement, $this);
        }
    }
289
290
    /**
     * Set the "doOptimizeAttributes" option (fluent setter).
     *
     * @param bool $doOptimizeAttributes new value of the option, defaults to true
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
301
302
    /**
     * Set the "doOptimizeViaHtmlDomParser" option (fluent setter).
     *
     * @param bool $doOptimizeViaHtmlDomParser new value of the option, defaults to true
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
313
314
    /**
     * Set the "doRemoveComments" option (fluent setter).
     *
     * @param bool $doRemoveComments new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
325
326
    /**
     * Set the "doRemoveDefaultAttributes" option (fluent setter).
     *
     * @param bool $doRemoveDefaultAttributes new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
337
338
    /**
     * Set the "doRemoveDeprecatedAnchorName" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedAnchorName new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
349
350
    /**
     * Set the "doRemoveDeprecatedScriptCharsetAttribute" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
361
362
    /**
     * Set the "doRemoveDeprecatedTypeFromScriptTag" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
373
374
    /**
     * Set the "doRemoveDeprecatedTypeFromStylesheetLink" option (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
385
386
    /**
     * Set the "doRemoveEmptyAttributes" option (fluent setter).
     *
     * @param bool $doRemoveEmptyAttributes new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
397
398
    /**
     * Set the "doRemoveHttpPrefixFromAttributes" option (fluent setter).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
409
410
    /**
     * Current value of the "doSortCssClassNames" option.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
417
418
    /**
     * Current value of the "doSortHtmlAttributes" option.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
425
426
    /**
     * Current value of the "doRemoveDeprecatedScriptCharsetAttribute" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
433
434
    /**
     * Current value of the "doRemoveDefaultAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
441
442
    /**
     * Current value of the "doRemoveDeprecatedAnchorName" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
449
450
    /**
     * Current value of the "doRemoveDeprecatedTypeFromStylesheetLink" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
457
458
    /**
     * Current value of the "doRemoveDeprecatedTypeFromScriptTag" option.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
465
466
    /**
     * Current value of the "doRemoveValueFromEmptyInput" option.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
473
474
    /**
     * Current value of the "doRemoveEmptyAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
481
482
    /**
     * Current value of the "doSumUpWhitespace" option.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
489
490
    /**
     * Current value of the "doRemoveSpacesBetweenTags" option.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
497
498
    /**
     * Current value of the "doOptimizeViaHtmlDomParser" option.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
505
506
    /**
     * Current value of the "doOptimizeAttributes" option.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
513
514
    /**
     * Current value of the "doRemoveComments" option.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
521
522
    /**
     * Current value of the "doRemoveWhitespaceAroundTags" option.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
529
530
    /**
     * Current value of the "doRemoveOmittedQuotes" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
537
538
    /**
     * Current value of the "doRemoveOmittedHtmlTags" option.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
545
546
    /**
     * Current value of the "doRemoveHttpPrefixFromAttributes" option.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
553
554
    /**
     * Configured list of domains stored in $domainsToRemoveHttpPrefixFromAttributes.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
561
562
    /**
     * Set the "doRemoveOmittedHtmlTags" option (fluent setter).
     *
     * @param bool $doRemoveOmittedHtmlTags new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
573
574
    /**
     * Set the "doRemoveOmittedQuotes" option (fluent setter).
     *
     * @param bool $doRemoveOmittedQuotes new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
585
586
    /**
     * Set the "doRemoveSpacesBetweenTags" option (fluent setter).
     *
     * @param bool $doRemoveSpacesBetweenTags new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
597
598
    /**
     * Set the "doRemoveValueFromEmptyInput" option (fluent setter).
     *
     * @param bool $doRemoveValueFromEmptyInput new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
609
610
    /**
     * Set the "doRemoveWhitespaceAroundTags" option (fluent setter).
     *
     * @param bool $doRemoveWhitespaceAroundTags new value of the option, defaults to true
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
621
622
    /**
     * Set the "doSortCssClassNames" option (fluent setter).
     *
     * @param bool $doSortCssClassNames new value of the option, defaults to true
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
633
634
    /**
     * Set the "doSortHtmlAttributes" option (fluent setter).
     *
     * @param bool $doSortHtmlAttributes new value of the option, defaults to true
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
645
646
    /**
     * Set the "doSumUpWhitespace" option (fluent setter).
     *
     * @param bool $doSumUpWhitespace new value of the option, defaults to true
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
657
658 36
    /**
     * Serialize the attributes of a DOM node into a space-separated HTML attribute string.
     *
     * - With "doOptimizeAttributes" enabled, attributes listed in self::$booleanAttributes
     *   are written as the bare name (e.g. `checked`).
     * - With "doRemoveOmittedQuotes" enabled, the quotes are dropped when the value is
     *   non-empty and contains none of the characters that require quoting
     *   (<p class="foo"> → <p class=foo>).
     * - Otherwise the value is quoted. The delimiter is chosen so that it does not occur
     *   in the value; when the value contains both quote characters, double quotes are
     *   escaped as `&quot;`. Previously such values were written between double quotes
     *   unescaped, which produced broken markup.
     *
     * @param \DOMNode $node
     *
     * @return string the serialized attributes without leading or trailing whitespace,
     *                or an empty string when the node has no attributes
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $attrParts = [];
        foreach ($node->attributes as $attribute) {
            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$attribute->name])
            ) {
                $attrParts[] = $attribute->name;

                continue;
            }

            $attr_val = $attribute->value;

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            $omitquotes = $this->doRemoveOmittedQuotes
                          &&
                          $attr_val !== ''
                          &&
                          \preg_match('/["\'=<>` \t\r\n\f]+/', $attr_val) === 0;

            if ($omitquotes) {
                $attrParts[] = $attribute->name . '=' . $attr_val;

                continue;
            }

            // Pick a delimiter that cannot terminate the value prematurely.
            if (\strpos($attr_val, '"') === false) {
                $quote = '"';
            } elseif (\strpos($attr_val, "'") === false) {
                $quote = "'";
            } else {
                // both quote characters occur: keep double quotes and escape them
                $quote = '"';
                $attr_val = \str_replace('"', '&quot;', $attr_val);
            }

            $attrParts[] = $attribute->name . '=' . $quote . $attr_val . $quote;
        }

        return \implode(' ', $attrParts);
    }
693
694
    /**
     * Decide whether the end tag of the given node may be omitted from the output.
     *
     * Spec: https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission
     *
     * "Next sibling" below always means the value returned by
     * getNextSiblingOfTypeDOMElement(); null is treated as "no more content in the parent".
     *
     * Implemented end-tag rules:
     * - <html>, <head>, <body>: always optional (self::$optional_end_tags).
     * - <li>: next sibling is <li>, or there is none.
     * - <rp>: next sibling is <rp> or <rt>, or there is none.
     * - <tr>: next sibling is <tr>, or there is none.
     * - <td>, <th>: next sibling is <td> or <th>, or there is none.
     * - <dd>: next sibling is <dd> or <dt>, or there is none.
     * - <dt>: next sibling is <dd> or <dt> (never optional without a sibling).
     * - <option>: next sibling is <option> or <optgroup>, or there is none.
     * - <p>: next sibling is one of the block-level elements listed below, or there is
     *   none and the parent is not an a, audio, del, ins, map, noscript or video element
     *   and not an autonomous custom element.
     *
     * TODO (not implemented):
     * - <html> may be omitted if the first thing inside is not a comment
     * - <head> may be omitted if the first thing inside is an element
     * - <body> may be omitted if the first thing inside is not space, comment, <meta>,
     *   <link>, <script>, <style> or <template>
     * - <optgroup> end tag: followed by another <optgroup>, or no more content in the parent
     * - <colgroup> start tag: first thing inside is <col> and it is not preceded by a
     *   <colgroup> whose end tag was omitted (never when the element is empty)
     * - <colgroup> end tag: not followed by ASCII whitespace or a comment
     * - <caption> end tag: not followed by ASCII whitespace or a comment
     * - <thead> end tag: followed by <tbody> or <tfoot>
     * - <tbody> start tag: first thing inside is <tr> and it is not preceded by a <tbody>,
     *   <thead> or <tfoot> whose end tag was omitted (never when the element is empty)
     * - <tbody> end tag: followed by <tbody> or <tfoot>, or no more content in the parent
     * - <tfoot> end tag: no more content in the parent
     * - However, a start tag must never be omitted if it has any attributes.
     *
     * @param \DOMNode $node
     *
     * @return bool
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;
        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // tag name => names of elements that may directly follow when the end tag is omitted
        $allowedFollowers = [
            'li'     => ['li'],
            'rp'     => ['rp', 'rt'],
            'tr'     => ['tr'],
            'td'     => ['td', 'th'],
            'th'     => ['td', 'th'],
            'dd'     => ['dd', 'dt'],
            'dt'     => ['dd', 'dt'],
            'option' => ['option', 'optgroup'],
            'p'      => [
                'address',
                'article',
                'aside',
                'blockquote',
                'details',
                'dir', // not in the spec's list; kept from the previous implementation
                'div',
                'dl',
                'fieldset',
                'figcaption',
                'figure',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'main',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
        ];

        if (!isset($allowedFollowers[$tag_name])) {
            return false;
        }

        if ($nextSibling === null) {
            return $this->domNodeClosingTagOptionalWithoutSibling($node, $tag_name);
        }

        return $nextSibling instanceof \DOMElement
               &&
               \in_array($nextSibling->tagName, $allowedFollowers[$tag_name], true);
    }

    /**
     * Helper for domNodeClosingTagOptional(): apply the "no more content in the parent
     * element" rule for a tag that has no following element sibling.
     *
     * @param \DOMNode $node
     * @param string   $tag_name node name of $node; must be one of the tags handled by
     *                           domNodeClosingTagOptional()
     *
     * @return bool
     */
    private function domNodeClosingTagOptionalWithoutSibling(\DOMNode $node, string $tag_name): bool
    {
        // A <dt> must be followed by another <dt> or a <dd>.
        if ($tag_name === 'dt') {
            return false;
        }

        if ($tag_name !== 'p') {
            return true;
        }

        if ($node->parentNode === null) {
            return false;
        }

        $parentName = $node->parentNode->nodeName;

        // Custom element names always contain a hyphen; the spec excludes them as parents.
        if (\strpos($parentName, '-') !== false) {
            return false;
        }

        return !\in_array(
            $parentName,
            [
                'a',
                'audio',
                'del',
                'ins',
                'map',
                'noscript',
                'video',
            ],
            true
        );
    }
918
919 36
    protected function domNodeToString(\DOMNode $node): string
    {
        // Serialize the children of $node into an HTML string, recursing into
        // element children. Only doctype, element, text and comment children
        // produce output; any other node type is ignored.
        $output = '';

        // Whitespace marker: an iteration that handles a whitespace position
        // sets it to 'is_empty'; the start of the next iteration ages that to
        // 'last_was_empty'; every other iteration resets it to ''.
        $whitespaceState = '';

        foreach ($node->childNodes as $child) {
            $whitespaceState = $whitespaceState === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType || !$child->name) {
                    continue;
                }

                // "SYSTEM" is only used when there is a system id but no public id.
                $systemOnly = !$child->publicId && $child->systemId;
                $publicKeyword = $systemOnly ? '' : 'PUBLIC';
                $systemKeyword = $systemOnly ? 'SYSTEM' : '';

                $output .= '<!DOCTYPE ' . $child->name;
                if ($child->publicId) {
                    $output .= ' ' . $publicKeyword . ' "' . $child->publicId . '"';
                }
                if ($child->systemId) {
                    $output .= ' ' . $systemKeyword . ' "' . $child->systemId . '"';
                }
                $output .= '>';

                continue;
            }

            if ($child instanceof \DOMElement) {
                // rtrim() drops the separator blank when the element has no attributes.
                $output .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $output .= '>' . $this->domNodeToString($child);

                $omitClosingTag = $this->doRemoveOmittedHtmlTags && $this->domNodeClosingTagOptional($child);
                if (!$omitClosingTag) {
                    $output .= '</' . $child->tagName . '>';
                }

                $followedBySingleSpace = !$this->doRemoveWhitespaceAroundTags
                    && $child->nextSibling instanceof \DOMText
                    && $child->nextSibling->wholeText === ' ';

                if ($followedBySingleSpace) {
                    // Emit at most one blank per whitespace position.
                    if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output .= ' ';
                    }
                    $whitespaceState = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMText) {
                if (!$child->isElementContentWhitespace()) {
                    $output .= $child->wholeText;

                    continue;
                }

                // Whitespace-only text is reduced to a single blank, and only
                // when it sits between two siblings.
                if ($child->previousSibling !== null && $child->nextSibling !== null) {
                    if ($whitespaceState !== 'last_was_empty' && \substr($output, -1) !== ' ') {
                        $output .= ' ';
                    }
                    $whitespaceState = 'is_empty';
                }

                continue;
            }

            if ($child instanceof \DOMComment) {
                $output .= '<!--' . $child->textContent . '-->';
            }
        }

        return $output;
    }
1006
1007
    /**
1008
     * @param \DOMNode $node
1009
     *
1010
     * @return \DOMNode|null
1011
     */
1012 35
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        // Walk forward through the siblings until an element node is found
        // or the sibling list is exhausted (null).
        $sibling = $node->nextSibling;
        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1020
1021
    /**
1022
     * Check if the current string is a conditional comment.
1023
     *
1024
     * INFO: since IE >= 10, conditional comments are no longer supported
1025
     *
1026
     * <!--[if expression]> HTML <![endif]-->
1027
     * <![if expression]> HTML <![endif]>
1028
     *
1029
     * @param string $comment
1030
     *
1031
     * @return bool
1032
     */
1033 4
    private function isConditionalComment($comment): bool
    {
        // A comment counts as conditional when it either opens with
        // "[if <expression>]" or closes with "[endif]".
        $startsWithIf = \preg_match('/^\[if [^\]]+\]/', $comment) === 1;

        return $startsWithIf || \preg_match('/\[endif\]$/', $comment) === 1;
    }
1045
1046
    /**
1047
     * @param string $html
1048
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
1049
     *
1050
     * @return string
1051
     */
1052 40
    public function minify($html, $decodeUtf8Specials = false): string
    {
        $html = (string) $html;
        if (!isset($html[0])) {
            return '';
        }

        // Compare against '' explicitly: a loose truthiness check would treat
        // the valid input "0" as empty and silently discard it.
        $html = \trim($html);
        if ($html === '') {
            return '';
        }

        // init
        static $CACHE_SELF_CLOSING_TAGS = null;
        if ($CACHE_SELF_CLOSING_TAGS === null) {
            $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // NOTE: the preg_* functions return null on a PCRE error. Every regex
        // step below therefore falls back to the markup it started with, so
        // the document is never replaced by an empty string.

        // Remove extra white-space(s) between HTML attribute(s)
        $html = \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
            static function ($matches) {
                $attributes = \preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]);
                if ($attributes === null) {
                    // keep the tag untouched instead of dropping its attributes
                    return $matches[0];
                }

                return '<' . $matches[1] . $attributes . $matches[3] . '>';
            },
            $html
        ) ?? $html;

        if ($this->doRemoveSpacesBetweenTags) {
            // Remove spaces that are between > and <
            $html = \preg_replace('/(>) (<)/', '>$2', $html) ?? $html;
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $html = \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        ) ?? $html;

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly before / after the html- and head-tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ... and they don't need an explicit closing tag either
        $html = \preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html) ?? $html;

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        // hand back the (trimmed) input when the result got longer
        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1176
1177
    /**
1178
     * @param $html
1179
     * @param $decodeUtf8Specials
1180
     *
1181
     * @return string
1182
     */
1183 36
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // init dom
        $dom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $dom->useKeepBrokenHtml($this->keepBrokenHtml);

        $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $dom->getDocument()->formatOutput = false; // do not format the output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $dom->loadHtml($html);

        // remember whether the input itself started with a doc-type
        $this->withDocType = \stripos(\ltrim($html), '<!DOCTYPE') === 0;

        // NOTE(review): static analysis reports that the result of find() is
        // not guaranteed to be traversable; it is iterated directly here.
        foreach ($dom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $dom = $this->protectTags($dom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $dom = $this->removeComments($dom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $dom = $this->sumUpWhitespace($dom);
        }

        foreach ($dom->find('*') as $domElement) {
            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        $serialized = $this->domNodeToString($dom->getDocument());

        return $dom->fixHtmlOutput($serialized, $decodeUtf8Specials);
    }
1247
1248
    /**
1249
     * Prevent changes of inline "styles" and "scripts".
1250
     *
1251
     * @param HtmlDomParser $dom
1252
     *
1253
     * @return HtmlDomParser
1254
     */
1255 36
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        // Running index into $this->protectedChildNodes; each protected
        // fragment is stored under it and referenced from its placeholder.
        $counter = 0;

        // Builds the placeholder markup that carries the index of a stored fragment.
        $buildPlaceholder = function ($index): string {
            return '<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $index . '"></' . $this->protectedChildNodesHelper . '>';
        };

        // 1. inline scripts and styles
        foreach ($dom->find('script, style') as $element) {
            // skip external links
            $isScriptOrStyle = $element->tag === 'script' || $element->tag === 'style';
            if ($isScriptOrStyle) {
                $attributes = $element->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$counter] = $element->text();
            $element->getNode()->nodeValue = $buildPlaceholder($counter);

            ++$counter;
        }

        // 2. "code" and "nocompress" elements: the parent's inner html is
        //    stored and the parent's content is replaced by the placeholder
        foreach ($dom->find('code, nocompress') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            $this->protectedChildNodes[$counter] = $element->parentNode()->innerHtml();
            $element->getNode()->parentNode->nodeValue = $buildPlaceholder($counter);

            ++$counter;
        }

        // 3. conditional comments
        foreach ($dom->find('//comment()') as $element) {
            $commentText = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($commentText)) {
                continue;
            }

            $this->protectedChildNodes[$counter] = '<!--' . $commentText . '-->';

            /* @var $commentNode \DOMComment */
            $commentNode = $element->getNode();
            $placeholderNode = new \DOMText($buildPlaceholder($counter));
            /** @noinspection UnusedFunctionResultInspection */
            $element->getNode()->parentNode->replaceChild($placeholderNode, $commentNode);

            ++$counter;
        }

        return $dom;
    }
1308
1309
    /**
1310
     * Remove comments in the dom.
1311
     *
1312
     * @param HtmlDomParser $dom
1313
     *
1314
     * @return HtmlDomParser
1315
     */
1316 34
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        foreach ($dom->find('//comment()') as $commentWrapper) {
            $commentNode = $commentWrapper->getNode();
            $content = $commentNode->nodeValue;

            // comments that contain a "[" are kept, everything else is dropped
            if (\strpos($content, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1331
1332
    /**
1333
     * Trim tags in the dom.
1334
     *
1335
     * @param SimpleHtmlDom $element
1336
     *
1337
     * @return void
1338
     */
1339 3
    private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
    {
        // only tags registered in self::$trimWhitespaceFromTags are handled
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length < 1) {
            return;
        }

        // The text directly inside the tag (first / last child) and directly
        // around it (previous / next sibling) gets its whitespace collapsed.
        $neighbours = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        foreach ($neighbours as $neighbour) {
            if ($neighbour !== null && $neighbour->nodeType === \XML_TEXT_NODE) {
                $neighbour->nodeValue = \preg_replace(self::$regExSpace, ' ', $neighbour->nodeValue);
            }
        }
    }
1363
1364
    /**
1365
     * Callback function for preg_replace_callback use.
1366
     *
1367
     * @param array $matches PREG matches
1368
     *
1369
     * @return string
1370
     */
1371 5
    private function restoreProtectedHtml($matches): string
    {
        // The placeholder carries the index of the protected fragment as a
        // quoted number inside its attribute string.
        $attributes = $matches['attributes'] ?? '';

        // Without a quoted id there is nothing to look up. Bail out here so
        // that $matchesInner['id'] is never read while it is undefined.
        if (\preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner) !== 1) {
            return '';
        }

        // an unknown id restores to an empty string
        return (string) ($this->protectedChildNodes[$matchesInner['id']] ?? '');
    }
1382
1383
    /**
1384
     * @param array $domainsToRemoveHttpPrefixFromAttributes
1385
     *
1386
     * @return $this
1387
     */
1388 2
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        // store the list as given; returning $this allows call chaining
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1394
1395
    /**
1396
     * Sum-up extra whitespace from dom-nodes.
1397
     *
1398
     * @param HtmlDomParser $dom
1399
     *
1400
     * @return HtmlDomParser
1401
     */
1402 35
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        $textnodes = $dom->find('//text()');
        foreach ($textnodes as $textnodeWrapper) {
            /* @var $textnode \DOMNode */
            $textnode = $textnodeWrapper->getNode();

            // getNodePath() can return null on failure; the cast keeps
            // strpos() from receiving null under strict_types.
            $xp = (string) $textnode->getNodePath();

            // Leave text untouched when its node path contains one of the
            // tags listed in self::$skipTagsForRemoveWhitespace.
            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation: "${var}" interpolation is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            // collapse whitespace runs and line breaks into a single blank
            $textnode->nodeValue = \preg_replace(self::$regExSpace, ' ', $textnode->nodeValue);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1429
1430
    /**
1431
     * WARNING: maybe bad for performance ...
1432
     *
1433
     * @param bool $keepBrokenHtml
1434
     *
1435
     * @return HtmlMin
1436
     */
1437 1
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        // the flag is handed to HtmlDomParser::useKeepBrokenHtml() when minifying
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1443
}
1444