Completed
Push — master ( 1f75fb...d3e0c2 )
by Lars
02:39
created

HtmlMin::minify()   C

Complexity

Conditions 9
Paths 66

Size

Total Lines 124

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 47
CRAP Score 9.0007

Importance

Changes 0
Metric Value
dl 0
loc 124
ccs 47
cts 48
cp 0.9792
rs 6.4444
c 0
b 0
f 0
cc 9
nc 66
nop 2
crap 9.0007

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 */
21
class HtmlMin
22
{
23
    /**
24
     * @var string
25
     */
26
    private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
27
28
    /**
29
     * @var array
30
     */
31
    private static $optional_end_tags = [
32
        'html',
33
        'head',
34
        'body',
35
    ];
36
37
    private static $selfClosingTags = [
38
        'area',
39
        'base',
40
        'basefont',
41
        'br',
42
        'col',
43
        'command',
44
        'embed',
45
        'frame',
46
        'hr',
47
        'img',
48
        'input',
49
        'isindex',
50
        'keygen',
51
        'link',
52
        'meta',
53
        'param',
54
        'source',
55
        'track',
56
        'wbr',
57
    ];
58
59
    private static $trimWhitespaceFromTags = [
60
        'article' => '',
61
        'br'      => '',
62
        'div'     => '',
63
        'footer'  => '',
64
        'hr'      => '',
65
        'nav'     => '',
66
        'p'       => '',
67
        'script'  => '',
68
    ];
69
70
    /**
71
     * @var array
72
     */
73
    private static $booleanAttributes = [
74
        'allowfullscreen' => '',
75
        'async'           => '',
76
        'autofocus'       => '',
77
        'autoplay'        => '',
78
        'checked'         => '',
79
        'compact'         => '',
80
        'controls'        => '',
81
        'declare'         => '',
82
        'default'         => '',
83
        'defaultchecked'  => '',
84
        'defaultmuted'    => '',
85
        'defaultselected' => '',
86
        'defer'           => '',
87
        'disabled'        => '',
88
        'enabled'         => '',
89
        'formnovalidate'  => '',
90
        'hidden'          => '',
91
        'indeterminate'   => '',
92
        'inert'           => '',
93
        'ismap'           => '',
94
        'itemscope'       => '',
95
        'loop'            => '',
96
        'multiple'        => '',
97
        'muted'           => '',
98
        'nohref'          => '',
99
        'noresize'        => '',
100
        'noshade'         => '',
101
        'novalidate'      => '',
102
        'nowrap'          => '',
103
        'open'            => '',
104
        'pauseonexit'     => '',
105
        'readonly'        => '',
106
        'required'        => '',
107
        'reversed'        => '',
108
        'scoped'          => '',
109
        'seamless'        => '',
110
        'selected'        => '',
111
        'sortable'        => '',
112
        'truespeed'       => '',
113
        'typemustmatch'   => '',
114
        'visible'         => '',
115
    ];
116
117
    /**
118
     * @var array
119
     */
120
    private static $skipTagsForRemoveWhitespace = [
121
        'code',
122
        'pre',
123
        'script',
124
        'style',
125
        'textarea',
126
    ];
127
128
    /**
129
     * @var array
130
     */
131
    private $protectedChildNodes = [];
132
133
    /**
134
     * @var string
135
     */
136
    private $protectedChildNodesHelper = 'html-min--voku--saved-content';
137
138
    /**
139
     * @var bool
140
     */
141
    private $doOptimizeViaHtmlDomParser = true;
142
143
    /**
144
     * @var bool
145
     */
146
    private $doOptimizeAttributes = true;
147
148
    /**
149
     * @var bool
150
     */
151
    private $doRemoveComments = true;
152
153
    /**
154
     * @var bool
155
     */
156
    private $doRemoveWhitespaceAroundTags = false;
157
158
    /**
159
     * @var bool
160
     */
161
    private $doRemoveOmittedQuotes = true;
162
163
    /**
164
     * @var bool
165
     */
166
    private $doRemoveOmittedHtmlTags = true;
167
168
    /**
169
     * @var bool
170
     */
171
    private $doRemoveHttpPrefixFromAttributes = false;
172
173
    /**
174
     * @var array
175
     */
176
    private $domainsToRemoveHttpPrefixFromAttributes = [
177
        'google.com',
178
        'google.de',
179
    ];
180
181
    /**
182
     * @var bool
183
     */
184
    private $doSortCssClassNames = true;
185
186
    /**
187
     * @var bool
188
     */
189
    private $doSortHtmlAttributes = true;
190
191
    /**
192
     * @var bool
193
     */
194
    private $doRemoveDeprecatedScriptCharsetAttribute = true;
195
196
    /**
197
     * @var bool
198
     */
199
    private $doRemoveDefaultAttributes = false;
200
201
    /**
202
     * @var bool
203
     */
204
    private $doRemoveDeprecatedAnchorName = true;
205
206
    /**
207
     * @var bool
208
     */
209
    private $doRemoveDeprecatedTypeFromStylesheetLink = true;
210
211
    /**
212
     * @var bool
213
     */
214
    private $doRemoveDeprecatedTypeFromScriptTag = true;
215
216
    /**
217
     * @var bool
218
     */
219
    private $doRemoveValueFromEmptyInput = true;
220
221
    /**
222
     * @var bool
223
     */
224
    private $doRemoveEmptyAttributes = true;
225
226
    /**
227
     * @var bool
228
     */
229
    private $doSumUpWhitespace = true;
230
231
    /**
232
     * @var bool
233
     */
234
    private $doRemoveSpacesBetweenTags = false;
235
236
    /**
237
     * @var bool
238
     */
239
    private $keepBrokenHtml = false;
240
241
    /**
242
     * @var bool
243
     */
244
    private $withDocType = false;
245
246
    /**
247
     * @var \SplObjectStorage|HtmlMinDomObserverInterface[]
248
     */
249
    private $domLoopObservers;
250
251
    /**
     * HtmlMin constructor.
     *
     * Creates the observer storage and registers the attribute-optimizing
     * observer through attachObserverToTheDomLoop().
     */
    public function __construct()
    {
        $this->domLoopObservers = new \SplObjectStorage();

        $attributeObserver = new HtmlMinDomObserverOptimizeAttributes();
        $this->attachObserverToTheDomLoop($attributeObserver);
    }
260
261
    /**
     * Adds an observer to the storage that is iterated by the notify* methods.
     *
     * @param HtmlMinDomObserverInterface $observer observer to register
     *
     * @return void
     */
    public function attachObserverToTheDomLoop(HtmlMinDomObserverInterface $observer)
    {
        $observers = $this->domLoopObservers;
        $observers->attach($observer);
    }
270
271
    /**
     * Calls domElementBeforeMinification() on every attached observer.
     *
     * @param SimpleHtmlDom $domElement element passed to each observer, together with this instance
     *
     * @return void
     */
    private function notifyObserversAboutDomElementBeforeMinification(SimpleHtmlDom $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementBeforeMinification($domElement, $this);
        }
    }
282
283 40
    /**
     * Calls domElementAfterMinification() on every attached observer.
     *
     * @param SimpleHtmlDom $domElement element passed to each observer, together with this instance
     *
     * @return void
     */
    private function notifyObserversAboutDomElementAfterMinification(SimpleHtmlDom $domElement)
    {
        foreach ($this->domLoopObservers as $domLoopObserver) {
            $domLoopObserver->domElementAfterMinification($domElement, $this);
        }
    }
289
290
    /**
     * Stores the "optimize attributes" flag (fluent setter).
     *
     * @param bool $doOptimizeAttributes new flag value; true when omitted
     *
     * @return $this
     */
    public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
    {
        $this->doOptimizeAttributes = $doOptimizeAttributes;

        return $this;
    }
301
302
    /**
     * Stores the "optimize via HTML DOM parser" flag (fluent setter).
     *
     * @param bool $doOptimizeViaHtmlDomParser new flag value; true when omitted
     *
     * @return $this
     */
    public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
    {
        $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

        return $this;
    }
313
314
    /**
     * Stores the "remove comments" flag (fluent setter).
     *
     * @param bool $doRemoveComments new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveComments(bool $doRemoveComments = true): self
    {
        $this->doRemoveComments = $doRemoveComments;

        return $this;
    }
325
326
    /**
     * Stores the "remove default attributes" flag (fluent setter).
     *
     * @param bool $doRemoveDefaultAttributes new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
    {
        $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

        return $this;
    }
337
338
    /**
     * Stores the "remove deprecated anchor name" flag (fluent setter).
     *
     * @param bool $doRemoveDeprecatedAnchorName new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
    {
        $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

        return $this;
    }
349
350
    /**
     * Stores the "remove deprecated script charset attribute" flag (fluent setter).
     *
     * @param bool $doRemoveDeprecatedScriptCharsetAttribute new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
    {
        $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

        return $this;
    }
361
362
    /**
     * Stores the "remove deprecated type from script tag" flag (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromScriptTag new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
    {
        $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

        return $this;
    }
373
374
    /**
     * Stores the "remove deprecated type from stylesheet link" flag (fluent setter).
     *
     * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
    {
        $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

        return $this;
    }
385
386
    /**
     * Stores the "remove empty attributes" flag (fluent setter).
     *
     * @param bool $doRemoveEmptyAttributes new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
    {
        $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

        return $this;
    }
397
398
    /**
     * Stores the "remove http prefix from attributes" flag (fluent setter).
     *
     * @param bool $doRemoveHttpPrefixFromAttributes new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
    {
        $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

        return $this;
    }
409
410
    /**
     * Returns the current value of the "sort CSS class names" flag.
     *
     * @return bool
     */
    public function isDoSortCssClassNames(): bool
    {
        return $this->doSortCssClassNames;
    }
417
418
    /**
     * Returns the current value of the "sort HTML attributes" flag.
     *
     * @return bool
     */
    public function isDoSortHtmlAttributes(): bool
    {
        return $this->doSortHtmlAttributes;
    }
425
426
    /**
     * Returns the current value of the "remove deprecated script charset attribute" flag.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedScriptCharsetAttribute(): bool
    {
        return $this->doRemoveDeprecatedScriptCharsetAttribute;
    }
433
434
    /**
     * Returns the current value of the "remove default attributes" flag.
     *
     * @return bool
     */
    public function isDoRemoveDefaultAttributes(): bool
    {
        return $this->doRemoveDefaultAttributes;
    }
441
442
    /**
     * Returns the current value of the "remove deprecated anchor name" flag.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedAnchorName(): bool
    {
        return $this->doRemoveDeprecatedAnchorName;
    }
449
450
    /**
     * Returns the current value of the "remove deprecated type from stylesheet link" flag.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromStylesheetLink(): bool
    {
        return $this->doRemoveDeprecatedTypeFromStylesheetLink;
    }
457
458
    /**
     * Returns the current value of the "remove deprecated type from script tag" flag.
     *
     * @return bool
     */
    public function isDoRemoveDeprecatedTypeFromScriptTag(): bool
    {
        return $this->doRemoveDeprecatedTypeFromScriptTag;
    }
465
466
    /**
     * Returns the current value of the "remove value from empty input" flag.
     *
     * @return bool
     */
    public function isDoRemoveValueFromEmptyInput(): bool
    {
        return $this->doRemoveValueFromEmptyInput;
    }
473
474
    /**
     * Returns the current value of the "remove empty attributes" flag.
     *
     * @return bool
     */
    public function isDoRemoveEmptyAttributes(): bool
    {
        return $this->doRemoveEmptyAttributes;
    }
481
482
    /**
     * Returns the current value of the "sum up whitespace" flag.
     *
     * @return bool
     */
    public function isDoSumUpWhitespace(): bool
    {
        return $this->doSumUpWhitespace;
    }
489
490
    /**
     * Returns the current value of the "remove spaces between tags" flag.
     *
     * @return bool
     */
    public function isDoRemoveSpacesBetweenTags(): bool
    {
        return $this->doRemoveSpacesBetweenTags;
    }
497
498
    /**
     * Returns the current value of the "optimize via HTML DOM parser" flag.
     *
     * @return bool
     */
    public function isDoOptimizeViaHtmlDomParser(): bool
    {
        return $this->doOptimizeViaHtmlDomParser;
    }
505
506
    /**
     * Returns the current value of the "optimize attributes" flag.
     *
     * @return bool
     */
    public function isDoOptimizeAttributes(): bool
    {
        return $this->doOptimizeAttributes;
    }
513
514
    /**
     * Returns the current value of the "remove comments" flag.
     *
     * @return bool
     */
    public function isDoRemoveComments(): bool
    {
        return $this->doRemoveComments;
    }
521
522
    /**
     * Returns the current value of the "remove whitespace around tags" flag.
     *
     * @return bool
     */
    public function isDoRemoveWhitespaceAroundTags(): bool
    {
        return $this->doRemoveWhitespaceAroundTags;
    }
529
530
    /**
     * Returns the current value of the "remove omitted quotes" flag.
     *
     * @return bool
     */
    public function isDoRemoveOmittedQuotes(): bool
    {
        return $this->doRemoveOmittedQuotes;
    }
537
538
    /**
     * Returns the current value of the "remove omitted HTML tags" flag.
     *
     * @return bool
     */
    public function isDoRemoveOmittedHtmlTags(): bool
    {
        return $this->doRemoveOmittedHtmlTags;
    }
545
546
    /**
     * Returns the current value of the "remove http prefix from attributes" flag.
     *
     * @return bool
     */
    public function isDoRemoveHttpPrefixFromAttributes(): bool
    {
        return $this->doRemoveHttpPrefixFromAttributes;
    }
553
554
    /**
     * Returns the configured list of domains stored in
     * $domainsToRemoveHttpPrefixFromAttributes.
     *
     * @return array
     */
    public function getDomainsToRemoveHttpPrefixFromAttributes(): array
    {
        return $this->domainsToRemoveHttpPrefixFromAttributes;
    }
561
562
    /**
     * Stores the "remove omitted HTML tags" flag (fluent setter).
     *
     * @param bool $doRemoveOmittedHtmlTags new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
    {
        $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

        return $this;
    }
573
574
    /**
     * Stores the "remove omitted quotes" flag (fluent setter).
     *
     * @param bool $doRemoveOmittedQuotes new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
    {
        $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

        return $this;
    }
585
586
    /**
     * Stores the "remove spaces between tags" flag (fluent setter).
     *
     * @param bool $doRemoveSpacesBetweenTags new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
    {
        $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

        return $this;
    }
597
598
    /**
     * Stores the "remove value from empty input" flag (fluent setter).
     *
     * @param bool $doRemoveValueFromEmptyInput new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
    {
        $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

        return $this;
    }
609
610
    /**
     * Stores the "remove whitespace around tags" flag (fluent setter).
     *
     * @param bool $doRemoveWhitespaceAroundTags new flag value; true when omitted
     *
     * @return $this
     */
    public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
    {
        $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

        return $this;
    }
621
622
    /**
     * Stores the "sort CSS class names" flag (fluent setter).
     *
     * @param bool $doSortCssClassNames new flag value; true when omitted
     *
     * @return $this
     */
    public function doSortCssClassNames(bool $doSortCssClassNames = true): self
    {
        $this->doSortCssClassNames = $doSortCssClassNames;

        return $this;
    }
633
634
    /**
     * Stores the "sort HTML attributes" flag (fluent setter).
     *
     * @param bool $doSortHtmlAttributes new flag value; true when omitted
     *
     * @return $this
     */
    public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
    {
        $this->doSortHtmlAttributes = $doSortHtmlAttributes;

        return $this;
    }
645
646
    /**
     * Stores the "sum up whitespace" flag (fluent setter).
     *
     * @param bool $doSumUpWhitespace new flag value; true when omitted
     *
     * @return $this
     */
    public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
    {
        $this->doSumUpWhitespace = $doSumUpWhitespace;

        return $this;
    }
657
658 40
    /**
     * Serializes the attributes of a DOM node into a space-separated string.
     *
     * - With attribute optimization on, names listed in self::$booleanAttributes
     *   are written without a value.
     * - With attribute optimization on, whitespace runs inside "srcset" and
     *   "sizes" values are collapsed via self::$regExSpace.
     * - Quotes are dropped (<p class="foo"> → <p class=foo>) when that option is
     *   on and the final value is non-empty and free of characters that are not
     *   allowed in an unquoted attribute value.
     * - A quoted value that itself contains a double quote is wrapped in single
     *   quotes; if it contains both quote kinds, the double quotes are written
     *   as &quot; so the emitted markup stays well-formed.
     *
     * @param \DOMNode $node node whose attributes are serialized
     *
     * @return string serialized attributes, or '' when the node has none
     */
    private function domNodeAttributesToString(\DOMNode $node): string
    {
        if ($node->attributes === null) {
            return '';
        }

        $serialized = [];

        foreach ($node->attributes as $attribute) {
            $name = $attribute->name;

            if (
                $this->doOptimizeAttributes
                &&
                isset(self::$booleanAttributes[$name])
            ) {
                // boolean attribute: the name alone is enough
                $serialized[] = $name;

                continue;
            }

            $value = $attribute->value;

            if (
                $this->doOptimizeAttributes
                &&
                ($name === 'srcset' || $name === 'sizes')
            ) {
                // preg_replace() returns null on failure (e.g. invalid UTF-8 with /u);
                // keep the untouched value in that case instead of losing it
                $normalized = \preg_replace(self::$regExSpace, ' ', $value);
                if ($normalized !== null) {
                    $value = $normalized;
                }
            }

            // http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
            // decided on the value that is actually written out
            $omit_quotes = $this->doRemoveOmittedQuotes
                           &&
                           $value !== ''
                           &&
                           \preg_match('/["\'=<>` \t\r\n\f]+/', $value) === 0;

            if ($omit_quotes) {
                $serialized[] = $name . '=' . $value;

                continue;
            }

            $quote = '"';
            if (\strpos($value, '"') !== false) {
                if (\strpos($value, "'") === false) {
                    $quote = "'";
                } else {
                    $value = \str_replace('"', '&quot;', $value);
                }
            }

            $serialized[] = $name . '=' . $quote . $value . $quote;
        }

        return \implode(' ', $serialized);
    }
706
707
    /**
     * Decides whether the end tag of the given node can be left out.
     *
     * Tags listed in self::$optional_end_tags always qualify. For the other
     * supported tags the decision depends on the next sibling element (as
     * returned by getNextSiblingOfTypeDOMElement()) and, for <p>, on the parent.
     *
     * @param \DOMNode $node
     *
     * @return bool true when the end tag may be omitted
     */
    private function domNodeClosingTagOptional(\DOMNode $node): bool
    {
        $tag_name = $node->nodeName;
        $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);

        // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

        // Implemented:
        //
        // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
        // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
        // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
        // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
        // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
        // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
        // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
        // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

        // TODO:
        //
        // <html> may be omitted if first thing inside is not comment
        // <head> may be omitted if first thing inside is an element
        // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
        // <colgroup> may be omitted if first thing inside is <col>
        // <tbody> may be omitted if first thing inside is <tr>
        // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
        // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
        // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
        // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
        // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
        // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
        // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
        //
        // <-- However, a start tag must never be omitted if it has any attributes.

        if (\in_array($tag_name, self::$optional_end_tags, true)) {
            return true;
        }

        // tag name => tag names of a following sibling element that allow omitting the end tag
        $allowedFollowers = [
            'li'     => ['li'],
            'rp'     => ['rp', 'rt'],
            'tr'     => ['tr'],
            'td'     => ['td', 'th'],
            'th'     => ['td', 'th'],
            'dd'     => ['dd', 'dt'],
            'dt'     => ['dd', 'dt'],
            'option' => ['option', 'optgroup'],
            'p'      => [
                'address',
                'article',
                'aside',
                'blockquote',
                'details',
                'dir',
                'div',
                'dl',
                'fieldset',
                'figcaption',
                'figure',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'main',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
        ];

        if (!isset($allowedFollowers[$tag_name])) {
            return false;
        }

        if ($nextSibling === null) {
            // <dt> has no "no more content in the parent" rule
            if ($tag_name === 'dt') {
                return false;
            }

            // <p> additionally depends on the kind of parent element
            if ($tag_name === 'p') {
                return $node->parentNode !== null
                       &&
                       !\in_array(
                           $node->parentNode->nodeName,
                           [
                               'a',
                               'audio',
                               'del',
                               'ins',
                               'map',
                               'noscript',
                               'video',
                           ],
                           true
                       );
            }

            return true;
        }

        return $nextSibling instanceof \DOMElement
               &&
               \in_array($nextSibling->tagName, $allowedFollowers[$tag_name], true);
    }
931
932 40
    /**
     * Serialize the child nodes of a DOM node into an HTML string.
     *
     * Only the direct children of $node are iterated; element children are
     * serialized recursively. Doctype, element, text and comment nodes are
     * written to the output, every other node type is ignored.
     *
     * @param \DOMNode $node
     *
     * @return string
     */
    protected function domNodeToString(\DOMNode $node): string
    {
        // init
        $html = '';
        // small state machine: "is_empty" (set in this iteration) becomes
        // "last_was_empty" in the next iteration and is reset afterwards
        $emptyStringTmp = '';

        foreach ($node->childNodes as $child) {
            $emptyStringTmp = $emptyStringTmp === 'is_empty' ? 'last_was_empty' : '';

            if ($child instanceof \DOMDocumentType) {
                // add the doc-type only if it wasn't generated by DomDocument
                if (!$this->withDocType) {
                    continue;
                }

                if ($child->name) {
                    $html .= '<!DOCTYPE ' . $child->name;

                    if ($child->publicId) {
                        $html .= ' PUBLIC "' . $child->publicId . '"';

                        // the system id follows the public id without its own
                        // keyword, separated by exactly one space
                        if ($child->systemId) {
                            $html .= ' "' . $child->systemId . '"';
                        }
                    } elseif ($child->systemId) {
                        $html .= ' SYSTEM "' . $child->systemId . '"';
                    }

                    $html .= '>';
                }
            } elseif ($child instanceof \DOMElement) {
                // rtrim() drops the separator space when the element has no attributes
                $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
                $html .= '>' . $this->domNodeToString($child);

                if (
                    !$this->doRemoveOmittedHtmlTags
                    ||
                    !$this->domNodeClosingTagOptional($child)
                ) {
                    $html .= '</' . $child->tagName . '>';
                }

                if (
                    !$this->doRemoveWhitespaceAroundTags
                    &&
                    $child->nextSibling instanceof \DOMText
                    &&
                    $child->nextSibling->wholeText === ' '
                ) {
                    // the element is followed by a single-space text node:
                    // emit one space here, but never two in a row
                    if (
                        $emptyStringTmp !== 'last_was_empty'
                        &&
                        \substr($html, -1) !== ' '
                    ) {
                        $html .= ' ';
                    }
                    $emptyStringTmp = 'is_empty';
                }
            } elseif ($child instanceof \DOMText) {
                if ($child->isElementContentWhitespace()) {
                    // whitespace-only text is reduced to one space, and only
                    // when it sits between two sibling nodes
                    if (
                        $child->previousSibling !== null
                        &&
                        $child->nextSibling !== null
                    ) {
                        if (
                            $emptyStringTmp !== 'last_was_empty'
                            &&
                            \substr($html, -1) !== ' '
                        ) {
                            $html .= ' ';
                        }
                        $emptyStringTmp = 'is_empty';
                    }
                } else {
                    $html .= $child->wholeText;
                }
            } elseif ($child instanceof \DOMComment) {
                $html .= '<!--' . $child->textContent . '-->';
            }
        }

        return $html;
    }
1019
1020
    /**
     * Find the closest following sibling that is a DOMElement.
     *
     * Non-element siblings (e.g. text or comment nodes) are skipped.
     *
     * @param \DOMNode $node
     *
     * @return \DOMNode|null <p>The next element sibling, or null if there is none.</p>
     */
    protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
    {
        $sibling = $node->nextSibling;

        while ($sibling !== null && !($sibling instanceof \DOMElement)) {
            $sibling = $sibling->nextSibling;
        }

        return $sibling;
    }
1033
1034
    /**
     * Check if the given comment text is a conditional comment.
     *
     * INFO: conditional comments are no longer supported since IE 10
     *
     * <!--[if expression]> HTML <![endif]-->
     * <![if expression]> HTML <![endif]>
     *
     * @param string $comment <p>The comment content, without the surrounding comment markers.</p>
     *
     * @return bool <p>True if the text starts with "[if ...]" or ends with "[endif]".</p>
     */
    private function isConditionalComment($comment): bool
    {
        $startsWithIf = '/^\[if [^\]]+\]/';
        $endsWithEndif = '/\[endif\]$/';

        return \preg_match($startsWithIf, $comment) === 1
               ||
               \preg_match($endsWithEndif, $comment) === 1;
    }
1058
1059
    /**
     * Minify the given HTML.
     *
     * Steps: optional DOM based minification, whitespace clean-up between
     * attributes (and optionally between tags), restoring of protected code and
     * entities, and a final clean-up of html/head line breaks and self-closing
     * tags. If the result is longer than the (trimmed) input, the trimmed input
     * is returned instead.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
     *
     * @return string
     */
    public function minify($html, $decodeUtf8Specials = false): string
    {
        // Compare against '' explicitly: a truthiness check would also
        // discard the valid input "0".
        $html = \trim((string) $html);
        if ($html === '') {
            return '';
        }

        // init
        static $CACHE_SELF_CLOSING_TAGS = null;
        if ($CACHE_SELF_CLOSING_TAGS === null) {
            $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
        }

        // reset
        $this->protectedChildNodes = [];

        // save old content
        $origHtml = $html;
        $origHtmlLength = \strlen($html);

        // -------------------------------------------------------------------------
        // Minify the HTML via "HtmlDomParser"
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
        }

        // -------------------------------------------------------------------------
        // Trim whitespace from html-string. [protected html is still protected]
        // -------------------------------------------------------------------------

        // Remove extra white-space(s) between HTML attribute(s)
        $html = (string) \preg_replace_callback(
            '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
            static function ($matches) {
                // $matches[1] = tag name, $matches[2] = raw attribute string, $matches[3] = optional "/"
                return '<' . $matches[1] . (string) \preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]) . $matches[3] . '>';
            },
            $html
        );

        if ($this->doRemoveSpacesBetweenTags) {
            // Remove spaces that are between > and <
            $html = (string) \preg_replace('/(>) (<)/', '>$2', $html);
        }

        // -------------------------------------------------------------------------
        // Restore protected HTML-code.
        // -------------------------------------------------------------------------

        $html = (string) \preg_replace_callback(
            '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
            [$this, 'restoreProtectedHtml'],
            $html
        );

        // -------------------------------------------------------------------------
        // Restore protected HTML-entities.
        // -------------------------------------------------------------------------

        if ($this->doOptimizeViaHtmlDomParser) {
            $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
        }

        // ------------------------------------
        // Final clean-up
        // ------------------------------------

        // drop line breaks directly before / after html- and head-tags
        $html = \str_replace(
            [
                'html>' . "\n",
                "\n" . '<html',
                'html/>' . "\n",
                "\n" . '</html',
                'head>' . "\n",
                "\n" . '<head',
                'head/>' . "\n",
                "\n" . '</head',
            ],
            [
                'html>',
                '<html',
                'html/>',
                '</html',
                'head>',
                '<head',
                'head/>',
                '</head',
            ],
            $html
        );

        // self closing tags, don't need a trailing slash ...
        $replace = [];
        $replacement = [];
        foreach (self::$selfClosingTags as $selfClosingTag) {
            $replace[] = '<' . $selfClosingTag . '/>';
            $replacement[] = '<' . $selfClosingTag . '>';
            $replace[] = '<' . $selfClosingTag . ' />';
            $replacement[] = '<' . $selfClosingTag . '>';
        }
        $html = \str_replace(
            $replace,
            $replacement,
            $html
        );

        // ... and no separate closing tag either, e.g. "<br></br>" => "<br>"
        $html = (string) \preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html);

        // ------------------------------------
        // check if compression worked
        // ------------------------------------

        if ($origHtmlLength < \strlen($html)) {
            $html = $origHtml;
        }

        return $html;
    }
1189
1190
    /**
     * Run the DOM based part of the minification.
     *
     * Loads the HTML into a HtmlDomParser, notifies the observers before and
     * after the minification of each element, applies the enabled DOM
     * optimizations and converts the document back into a string.
     *
     * @param string $html
     * @param bool   $decodeUtf8Specials <p>Passed through to HtmlDomParser::fixHtmlOutput().</p>
     *
     * @return string
     */
    private function minifyHtmlDom($html, $decodeUtf8Specials): string
    {
        // remember if the input itself started with a doc-type
        $this->withDocType = (\stripos(\ltrim($html), '<!DOCTYPE') === 0);

        // init dom
        $htmlDom = new HtmlDomParser();
        /** @noinspection UnusedFunctionResultInspection */
        $htmlDom->useKeepBrokenHtml($this->keepBrokenHtml);

        $htmlDom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
        $htmlDom->getDocument()->formatOutput = false; // do not formats output with indentation

        // load dom
        /** @noinspection UnusedFunctionResultInspection */
        $htmlDom->loadHtml($html);

        // NOTE(review): find() is declared to return SimpleHtmlDomNodeInterface,
        // which static analysis cannot prove to be traversable.
        foreach ($htmlDom->find('*') as $domElement) {
            $this->notifyObserversAboutDomElementBeforeMinification($domElement);
        }

        // -------------------------------------------------------------------------
        // Protect HTML tags and conditional comments.
        // -------------------------------------------------------------------------

        $htmlDom = $this->protectTags($htmlDom);

        // -------------------------------------------------------------------------
        // Remove default HTML comments. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doRemoveComments) {
            $htmlDom = $this->removeComments($htmlDom);
        }

        // -------------------------------------------------------------------------
        // Sum-Up extra whitespace from the Dom. [protected html is still protected]
        // -------------------------------------------------------------------------

        if ($this->doSumUpWhitespace) {
            $htmlDom = $this->sumUpWhitespace($htmlDom);
        }

        foreach ($htmlDom->find('*') as $domElement) {

            // -------------------------------------------------------------------------
            // Remove whitespace around tags. [protected html is still protected]
            // -------------------------------------------------------------------------

            if ($this->doRemoveWhitespaceAroundTags) {
                $this->removeWhitespaceAroundTags($domElement);
            }

            $this->notifyObserversAboutDomElementAfterMinification($domElement);
        }

        // -------------------------------------------------------------------------
        // Convert the Dom into a string.
        // -------------------------------------------------------------------------

        $serialized = $this->domNodeToString($htmlDom->getDocument());

        return $htmlDom->fixHtmlOutput($serialized, $decodeUtf8Specials);
    }
1260
1261
    /**
     * Prevent changes of inline "styles" and "scripts", of "code" / "nocompress"
     * content and of conditional comments.
     *
     * The protected content is stored in $this->protectedChildNodes and replaced
     * in the DOM by a placeholder tag that carries the storage index.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function protectTags(HtmlDomParser $dom): HtmlDomParser
    {
        // init
        $helperTag = $this->protectedChildNodesHelper;
        $placeholderFor = static function ($id) use ($helperTag) {
            return '<' . $helperTag . ' data-' . $helperTag . '="' . $id . '"></' . $helperTag . '>';
        };
        $counter = 0;

        // NOTE(review): find() is declared to return SimpleHtmlDomNodeInterface,
        // which static analysis cannot prove to be traversable.
        foreach ($dom->find('script, style') as $element) {

            // skip external links
            if ($element->tag === 'script' || $element->tag === 'style') {
                $attributes = $element->getAllAttributes();
                if (isset($attributes['src'])) {
                    continue;
                }
            }

            $this->protectedChildNodes[$counter] = $element->text();
            $element->getNode()->nodeValue = $placeholderFor($counter);

            ++$counter;
        }

        foreach ($dom->find('code, nocompress') as $element) {
            if ($element->isRemoved()) {
                continue;
            }

            // here the whole content of the parent node is protected
            $this->protectedChildNodes[$counter] = $element->parentNode()->innerHtml();
            $element->getNode()->parentNode->nodeValue = $placeholderFor($counter);

            ++$counter;
        }

        foreach ($dom->find('//comment()') as $element) {
            $text = $element->text();

            // skip normal comments
            if (!$this->isConditionalComment($text)) {
                continue;
            }

            $this->protectedChildNodes[$counter] = '<!--' . $text . '-->';

            /* @var $node \DOMComment */
            $node = $element->getNode();
            $child = new \DOMText($placeholderFor($counter));
            /** @noinspection UnusedFunctionResultInspection */
            $element->getNode()->parentNode->replaceChild($child, $node);

            ++$counter;
        }

        return $dom;
    }
1321
1322
    /**
     * Remove comments in the dom.
     *
     * Comments that contain a "[" are kept, all other comments are removed.
     * The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function removeComments(HtmlDomParser $dom): HtmlDomParser
    {
        // NOTE(review): find() is declared to return SimpleHtmlDomNodeInterface,
        // which static analysis cannot prove to be traversable.
        foreach ($dom->find('//comment()') as $wrappedComment) {
            $commentNode = $wrappedComment->getNode();

            // keep every comment that contains a "["
            if (\strpos($commentNode->nodeValue, '[') !== false) {
                continue;
            }

            /** @noinspection UnusedFunctionResultInspection */
            $commentNode->parentNode->removeChild($commentNode);
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1344
1345
    /**
     * Trim tags in the dom.
     *
     * For elements listed in self::$trimWhitespaceFromTags that have at least
     * one child node, whitespace runs in the first / last child and in the
     * previous / next sibling are collapsed to a single space (text nodes only).
     *
     * @param SimpleHtmlDom $element
     *
     * @return void
     */
    private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
    {
        if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
            return;
        }

        $node = $element->getNode();
        if ($node->childNodes->length <= 0) {
            return;
        }

        /** @var \DOMNode[]|null[] $candidates */
        $candidates = [
            $node->firstChild,
            $node->lastChild,
            $node->previousSibling,
            $node->nextSibling,
        ];

        // The nodes are objects, so iterating by value is enough to modify them.
        foreach ($candidates as $candidate) {
            if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
                continue;
            }

            $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);

            // preg_replace() returns null on error; keep the text untouched
            // in that case instead of wiping it
            if ($collapsed !== null) {
                $candidate->nodeValue = $collapsed;
            }
        }
    }
1377
1378
    /**
     * Callback function for preg_replace_callback use.
     *
     * Reads the numeric id from the placeholder's attributes and returns the
     * protected content stored under that id. An empty string is returned when
     * the placeholder carries no id or nothing is stored for it.
     *
     * @param array $matches PREG matches
     *
     * @return string
     */
    private function restoreProtectedHtml($matches): string
    {
        // A placeholder without a quoted id has nothing to restore; bail out
        // before reading a match key that would not exist.
        if (
            !isset($matches['attributes'])
            ||
            \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
        ) {
            return '';
        }

        $id = $matchesInner['id'];
        if (!isset($this->protectedChildNodes[$id])) {
            return '';
        }

        return (string) $this->protectedChildNodes[$id];
    }
1396
1397
    /**
     * Set the list of domains stored in $this->domainsToRemoveHttpPrefixFromAttributes.
     *
     * @param array $domainsToRemoveHttpPrefixFromAttributes
     *
     * @return $this <p>Fluent interface.</p>
     */
    public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
    {
        $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

        return $this;
    }
1408
1409
    /**
     * Sum-up extra whitespace from dom-nodes.
     *
     * Whitespace runs in every text node are collapsed to a single space,
     * except for text nodes whose node path contains one of the tags from
     * self::$skipTagsForRemoveWhitespace. The document is normalized afterwards.
     *
     * @param HtmlDomParser $dom
     *
     * @return HtmlDomParser
     */
    private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
    {
        // NOTE(review): find() is declared to return SimpleHtmlDomNodeInterface,
        // which static analysis cannot prove to be traversable.
        $text_nodes = $dom->find('//text()');
        foreach ($text_nodes as $text_node_wrapper) {
            /* @var $text_node \DOMNode */
            $text_node = $text_node_wrapper->getNode();
            $xp = $text_node->getNodePath();

            $doSkip = false;
            foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
                // plain concatenation instead of the "${var}" interpolation,
                // which is deprecated since PHP 8.2
                if (\strpos($xp, '/' . $pattern) !== false) {
                    $doSkip = true;

                    break;
                }
            }
            if ($doSkip) {
                continue;
            }

            $collapsed = \preg_replace(self::$regExSpace, ' ', $text_node->nodeValue);

            // preg_replace() returns null on error; keep the text untouched
            // in that case instead of wiping it
            if ($collapsed !== null) {
                $text_node->nodeValue = $collapsed;
            }
        }

        $dom->getDocument()->normalizeDocument();

        return $dom;
    }
1443
1444
    /**
     * Store the "keep broken html" flag; it is handed to the HtmlDomParser
     * when the DOM is built.
     *
     * WARNING: maybe bad for performance ...
     *
     * @param bool $keepBrokenHtml
     *
     * @return HtmlMin <p>Fluent interface.</p>
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): self
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
1457
}
1458