Completed
Push — master ( ba1ca2...bd099b )
by Lars
03:16
created

HtmlMin::useKeepBrokenHtml()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * Class HtmlMin
9
 *
10
 * Inspired by:
11
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
12
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
13
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
14
 * - PHP: https://github.com/zaininnari/html-minifier
15
 * - PHP: https://github.com/ampaze/PHP-HTML-Minifier
16
 * - Java: https://code.google.com/archive/p/htmlcompressor/
17
 *
18
 * Ideas:
19
 * - http://perfectionkills.com/optimizing-html/
20
 *
21
 * @package voku\helper
22
 */
23
class HtmlMin
24
{
25
  /**
26
   * @var string
27
   */
28
  private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
29
30
  /**
31
   * @var array
32
   */
33
  private static $optional_end_tags = [
34
      'html',
35
      'head',
36
      'body',
37
  ];
38
39
  /**
40
   * // https://mathiasbynens.be/demo/javascript-mime-type
41
   * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
42
   *
43
   * @var array
44
   */
45
  private static $executableScriptsMimeTypes = [
46
      'text/javascript'          => '',
47
      'text/ecmascript'          => '',
48
      'text/jscript'             => '',
49
      'application/javascript'   => '',
50
      'application/x-javascript' => '',
51
      'application/ecmascript'   => '',
52
  ];
53
54
  private static $selfClosingTags = [
55
      'area',
56
      'base',
57
      'basefont',
58
      'br',
59
      'col',
60
      'command',
61
      'embed',
62
      'frame',
63
      'hr',
64
      'img',
65
      'input',
66
      'isindex',
67
      'keygen',
68
      'link',
69
      'meta',
70
      'param',
71
      'source',
72
      'track',
73
      'wbr',
74
  ];
75
76
  private static $trimWhitespaceFromTags = [
77
      'article' => '',
78
      'br'      => '',
79
      'div'     => '',
80
      'footer'  => '',
81
      'hr'      => '',
82
      'nav'     => '',
83
      'p'       => '',
84
      'script'  => '',
85
  ];
86
87
  /**
88
   * @var array
89
   */
90
  private static $booleanAttributes = [
91
      'allowfullscreen' => '',
92
      'async'           => '',
93
      'autofocus'       => '',
94
      'autoplay'        => '',
95
      'checked'         => '',
96
      'compact'         => '',
97
      'controls'        => '',
98
      'declare'         => '',
99
      'default'         => '',
100
      'defaultchecked'  => '',
101
      'defaultmuted'    => '',
102
      'defaultselected' => '',
103
      'defer'           => '',
104
      'disabled'        => '',
105
      'enabled'         => '',
106
      'formnovalidate'  => '',
107
      'hidden'          => '',
108
      'indeterminate'   => '',
109
      'inert'           => '',
110
      'ismap'           => '',
111
      'itemscope'       => '',
112
      'loop'            => '',
113
      'multiple'        => '',
114
      'muted'           => '',
115
      'nohref'          => '',
116
      'noresize'        => '',
117
      'noshade'         => '',
118
      'novalidate'      => '',
119
      'nowrap'          => '',
120
      'open'            => '',
121
      'pauseonexit'     => '',
122
      'readonly'        => '',
123
      'required'        => '',
124
      'reversed'        => '',
125
      'scoped'          => '',
126
      'seamless'        => '',
127
      'selected'        => '',
128
      'sortable'        => '',
129
      'truespeed'       => '',
130
      'typemustmatch'   => '',
131
      'visible'         => '',
132
  ];
133
  /**
134
   * @var array
135
   */
136
  private static $skipTagsForRemoveWhitespace = [
137
      'code',
138
      'pre',
139
      'script',
140
      'style',
141
      'textarea',
142
  ];
143
144
  /**
145
   * @var array
146
   */
147
  private $protectedChildNodes = [];
148
149
  /**
150
   * @var string
151
   */
152
  private $protectedChildNodesHelper = 'html-min--voku--saved-content';
153
154
  /**
155
   * @var bool
156
   */
157
  private $doOptimizeViaHtmlDomParser = true;
158
159
  /**
160
   * @var bool
161
   */
162
  private $doOptimizeAttributes = true;
163
164
  /**
165
   * @var bool
166
   */
167
  private $doRemoveComments = true;
168
169
  /**
170
   * @var bool
171
   */
172
  private $doRemoveWhitespaceAroundTags = false;
173
174
  /**
175
   * @var bool
176
   */
177
  private $doRemoveOmittedQuotes = true;
178
179
  /**
180
   * @var bool
181
   */
182
  private $doRemoveOmittedHtmlTags = true;
183
184
  /**
185
   * @var bool
186
   */
187
  private $doRemoveHttpPrefixFromAttributes = false;
188
189
  /**
190
   * @var array
191
   */
192
  private $domainsToRemoveHttpPrefixFromAttributes = [
193
      'google.com',
194
      'google.de',
195
  ];
196
197
  /**
198
   * @var bool
199
   */
200
  private $doSortCssClassNames = true;
201
202
  /**
203
   * @var bool
204
   */
205
  private $doSortHtmlAttributes = true;
206
207
  /**
208
   * @var bool
209
   */
210
  private $doRemoveDeprecatedScriptCharsetAttribute = true;
211
212
  /**
213
   * @var bool
214
   */
215
  private $doRemoveDefaultAttributes = false;
216
217
  /**
218
   * @var bool
219
   */
220
  private $doRemoveDeprecatedAnchorName = true;
221
222
  /**
223
   * @var bool
224
   */
225
  private $doRemoveDeprecatedTypeFromStylesheetLink = true;
226
227
  /**
228
   * @var bool
229
   */
230
  private $doRemoveDeprecatedTypeFromScriptTag = true;
231
232
  /**
233
   * @var bool
234
   */
235
  private $doRemoveValueFromEmptyInput = true;
236
237
  /**
238
   * @var bool
239
   */
240
  private $doRemoveEmptyAttributes = true;
241
242
  /**
243
   * @var bool
244
   */
245
  private $doSumUpWhitespace = true;
246
247
  /**
248
   * @var bool
249
   */
250
  private $doRemoveSpacesBetweenTags = false;
251
252
  /**
253
   * @var bool
254
   */
255
  private $keepBrokenHtml = false;
256
257
  /**
258
   * @var bool|null
259
   */
260
  private $withDocType;
261
262
  /**
   * Create a new minifier instance.
   *
   * The constructor performs no work: every option keeps the default value
   * declared on its property.
   */
  public function __construct()
  {
    // intentionally empty
  }
268
269
  /**
   * Switch the "doOptimizeAttributes" option on or off.
   *
   * @param bool $doOptimizeAttributes new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doOptimizeAttributes(bool $doOptimizeAttributes = true): self
  {
    $this->doOptimizeAttributes = $doOptimizeAttributes;

    return $this;
  }
280
281
  /**
   * Switch the "doOptimizeViaHtmlDomParser" option on or off.
   *
   * @param bool $doOptimizeViaHtmlDomParser new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doOptimizeViaHtmlDomParser(bool $doOptimizeViaHtmlDomParser = true): self
  {
    $this->doOptimizeViaHtmlDomParser = $doOptimizeViaHtmlDomParser;

    return $this;
  }
292
293
  /**
   * Switch the "doRemoveComments" option on or off.
   *
   * @param bool $doRemoveComments new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveComments(bool $doRemoveComments = true): self
  {
    $this->doRemoveComments = $doRemoveComments;

    return $this;
  }
304
305
  /**
   * Switch the "doRemoveDefaultAttributes" option on or off.
   *
   * @param bool $doRemoveDefaultAttributes new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveDefaultAttributes(bool $doRemoveDefaultAttributes = true): self
  {
    $this->doRemoveDefaultAttributes = $doRemoveDefaultAttributes;

    return $this;
  }
316
317
  /**
   * Switch the "doRemoveDeprecatedAnchorName" option on or off.
   *
   * @param bool $doRemoveDeprecatedAnchorName new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveDeprecatedAnchorName(bool $doRemoveDeprecatedAnchorName = true): self
  {
    $this->doRemoveDeprecatedAnchorName = $doRemoveDeprecatedAnchorName;

    return $this;
  }
328
329
  /**
   * Switch the "doRemoveDeprecatedScriptCharsetAttribute" option on or off.
   *
   * @param bool $doRemoveDeprecatedScriptCharsetAttribute new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveDeprecatedScriptCharsetAttribute(bool $doRemoveDeprecatedScriptCharsetAttribute = true): self
  {
    $this->doRemoveDeprecatedScriptCharsetAttribute = $doRemoveDeprecatedScriptCharsetAttribute;

    return $this;
  }
340
341
  /**
   * Switch the "doRemoveDeprecatedTypeFromScriptTag" option on or off.
   *
   * @param bool $doRemoveDeprecatedTypeFromScriptTag new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveDeprecatedTypeFromScriptTag(bool $doRemoveDeprecatedTypeFromScriptTag = true): self
  {
    $this->doRemoveDeprecatedTypeFromScriptTag = $doRemoveDeprecatedTypeFromScriptTag;

    return $this;
  }
352
353
  /**
   * Switch the "doRemoveDeprecatedTypeFromStylesheetLink" option on or off.
   *
   * @param bool $doRemoveDeprecatedTypeFromStylesheetLink new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveDeprecatedTypeFromStylesheetLink(bool $doRemoveDeprecatedTypeFromStylesheetLink = true): self
  {
    $this->doRemoveDeprecatedTypeFromStylesheetLink = $doRemoveDeprecatedTypeFromStylesheetLink;

    return $this;
  }
364
365
  /**
   * Switch the "doRemoveEmptyAttributes" option on or off.
   *
   * @param bool $doRemoveEmptyAttributes new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveEmptyAttributes(bool $doRemoveEmptyAttributes = true): self
  {
    $this->doRemoveEmptyAttributes = $doRemoveEmptyAttributes;

    return $this;
  }
376
377
  /**
   * Switch the "doRemoveHttpPrefixFromAttributes" option on or off.
   *
   * @param bool $doRemoveHttpPrefixFromAttributes new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveHttpPrefixFromAttributes(bool $doRemoveHttpPrefixFromAttributes = true): self
  {
    $this->doRemoveHttpPrefixFromAttributes = $doRemoveHttpPrefixFromAttributes;

    return $this;
  }
388
389
  /**
   * Switch the "doRemoveSpacesBetweenTags" option on or off.
   *
   * @param bool $doRemoveSpacesBetweenTags new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveSpacesBetweenTags(bool $doRemoveSpacesBetweenTags = true): self
  {
    $this->doRemoveSpacesBetweenTags = $doRemoveSpacesBetweenTags;

    return $this;
  }
400
401
  /**
   * Switch the "doRemoveValueFromEmptyInput" option on or off.
   *
   * @param bool $doRemoveValueFromEmptyInput new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveValueFromEmptyInput(bool $doRemoveValueFromEmptyInput = true): self
  {
    $this->doRemoveValueFromEmptyInput = $doRemoveValueFromEmptyInput;

    return $this;
  }
412
413
  /**
   * Switch the "doRemoveWhitespaceAroundTags" option on or off.
   *
   * @param bool $doRemoveWhitespaceAroundTags new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveWhitespaceAroundTags(bool $doRemoveWhitespaceAroundTags = true): self
  {
    $this->doRemoveWhitespaceAroundTags = $doRemoveWhitespaceAroundTags;

    return $this;
  }
424
425
  /**
   * Switch the "doRemoveOmittedQuotes" option on or off.
   *
   * @param bool $doRemoveOmittedQuotes new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveOmittedQuotes(bool $doRemoveOmittedQuotes = true): self
  {
    $this->doRemoveOmittedQuotes = $doRemoveOmittedQuotes;

    return $this;
  }
436
437
  /**
   * Switch the "doRemoveOmittedHtmlTags" option on or off.
   *
   * @param bool $doRemoveOmittedHtmlTags new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doRemoveOmittedHtmlTags(bool $doRemoveOmittedHtmlTags = true): self
  {
    $this->doRemoveOmittedHtmlTags = $doRemoveOmittedHtmlTags;

    return $this;
  }
448
449
  /**
   * Switch the "doSortCssClassNames" option on or off.
   *
   * @param bool $doSortCssClassNames new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doSortCssClassNames(bool $doSortCssClassNames = true): self
  {
    $this->doSortCssClassNames = $doSortCssClassNames;

    return $this;
  }
460
461
  /**
   * Switch the "doSortHtmlAttributes" option on or off.
   *
   * @param bool $doSortHtmlAttributes new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doSortHtmlAttributes(bool $doSortHtmlAttributes = true): self
  {
    $this->doSortHtmlAttributes = $doSortHtmlAttributes;

    return $this;
  }
472
473
  /**
   * Switch the "doSumUpWhitespace" option on or off.
   *
   * @param bool $doSumUpWhitespace new state of the option
   *
   * @return $this the same instance, to allow method chaining
   */
  public function doSumUpWhitespace(bool $doSumUpWhitespace = true): self
  {
    $this->doSumUpWhitespace = $doSumUpWhitespace;

    return $this;
  }
484
485 30
  /**
   * Serialize the attributes of a DOM node into an HTML attribute string.
   *
   * - Boolean attributes are written as the bare name when "doOptimizeAttributes" is enabled.
   * - Quotes around a value are dropped when "doRemoveOmittedQuotes" is enabled and the
   *   value is non-empty and free of characters that require quoting
   *   (<p class="foo"> → <p class=foo>).
   * - A value that contains a double quote is wrapped in single quotes; if it contains
   *   both quote characters, the double quotes are escaped as "&quot;". Otherwise the
   *   serialized attribute would be terminated too early.
   *
   * @param \DOMNode $node
   *
   * @return string the attributes separated by single spaces, trimmed
   */
  private function domNodeAttributesToString(\DOMNode $node): string
  {
    // nodes without an attribute map have nothing to serialize
    if ($node->attributes === null) {
      return '';
    }

    $attrstr = '';
    foreach ($node->attributes as $attribute) {
      $attrstr .= $attribute->name;

      if (
          $this->doOptimizeAttributes === true
          &&
          isset(self::$booleanAttributes[$attribute->name])
      ) {
        // boolean attribute: the name alone is enough
        $attrstr .= ' ';
        continue;
      }

      $attr_val = $attribute->value;

      # http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#attributes-0
      # (compare against 0 strictly, so a failing regex keeps the quotes)
      $omitquotes = $this->doRemoveOmittedQuotes
                    &&
                    $attr_val !== ''
                    &&
                    \preg_match('/["\'=<>` \t\r\n\f]+/', $attr_val) === 0;

      if ($omitquotes) {
        $quote = '';
      } elseif (\strpos($attr_val, '"') === false) {
        $quote = '"';
      } elseif (\strpos($attr_val, "'") === false) {
        // the value contains a double quote only: single quotes are safe
        $quote = "'";
      } else {
        // both quote characters are present: escape the double quotes
        $quote = '"';
        $attr_val = \str_replace('"', '&quot;', $attr_val);
      }

      $attrstr .= '=' . $quote . $attr_val . $quote . ' ';
    }

    return \trim($attrstr);
  }
519
520
  /**
   * Decide whether the end tag of the given node may be left out of the output.
   *
   * The end tag is never reported as optional when non-whitespace text follows the
   * node before the next element, because that text would become part of the node
   * once the end tag is missing. The tags listed in self::$optional_end_tags
   * are exempt from this check and are always reported as optional.
   *
   * @param \DOMNode $node
   *
   * @return bool true if the closing tag can be omitted
   */
  private function domNodeClosingTagOptional(\DOMNode $node): bool
  {
    $tag_name = $node->nodeName;

    // https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-omission

    // Implemented:
    //
    // A <p> element's end tag may be omitted if the p element is immediately followed by an address, article, aside, blockquote, details, div, dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no more content in the parent element and the parent element is an HTML element that is not an a, audio, del, ins, map, noscript, or video element, or an autonomous custom element.
    // An <li> element's end tag may be omitted if the li element is immediately followed by another li element or if there is no more content in the parent element.
    // A <td> element's end tag may be omitted if the td element is immediately followed by a td or th element, or if there is no more content in the parent element.
    // An <option> element's end tag may be omitted if the option element is immediately followed by another option element, or if it is immediately followed by an optgroup element, or if there is no more content in the parent element.
    // A <tr> element's end tag may be omitted if the tr element is immediately followed by another tr element, or if there is no more content in the parent element.
    // A <th> element's end tag may be omitted if the th element is immediately followed by a td or th element, or if there is no more content in the parent element.
    // A <dt> element's end tag may be omitted if the dt element is immediately followed by another dt element or a dd element.
    // A <dd> element's end tag may be omitted if the dd element is immediately followed by another dd element or a dt element, or if there is no more content in the parent element.
    // An <rp> element's end tag may be omitted if the rp element is immediately followed by an rt or rp element, or if there is no more content in the parent element.

    // TODO:
    //
    // <html> may be omitted if first thing inside is not comment
    // <head> may be omitted if first thing inside is an element
    // <body> may be omitted if first thing inside is not space, comment, <meta>, <link>, <script>, <style> or <template>
    // <colgroup> may be omitted if first thing inside is <col>
    // <tbody> may be omitted if first thing inside is <tr>
    // An <optgroup> element's end tag may be omitted if the optgroup element is immediately followed by another optgroup element, or if there is no more content in the parent element.
    // A <colgroup> element's start tag may be omitted if the first thing inside the colgroup element is a col element, and if the element is not immediately preceded by another colgroup element whose end tag has been omitted. (It can't be omitted if the element is empty.)
    // A <colgroup> element's end tag may be omitted if the colgroup element is not immediately followed by ASCII whitespace or a comment.
    // A <caption> element's end tag may be omitted if the caption element is not immediately followed by ASCII whitespace or a comment.
    // A <thead> element's end tag may be omitted if the thead element is immediately followed by a tbody or tfoot element.
    // A <tbody> element's start tag may be omitted if the first thing inside the tbody element is a tr element, and if the element is not immediately preceded by a tbody, thead, or tfoot element whose end tag has been omitted. (It can't be omitted if the element is empty.)
    // A <tbody> element's end tag may be omitted if the tbody element is immediately followed by a tbody or tfoot element, or if there is no more content in the parent element.
    // A <tfoot> element's end tag may be omitted if there is no more content in the parent element.
    //
    // <-- However, a start tag must never be omitted if it has any attributes.

    if (\in_array($tag_name, self::$optional_end_tags, true)) {
      return true;
    }

    // "immediately followed by an element" / "no more content" do not hold
    // when real text sits between this node and the next element
    if ($this->isFollowedByTextBeforeNextElement($node)) {
      return false;
    }

    $nextSibling = $this->getNextSiblingOfTypeDOMElement($node);
    $noMoreElements = ($nextSibling === null);
    $nextTagName = $nextSibling instanceof \DOMElement ? $nextSibling->tagName : null;

    switch ($tag_name) {
      case 'li':
        return $noMoreElements || $nextTagName === 'li';

      case 'rp':
        return $noMoreElements || \in_array($nextTagName, ['rp', 'rt'], true);

      case 'tr':
        return $noMoreElements || $nextTagName === 'tr';

      case 'td':
      case 'th':
        return $noMoreElements || \in_array($nextTagName, ['td', 'th'], true);

      case 'dd':
        return $noMoreElements || \in_array($nextTagName, ['dd', 'dt'], true);

      case 'dt':
        // unlike <dd>, a trailing <dt> must keep its end tag
        return \in_array($nextTagName, ['dd', 'dt'], true);

      case 'option':
        return $noMoreElements || \in_array($nextTagName, ['option', 'optgroup'], true);

      case 'p':
        if ($noMoreElements) {
          return $node->parentNode !== null
                 &&
                 !\in_array(
                     $node->parentNode->nodeName,
                     [
                         'a',
                         'audio',
                         'del',
                         'ins',
                         'map',
                         'noscript',
                         'video',
                     ],
                     true
                 );
        }

        return \in_array(
            $nextTagName,
            [
                'address',
                'article',
                'aside',
                'blockquote',
                'details',
                'dir',
                'div',
                'dl',
                'fieldset',
                'figcaption',
                'figure',
                'footer',
                'form',
                'h1',
                'h2',
                'h3',
                'h4',
                'h5',
                'h6',
                'header',
                'hgroup',
                'hr',
                'main',
                'menu',
                'nav',
                'ol',
                'p',
                'pre',
                'section',
                'table',
                'ul',
            ],
            true
        );
    }

    return false;
  }

  /**
   * Check whether a non-whitespace text node sits between the given node and its next element sibling.
   *
   * @param \DOMNode $node
   *
   * @return bool
   */
  private function isFollowedByTextBeforeNextElement(\DOMNode $node): bool
  {
    $sibling = $node->nextSibling;
    while ($sibling !== null && !($sibling instanceof \DOMElement)) {
      if (
          $sibling instanceof \DOMText
          &&
          \trim((string)$sibling->nodeValue) !== ''
      ) {
        return true;
      }

      $sibling = $sibling->nextSibling;
    }

    return false;
  }
744
745 30
  /**
   * Serialize the child nodes of the given node into an HTML string.
   *
   * Elements are written recursively; optional end tags are skipped when
   * "doRemoveOmittedHtmlTags" is enabled. Whitespace-only text between siblings
   * is reduced to at most one space. Comment nodes that are still part of the
   * DOM are written back as "<!--...-->".
   *
   * @param \DOMNode $node the node whose children are serialized
   *
   * @return string
   */
  protected function domNodeToString(\DOMNode $node): string
  {
    // init
    $html = '';

    // tracks whether a collapsed whitespace was handled for the previous child:
    // '' -> nothing pending, 'is_empty' -> set in this iteration,
    // 'last_was_empty' -> set in the previous iteration
    $emptyStringTmp = '';

    foreach ($node->childNodes as $child) {

      if ($emptyStringTmp === 'is_empty') {
        $emptyStringTmp = 'last_was_empty';
      } else {
        $emptyStringTmp = '';
      }

      if ($child instanceof \DOMDocumentType) {

        // add the doc-type only if it wasn't generated by DomDocument
        if ($this->withDocType !== true) {
          continue;
        }

        if ($child->name) {
          $html .= '<!DOCTYPE ' . $child->name;

          if ($child->publicId) {
            $html .= ' PUBLIC "' . $child->publicId . '"';

            // the system identifier follows the public one, separated by one space
            if ($child->systemId) {
              $html .= ' "' . $child->systemId . '"';
            }
          } elseif ($child->systemId) {
            $html .= ' SYSTEM "' . $child->systemId . '"';
          }

          $html .= '>';
        }

      } elseif ($child instanceof \DOMElement) {

        $html .= \rtrim('<' . $child->tagName . ' ' . $this->domNodeAttributesToString($child));
        $html .= '>' . $this->domNodeToString($child);

        if (
            $this->doRemoveOmittedHtmlTags === false
            ||
            !$this->domNodeClosingTagOptional($child)
        ) {
          $html .= '</' . $child->tagName . '>';
        }

        if (
            $this->doRemoveWhitespaceAroundTags === false
            &&
            $child->nextSibling instanceof \DOMText
            &&
            $child->nextSibling->wholeText === ' '
        ) {
          if (
              $emptyStringTmp !== 'last_was_empty'
              &&
              \substr($html, -1) !== ' '
          ) {
            $html .= ' ';
          }
          $emptyStringTmp = 'is_empty';
        }

      } elseif ($child instanceof \DOMText) {

        if (!$child->isElementContentWhitespace()) {

          $html .= $child->wholeText;

        } elseif (
            $child->previousSibling !== null
            &&
            $child->nextSibling !== null
        ) {
          // whitespace between two siblings: keep a single separating space
          if (
              $emptyStringTmp !== 'last_was_empty'
              &&
              \substr($html, -1) !== ' '
          ) {
            $html .= ' ';
          }
          $emptyStringTmp = 'is_empty';
        }

      } elseif ($child instanceof \DOMComment) {

        // DOMComment has no "wholeText" property; its content lives in "data"
        $html .= '<!--' . $child->data . '-->';

      }
    }

    return $html;
  }
845
846
  /**
   * Find the first following sibling of the given node that is a \DOMElement.
   *
   * Siblings of any other node type are skipped.
   *
   * @param \DOMNode $node
   *
   * @return \DOMNode|null the next element sibling, or null if there is none
   */
  protected function getNextSiblingOfTypeDOMElement(\DOMNode $node)
  {
    $candidate = $node->nextSibling;

    while ($candidate !== null && !($candidate instanceof \DOMElement)) {
      $candidate = $candidate->nextSibling;
    }

    return $candidate;
  }
859
860
  /**
   * Check if the given comment content is part of a conditional comment.
   *
   * The content counts as conditional when it starts with "[if expression]"
   * or ends with "[endif]".
   *
   * INFO: since IE >= 10 conditional comments are not working anymore
   *
   * <!--[if expression]> HTML <![endif]-->
   * <![if expression]> HTML <![endif]>
   *
   * @param string $comment
   *
   * @return bool
   */
  private function isConditionalComment($comment): bool
  {
    // the second pattern is only evaluated when the first one does not match
    return preg_match('/^\[if [^\]]+\]/', $comment)
           ||
           preg_match('/\[endif\]$/', $comment);
  }
884
885
  /**
   * Minify the given HTML.
   *
   * The input is trimmed first; an empty result is returned as an empty string.
   * If the processed HTML ends up longer than the trimmed input, the trimmed
   * input is returned instead.
   *
   * @param string $html
   * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
   *
   * @return string
   */
  public function minify($html, $decodeUtf8Specials = false): string
  {
    // compare with '' explicitly: a truthiness check would also discard the input "0"
    $html = \trim((string)$html);
    if ($html === '') {
      return '';
    }

    // init
    static $CACHE_SELF_CLOSING_TAGS = null;
    if ($CACHE_SELF_CLOSING_TAGS === null) {
      $CACHE_SELF_CLOSING_TAGS = \implode('|', self::$selfClosingTags);
    }

    // reset
    $this->protectedChildNodes = [];

    // save old content
    $origHtml = $html;
    $origHtmlLength = \strlen($html);

    // -------------------------------------------------------------------------
    // Minify the HTML via "HtmlDomParser"
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser === true) {
      $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
    }

    // -------------------------------------------------------------------------
    // Trim whitespace from html-string. [protected html is still protected]
    // -------------------------------------------------------------------------

    // Remove extra white-space(s) between HTML attribute(s)
    $html = (string)\preg_replace_callback(
        '#<([^/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(/?)>#',
        static function ($matches) {
          // re-join the attributes of one tag with exactly one leading space each
          $attributes = (string)\preg_replace(
              '#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s',
              ' $1$2',
              $matches[2]
          );

          return '<' . $matches[1] . $attributes . $matches[3] . '>';
        },
        $html
    );

    if ($this->doRemoveSpacesBetweenTags === true) {
      // Remove spaces that are between > and <
      $html = (string)\preg_replace('/(>) (<)/', '>$2', $html);
    }

    // -------------------------------------------------------------------------
    // Restore protected HTML-code.
    // -------------------------------------------------------------------------

    $html = (string)\preg_replace_callback(
        '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
        [$this, 'restoreProtectedHtml'],
        $html
    );

    // -------------------------------------------------------------------------
    // Restore protected HTML-entities.
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser === true) {
      $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
    }

    // ------------------------------------
    // Final clean-up
    // ------------------------------------

    // drop line breaks directly around the <html> and <head> tags
    $html = \str_replace(
        [
            'html>' . "\n",
            "\n" . '<html',
            'html/>' . "\n",
            "\n" . '</html',
            'head>' . "\n",
            "\n" . '<head',
            'head/>' . "\n",
            "\n" . '</head',
        ],
        [
            'html>',
            '<html',
            'html/>',
            '</html',
            'head>',
            '<head',
            'head/>',
            '</head',
        ],
        $html
    );

    // self closing tags, don't need a trailing slash ...
    $replace = [];
    $replacement = [];
    foreach (self::$selfClosingTags as $selfClosingTag) {
      $tagWithoutSlash = '<' . $selfClosingTag . '>';

      $replace[] = '<' . $selfClosingTag . '/>';
      $replacement[] = $tagWithoutSlash;
      $replace[] = '<' . $selfClosingTag . ' />';
      $replacement[] = $tagWithoutSlash;
    }
    $html = \str_replace($replace, $replacement, $html);

    // ... and they don't need an explicit end tag either
    $html = (string)\preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]*+)><\/\b\1>#', '<\\1\\2>', $html);

    // ------------------------------------
    // check if compression worked
    // ------------------------------------

    if ($origHtmlLength < \strlen($html)) {
      $html = $origHtml;
    }

    return $html;
  }
1015
1016
  /**
   * Run all DOM based minification steps and return the resulting markup.
   *
   * The markup is loaded into a HtmlDomParser, script/style content and
   * conditional comments are swapped for placeholders via protectTags(), and
   * the enabled steps (comment removal, whitespace sum-up, attribute
   * optimization, whitespace handling around tags) are applied before the
   * document is converted back into a string.
   *
   * Side effect: $this->withDocType is set depending on whether the
   * (left-trimmed) input starts with "<!DOCTYPE" (case-insensitive).
   *
   * @param string $html               <p>The markup that should be minified.</p>
   * @param mixed  $decodeUtf8Specials <p>Forwarded unchanged to HtmlDomParser::fixHtmlOutput(),
   *                                   presumably a bool flag (TODO: confirm).</p>
   *
   * @return string
   */
  private function minifyHtmlDom($html, $decodeUtf8Specials): string
  {
    // configure the parser before the markup is loaded
    $dom = new HtmlDomParser();
    $dom->useKeepBrokenHtml($this->keepBrokenHtml);
    $dom->getDocument()->preserveWhiteSpace = false; // drop redundant white space
    $dom->getDocument()->formatOutput = false; // no pretty-printing / indentation
    $dom->loadHtml($html);

    // remember whether the input carried a doctype
    $this->withDocType = \stripos(\ltrim($html), '<!DOCTYPE') === 0;

    // -------------------------------------------------------------------------
    // Protect HTML tags and conditional comments.
    // -------------------------------------------------------------------------

    $dom = $this->protectTags($dom);

    // -------------------------------------------------------------------------
    // Document-wide steps. [protected html is still protected]
    // -------------------------------------------------------------------------

    if ($this->doRemoveComments === true) {
      $dom = $this->removeComments($dom);
    }

    if ($this->doSumUpWhitespace === true) {
      $dom = $this->sumUpWhitespace($dom);
    }

    // -------------------------------------------------------------------------
    // Per-element steps. [protected html is still protected]
    // -------------------------------------------------------------------------

    foreach ($dom->find('*') as $element) {
      if ($this->doOptimizeAttributes === true) {
        $this->optimizeAttributes($element);
      }

      if ($this->doRemoveWhitespaceAroundTags === true) {
        $this->removeWhitespaceAroundTags($element);
      }
    }

    // -------------------------------------------------------------------------
    // Convert the Dom into a string.
    // -------------------------------------------------------------------------

    return $dom->fixHtmlOutput(
        $this->domNodeToString($dom->getDocument()),
        $decodeUtf8Specials
    );
  }
1088
1089
  /**
   * Optimize the attributes of one element: strip a leading "http:" scheme,
   * drop removable attributes, sort css-class-names and (optionally) sort the
   * attributes themselves, so that gzip can do better work.
   *
   * Every attribute that is kept is removed from the element and re-added
   * afterwards. This way the rewritten values are always written back; the
   * attribute order only changes when "doSortHtmlAttributes" is enabled.
   *
   * @param SimpleHtmlDom $element
   *
   * @return bool <p>false if the element reports no attributes (null), true otherwise</p>
   */
  private function optimizeAttributes(SimpleHtmlDom $element): bool
  {
    $attributes = $element->getAllAttributes();
    if ($attributes === null) {
      return false;
    }

    $attrs = [];
    foreach ((array)$attributes as $attrName => $attrValue) {

      // -------------------------------------------------------------------------
      // Remove optional "http:"-prefix from attributes.
      // -------------------------------------------------------------------------

      if (
          $this->doRemoveHttpPrefixFromAttributes === true
          &&
          ($attrName === 'href' || $attrName === 'src' || $attrName === 'action')
          &&
          !(isset($attributes['rel']) && $attributes['rel'] === 'external')
          &&
          !(isset($attributes['target']) && $attributes['target'] === '_blank')
          &&
          \strpos($attrValue, 'http://') === 0
      ) {
        // only the leading scheme is stripped, a "http://" inside of the value
        // (e.g. in a query-string) must stay untouched
        $attrValue = \substr($attrValue, 5);
      }

      if ($this->removeAttributeHelper($element->tag, $attrName, $attrValue, $attributes)) {
        $element->{$attrName} = null;
        continue;
      }

      // -------------------------------------------------------------------------
      // Sort css-class-names, for better gzip results.
      // -------------------------------------------------------------------------

      if ($this->doSortCssClassNames === true) {
        $attrValue = $this->sortCssClassNames($attrName, $attrValue);
      }

      // collect the (maybe rewritten) value, it is re-added below
      $attrs[$attrName] = $attrValue;
      $element->{$attrName} = null;
    }

    // -------------------------------------------------------------------------
    // Sort html-attributes, for better gzip results.
    // -------------------------------------------------------------------------

    if ($this->doSortHtmlAttributes === true) {
      \ksort($attrs);
    }

    // write the values back (in original order, if sorting is disabled)
    foreach ($attrs as $attrName => $attrValue) {
      $attrValue = HtmlDomParser::replaceToPreserveHtmlEntities($attrValue);
      $element->setAttribute($attrName, $attrValue, true);
    }

    return true;
  }
1155
1156
  /**
   * Swap content that must not be touched by the minifier for placeholders.
   *
   * Handled are:
   * - "script" and "style" elements without a "src" attribute: the text is
   *   stored and the node value is replaced by a placeholder
   * - conditional comments (as detected by isConditionalComment()): the
   *   comment is stored as "<!--...-->" and the node is replaced by a text
   *   node holding a placeholder
   *
   * The stored content lives in $this->protectedChildNodes, keyed by the
   * number that is embedded in the placeholder.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function protectTags(HtmlDomParser $dom): HtmlDomParser
  {
    // builds the placeholder markup for the given storage key
    $placeholderFor = function (int $id): string {
      return '<' . $this->protectedChildNodesHelper . ' data-' . $this->protectedChildNodesHelper . '="' . $id . '"></' . $this->protectedChildNodesHelper . '>';
    };

    $counter = 0;

    foreach ($dom->find('script, style') as $element) {

      // elements with a "src" attribute are left alone
      if ($element->tag === 'script' || $element->tag === 'style') {
        $attributes = $element->getAllAttributes();
        if (isset($attributes['src'])) {
          continue;
        }
      }

      $this->protectedChildNodes[$counter] = $element->text();
      $element->getNode()->nodeValue = $placeholderFor($counter);

      ++$counter;
    }

    $dom->getDocument()->normalizeDocument();

    foreach ($dom->find('//comment()') as $element) {
      $text = $element->text();

      // normal comments are not protected
      if ($this->isConditionalComment($text) === false) {
        continue;
      }

      $this->protectedChildNodes[$counter] = '<!--' . $text . '-->';

      /* @var $node \DOMComment */
      $node = $element->getNode();
      $child = new \DOMText($placeholderFor($counter));
      $element->getNode()->parentNode->replaceChild($child, $node);

      ++$counter;
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
1208
1209
  /**
   * Decide whether an attribute is redundant and can be dropped.
   *
   * Each rule is only active if its "doRemove..." option is enabled; the
   * first rule that matches wins.
   *
   * @param string $tag       <p>name of the tag the attribute belongs to</p>
   * @param string $attrName  <p>name of the attribute</p>
   * @param string $attrValue <p>value of the attribute</p>
   * @param array  $allAttr   <p>all attributes of the element, keyed by name</p>
   *
   * @return bool <p>true if the attribute can be removed</p>
   */
  private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr): bool
  {
    // remove defaults
    if (
        $this->doRemoveDefaultAttributes === true
        &&
        (
            ($tag === 'script' && $attrName === 'language' && $attrValue === 'javascript')
            ||
            ($tag === 'form' && $attrName === 'method' && $attrValue === 'get')
            ||
            ($tag === 'input' && $attrName === 'type' && $attrValue === 'text')
            ||
            ($tag === 'area' && $attrName === 'shape' && $attrValue === 'rect')
        )
    ) {
      return true;
    }

    // remove deprecated charset-attribute (the browser will use the charset from the HTTP-Header, anyway)
    if (
        $this->doRemoveDeprecatedScriptCharsetAttribute === true
        &&
        $tag === 'script' && $attrName === 'charset' && !isset($allAttr['src'])
    ) {
      return true;
    }

    // remove deprecated anchor-jump ("name" that only repeats the "id")
    if (
        $this->doRemoveDeprecatedAnchorName === true
        &&
        $tag === 'a' && $attrName === 'name' && isset($allAttr['id']) && $allAttr['id'] === $attrValue
    ) {
      return true;
    }

    // remove "type=text/css" for css links
    if (
        $this->doRemoveDeprecatedTypeFromStylesheetLink === true
        &&
        $tag === 'link' && $attrName === 'type' && $attrValue === 'text/css'
        &&
        isset($allAttr['rel']) && $allAttr['rel'] === 'stylesheet'
    ) {
      return true;
    }

    // remove deprecated script-mime-types (only for scripts with a "src")
    if (
        $this->doRemoveDeprecatedTypeFromScriptTag === true
        &&
        $tag === 'script' && $attrName === 'type'
        &&
        isset($allAttr['src'], self::$executableScriptsMimeTypes[$attrValue])
    ) {
      return true;
    }

    // remove 'value=""' from <input type="text">
    if (
        $this->doRemoveValueFromEmptyInput === true
        &&
        $tag === 'input' && $attrName === 'value' && $attrValue === ''
        &&
        isset($allAttr['type']) && $allAttr['type'] === 'text'
    ) {
      return true;
    }

    // remove some empty attributes
    if (
        $this->doRemoveEmptyAttributes === true
        &&
        \trim($attrValue) === ''
        &&
        \preg_match('/^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/', $attrName)
    ) {
      return true;
    }

    return false;
  }
1285
1286
  /**
   * Drop comment nodes from the dom.
   *
   * A comment survives if its text contains a "[" character; every other
   * comment node is detached from its parent. The document is normalized
   * afterwards.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function removeComments(HtmlDomParser $dom): HtmlDomParser
  {
    foreach ($dom->find('//comment()') as $commentWrapper) {
      $commentNode = $commentWrapper->getNode();

      // comments with a "[" are kept
      if (\strpos($commentNode->nodeValue, '[') !== false) {
        continue;
      }

      $commentNode->parentNode->removeChild($commentNode);
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
1307
1308
  /**
   * Collapse whitespace in the text nodes in and around an element.
   *
   * Only elements whose tag is a key of self::$trimWhitespaceFromTags and
   * that have at least one child node are processed. For those, the first
   * child, the last child, the previous sibling and the next sibling are
   * inspected; each of them that is a text node gets every match of
   * self::$regExSpace replaced by a single space.
   *
   * @param SimpleHtmlDom $element
   *
   * @return void
   */
  private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
  {
    if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
      return;
    }

    $node = $element->getNode();
    if ($node->childNodes->length < 1) {
      return;
    }

    $candidates = [
        $node->firstChild,
        $node->lastChild,
        $node->previousSibling,
        $node->nextSibling,
    ];

    // DOM nodes are objects, so no by-reference iteration is needed here
    foreach ($candidates as $candidate) {
      if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
        continue;
      }

      $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);

      // preg_replace() returns null on a PCRE error, keep the text in that case
      if ($collapsed !== null) {
        $candidate->nodeValue = $collapsed;
      }
    }
  }
1339
1340
  /**
   * Callback function for preg_replace_callback use: replaces a placeholder
   * element with the content that was stored for it.
   *
   * The storage key is the quoted number found in the placeholder's
   * attributes; it is looked up in $this->protectedChildNodes.
   *
   * @param array $matches <p>PREG matches, the named group "attributes" is used</p>
   *
   * @return string <p>the stored content, or an empty string if no key was
   *                found or nothing is stored for the key</p>
   */
  private function restoreProtectedHtml($matches): string
  {
    $attributes = $matches['attributes'] ?? '';

    // without a readable key there is nothing to look up
    if (\preg_match('/.*"(?<id>\d+)"/', $attributes, $matchesInner) !== 1) {
      return '';
    }

    return $this->protectedChildNodes[$matchesInner['id']] ?? '';
  }
1358
1359
  /**
   * Set the "keepBrokenHtml" option; the value is handed to
   * HtmlDomParser::useKeepBrokenHtml() when the dom is built.
   *
   * WARNING: maybe bad for performance ...
   *
   * @param bool $keepBrokenHtml
   *
   * @return HtmlMin <p>the same instance, for method chaining</p>
   */
  public function useKeepBrokenHtml(bool $keepBrokenHtml): HtmlMin
  {
    $this->keepBrokenHtml = $keepBrokenHtml;

    // fluent interface
    return $this;
  }
1372
1373
  /**
   * Store the list of domains for the "remove http-prefix from attributes"
   * option.
   *
   * @param array $domainsToRemoveHttpPrefixFromAttributes
   *
   * @return $this <p>the same instance, for method chaining</p>
   */
  public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes): self
  {
    $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

    // fluent interface
    return $this;
  }
1384
1385
  /**
   * Sort and de-duplicate the class-names of a "class" attribute.
   *
   * The value is split on HTML whitespace (space, tab, LF, FF, CR), so that
   * class-names separated by tabs or line-breaks are handled as well, and
   * every non-empty token is kept (including a class that is named "0").
   *
   * @param string $attrName  <p>name of the attribute</p>
   * @param string $attrValue <p>value of the attribute</p>
   *
   * @return string <p>the unchanged value for every attribute other than
   *                "class" and for empty values, otherwise the unique
   *                class-names sorted and joined by one space</p>
   */
  private function sortCssClassNames($attrName, $attrValue): string
  {
    if ($attrName !== 'class' || !$attrValue) {
      return $attrValue;
    }

    $classes = \preg_split('/[ \t\n\f\r]+/', $attrValue, -1, \PREG_SPLIT_NO_EMPTY);
    if ($classes === false) {
      // PCRE error: better keep the value than lose class-names
      return $attrValue;
    }

    $classes = \array_unique($classes);
    \sort($classes);

    return \implode(' ', $classes);
  }
1415
1416
  /**
   * Sum-up extra whitespace from dom-nodes.
   *
   * Every text node gets each match of self::$regExSpace replaced by a
   * single space. Text nodes are skipped if their node path contains
   * "/<entry>" for one of the entries of self::$skipTagsForRemoveWhitespace
   * (plain substring check on the path). The document is normalized
   * afterwards.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function sumUpWhitespace(HtmlDomParser $dom): HtmlDomParser
  {
    foreach ($dom->find('//text()') as $textnodeWrapper) {
      /* @var $textnode \DOMNode */
      $textnode = $textnodeWrapper->getNode();
      $xp = $textnode->getNodePath();

      $doSkip = false;
      foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
        if (\strpos($xp, "/$pattern") !== false) {
          $doSkip = true;
          break;
        }
      }
      if ($doSkip) {
        continue;
      }

      $collapsed = \preg_replace(self::$regExSpace, ' ', $textnode->nodeValue);

      // preg_replace() returns null on a PCRE error, keep the text in that case
      if ($collapsed !== null) {
        $textnode->nodeValue = $collapsed;
      }
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
1449
}
1450