Completed
Push — master ( 0b219f...9cd978 )
by Lars
01:56
created

setDomainsToRemoveHttpPrefixFromAttributes()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 0
cts 0
cp 0
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * Class HtmlMin
7
 *
8
 * Inspired by:
9
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
10
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
11
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
12
 * - PHP: https://github.com/zaininnari/html-minifier
13
 * - Java: https://code.google.com/archive/p/htmlcompressor/
14
 *
15
 * @package voku\helper
16
 */
17
class HtmlMin
18
{
19
  /**
20
   * @var string
21
   */
22
  private static $regExSpace = "/[[:space:]]{2,}|[\r\n]+/u";
23
24
  /**
25
   * // https://mathiasbynens.be/demo/javascript-mime-type
26
   * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
27
   *
28
   * @var array
29
   */
30
  private static $executableScriptsMimeTypes = array(
31
      'text/javascript'          => '',
32
      'text/ecmascript'          => '',
33
      'text/jscript'             => '',
34
      'application/javascript'   => '',
35
      'application/x-javascript' => '',
36
      'application/ecmascript'   => '',
37
  );
38
39
  private static $selfClosingTags = array(
40
      'area',
41
      'base',
42
      'basefont',
43
      'br',
44
      'col',
45
      'command',
46
      'embed',
47
      'frame',
48
      'hr',
49
      'img',
50
      'input',
51
      'isindex',
52
      'keygen',
53
      'link',
54
      'meta',
55
      'param',
56
      'source',
57
      'track',
58
      'wbr',
59
  );
60
61
  private static $trimWhitespaceFromTags = array(
62
      'article' => '',
63
      'br'      => '',
64
      'div'     => '',
65
      'footer'  => '',
66
      'hr'      => '',
67
      'nav'     => '',
68
      'p'       => '',
69
      'script'  => '',
70
  );
71
72
  /**
73
   * @var array
74
   */
75
  private static $booleanAttributes = array(
76
      'allowfullscreen' => '',
77
      'async'           => '',
78
      'autofocus'       => '',
79
      'autoplay'        => '',
80
      'checked'         => '',
81
      'compact'         => '',
82
      'controls'        => '',
83
      'declare'         => '',
84
      'default'         => '',
85
      'defaultchecked'  => '',
86
      'defaultmuted'    => '',
87
      'defaultselected' => '',
88
      'defer'           => '',
89
      'disabled'        => '',
90
      'enabled'         => '',
91
      'formnovalidate'  => '',
92
      'hidden'          => '',
93
      'indeterminate'   => '',
94
      'inert'           => '',
95
      'ismap'           => '',
96
      'itemscope'       => '',
97
      'loop'            => '',
98
      'multiple'        => '',
99
      'muted'           => '',
100
      'nohref'          => '',
101
      'noresize'        => '',
102
      'noshade'         => '',
103
      'novalidate'      => '',
104
      'nowrap'          => '',
105
      'open'            => '',
106
      'pauseonexit'     => '',
107
      'readonly'        => '',
108
      'required'        => '',
109
      'reversed'        => '',
110
      'scoped'          => '',
111
      'seamless'        => '',
112
      'selected'        => '',
113
      'sortable'        => '',
114
      'truespeed'       => '',
115
      'typemustmatch'   => '',
116
      'visible'         => '',
117
  );
118
  /**
119
   * @var array
120
   */
121
  private static $skipTagsForRemoveWhitespace = array(
122
      'code',
123
      'pre',
124
      'script',
125
      'style',
126
      'textarea',
127
  );
128
129
  /**
130
   * @var array
131
   */
132
  private $protectedChildNodes = array();
133
134
  /**
135
   * @var string
136 23
   */
137
  private $protectedChildNodesHelper = 'html-min--voku--saved-content';
138 23
139 23
  /**
140
   * @var string
141 23
   */
142 23
  private $booleanAttributesHelper = 'html-min--voku--delete-this';
143 23
144
  /**
145
   * @var bool
146
   */
147
  private $doOptimizeViaHtmlDomParser = true;
148
149
  /**
150 23
   * @var bool
151
   */
152 23
  private $doOptimizeAttributes = true;
153 23
154 1
  /**
155
   * @var bool
156
   */
157 23
  private $doRemoveComments = true;
158 23
159 3
  /**
160
   * @var bool
161
   */
162
  private $doRemoveWhitespaceAroundTags = true;
163 20
164 20
  /**
165 20
   * @var bool
166
   */
167 20
  private $doRemoveHttpPrefixFromAttributes = false;
168 20
169 20
170
  /**
171 20
   * @var array
172
   */
173 20
  private $domainsToRemoveHttpPrefixFromAttributes = array(
174 20
      'google.com',
175 11
      'google.de',
176 11
  );
177 11
178
  /**
179 11
   * @var bool
180
   */
181
  private $doSortCssClassNames = true;
182
183
  /**
184
   * @var bool
185
   */
186 11
  private $doSortHtmlAttributes = true;
187
188 11
  /**
189
   * @var bool
190 11
   */
191
  private $doRemoveDeprecatedScriptCharsetAttribute = true;
192
193
  /**
194
   * @var bool
195
   */
196 11
  private $doRemoveDefaultAttributes = false;
197 11
198 11
  /**
199
   * @var bool
200 11
   */
201 11
  private $doRemoveDeprecatedAnchorName = true;
202
203
  /**
204
   * @var bool
205
   */
206
  private $doRemoveDeprecatedTypeFromStylesheetLink = true;
207 11
208
  /**
209 11
   * @var bool
210
   */
211 11
  private $doRemoveDeprecatedTypeFromScriptTag = true;
212 11
213 11
  /**
214 11
   * @var bool
215 11
   */
216 11
  private $doRemoveValueFromEmptyInput = true;
217
218 11
  /**
219 11
   * @var bool
220 11
   */
221 11
  private $doRemoveEmptyAttributes = true;
222 11
223 11
  /**
224
   * @var bool
225 11
   */
226
  private $doSumUpWhitespace = true;
227 11
228
  /**
229 11
   * @var bool
230 11
   */
231
  private $doRemoveSpacesBetweenTags = false;
232 11
233 11
  /**
   * Create a new minifier instance.
   *
   * All options keep the defaults declared on the properties of this class.
   */
  public function __construct()
  {
    // intentionally empty: nothing to initialize
  }
239 1
240 1
  /**
   * Enable or disable the per-element attribute optimization (see optimizeAttributes()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doOptimizeAttributes
   *
   * @return $this
   */
  public function doOptimizeAttributes($doOptimizeAttributes = true)
  {
    $this->doOptimizeAttributes = (bool)$doOptimizeAttributes;

    return $this;
  }
251 11
252
  /**
   * Enable or disable the DOM-based optimization step that minify() runs via minifyHtmlDom().
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doOptimizeViaHtmlDomParser
   *
   * @return $this
   */
  public function doOptimizeViaHtmlDomParser($doOptimizeViaHtmlDomParser = true)
  {
    $this->doOptimizeViaHtmlDomParser = (bool)$doOptimizeViaHtmlDomParser;

    return $this;
  }
263
264 20
  /**
   * Enable or disable the removal of HTML comments (see removeComments()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveComments
   *
   * @return $this
   */
  public function doRemoveComments($doRemoveComments = true)
  {
    $this->doRemoveComments = (bool)$doRemoveComments;

    return $this;
  }
275 5
276
  /**
   * Enable or disable the removal of attributes that only repeat a default value,
   * e.g. method="get" on a form (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveDefaultAttributes
   *
   * @return $this
   */
  public function doRemoveDefaultAttributes($doRemoveDefaultAttributes = true)
  {
    $this->doRemoveDefaultAttributes = (bool)$doRemoveDefaultAttributes;

    return $this;
  }
287
288 9
  /**
   * Enable or disable the removal of the "name" attribute from anchors whose "id"
   * has the same value (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveDeprecatedAnchorName
   *
   * @return $this
   */
  public function doRemoveDeprecatedAnchorName($doRemoveDeprecatedAnchorName = true)
  {
    $this->doRemoveDeprecatedAnchorName = (bool)$doRemoveDeprecatedAnchorName;

    return $this;
  }
299 9
300 9
  /**
   * Enable or disable the removal of the "charset" attribute from script tags
   * without a "src" attribute (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveDeprecatedScriptCharsetAttribute
   *
   * @return $this
   */
  public function doRemoveDeprecatedScriptCharsetAttribute($doRemoveDeprecatedScriptCharsetAttribute = true)
  {
    $this->doRemoveDeprecatedScriptCharsetAttribute = (bool)$doRemoveDeprecatedScriptCharsetAttribute;

    return $this;
  }
311
312
  /**
   * Enable or disable the removal of JavaScript mime-type "type" attributes from
   * script tags that have a "src" attribute (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveDeprecatedTypeFromScriptTag
   *
   * @return $this
   */
  public function doRemoveDeprecatedTypeFromScriptTag($doRemoveDeprecatedTypeFromScriptTag = true)
  {
    $this->doRemoveDeprecatedTypeFromScriptTag = (bool)$doRemoveDeprecatedTypeFromScriptTag;

    return $this;
  }
323
324
  /**
   * Enable or disable the removal of type="text/css" from link tags with
   * rel="stylesheet" (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveDeprecatedTypeFromStylesheetLink
   *
   * @return $this
   */
  public function doRemoveDeprecatedTypeFromStylesheetLink($doRemoveDeprecatedTypeFromStylesheetLink = true)
  {
    $this->doRemoveDeprecatedTypeFromStylesheetLink = (bool)$doRemoveDeprecatedTypeFromStylesheetLink;

    return $this;
  }
335
336 9
  /**
   * Enable or disable the removal of some empty attributes such as class, id,
   * style or title (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveEmptyAttributes
   *
   * @return $this
   */
  public function doRemoveEmptyAttributes($doRemoveEmptyAttributes = true)
  {
    $this->doRemoveEmptyAttributes = (bool)$doRemoveEmptyAttributes;

    return $this;
  }
347
348
  /**
   * Enable or disable the removal of the "http:" prefix from href, src and action
   * attributes (see optimizeAttributes()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveHttpPrefixFromAttributes
   *
   * @return $this
   */
  public function doRemoveHttpPrefixFromAttributes($doRemoveHttpPrefixFromAttributes = true)
  {
    $this->doRemoveHttpPrefixFromAttributes = (bool)$doRemoveHttpPrefixFromAttributes;

    return $this;
  }
359
360
  /**
   * Enable or disable the removal of value="" from input tags with type="text"
   * (see removeAttributeHelper()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveValueFromEmptyInput
   *
   * @return $this
   */
  public function doRemoveValueFromEmptyInput($doRemoveValueFromEmptyInput = true)
  {
    $this->doRemoveValueFromEmptyInput = (bool)$doRemoveValueFromEmptyInput;

    return $this;
  }
371
372
  /**
   * Enable or disable the trimming of whitespace around tags
   * (see removeWhitespaceAroundTags()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveWhitespaceAroundTags
   *
   * @return $this
   */
  public function doRemoveWhitespaceAroundTags($doRemoveWhitespaceAroundTags = true)
  {
    $this->doRemoveWhitespaceAroundTags = (bool)$doRemoveWhitespaceAroundTags;

    return $this;
  }
383 20
384 17
  /**
   * Enable or disable the sorting of css-class-names (see optimizeAttributes()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doSortCssClassNames
   *
   * @return $this
   */
  public function doSortCssClassNames($doSortCssClassNames = true)
  {
    $this->doSortCssClassNames = (bool)$doSortCssClassNames;

    return $this;
  }
395
396 20
  /**
   * Enable or disable the sorting of html-attributes by name (see optimizeAttributes()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doSortHtmlAttributes
   *
   * @return $this
   */
  public function doSortHtmlAttributes($doSortHtmlAttributes = true)
  {
    $this->doSortHtmlAttributes = (bool)$doSortHtmlAttributes;

    return $this;
  }
407 2
408
  /**
   * Enable or disable the summing-up of extra whitespace in the dom
   * (the sumUpWhitespace() step of minifyHtmlDom()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doSumUpWhitespace
   *
   * @return $this
   */
  public function doSumUpWhitespace($doSumUpWhitespace = true)
  {
    $this->doSumUpWhitespace = (bool)$doSumUpWhitespace;

    return $this;
  }
419
420 3
  /**
   * Enable or disable the removal of a single space between ">" and "<" (see minify()).
   *
   * The value is cast to bool because the flag is later compared with "=== true";
   * without the cast a truthy non-boolean (e.g. 1) would silently disable the option.
   *
   * @param bool $doRemoveSpacesBetweenTags
   *
   * @return $this
   */
  public function doRemoveSpacesBetweenTags($doRemoveSpacesBetweenTags = true)
  {
    $this->doRemoveSpacesBetweenTags = (bool)$doRemoveSpacesBetweenTags;

    return $this;
  }
431
432
  /**
   * Tell whether the given comment text is (part of) a conditional comment.
   *
   * The text counts as conditional when it starts with "[if expression]"
   * or when it ends with "[endif]".
   *
   * INFO: since IE >= 10 conditional comments are not working anymore
   *
   * <!--[if expression]> HTML <![endif]-->
   * <![if expression]> HTML <![endif]>
   *
   * @param string $comment
   *
   * @return bool
   */
  private function isConditionalComment($comment)
  {
    // the closing marker is only looked for when the opening marker is missing
    $startsWithIf = (bool)preg_match('/^\[if [^\]]+\]/', $comment);

    return $startsWithIf || (bool)preg_match('/\[endif\]$/', $comment);
  }
456
457 12
  /**
   * Minify the given HTML string.
   *
   * Steps: optional DOM-based optimization via minifyHtmlDom(), collapsing of
   * whitespace between html-attributes, optional removal of single spaces between
   * tags, restoring of the protected content and a final clean-up. If the result
   * is longer than the trimmed input, the trimmed input is returned instead.
   *
   * @param string $html
   * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
   *
   * @return string
   */
  public function minify($html, $decodeUtf8Specials = false)
  {
    $html = (string)$html;
    if (!isset($html[0])) {
      return '';
    }

    $html = trim($html);
    // Compare with the empty string explicitly: a loose "!$html" check would
    // also throw away the valid input "0".
    if ($html === '') {
      return '';
    }

    // init
    static $CACHE_SELF_CLOSING_TAGS = null;
    if ($CACHE_SELF_CLOSING_TAGS === null) {
      $CACHE_SELF_CLOSING_TAGS = implode('|', self::$selfClosingTags);
    }

    // reset
    $this->protectedChildNodes = array();

    // save old content
    $origHtml = $html;
    $origHtmlLength = UTF8::strlen($html);

    // -------------------------------------------------------------------------
    // Minify the HTML via "HtmlDomParser"
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser === true) {
      $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
    }

    // -------------------------------------------------------------------------
    // Trim whitespace from html-string. [protected html is still protected]
    // -------------------------------------------------------------------------

    // Remove extra white-space(s) between HTML attribute(s)
    $html = (string)\preg_replace_callback(
        '#<([^\/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(\/?)>#',
        function ($matches) {
          return '<' . $matches[1] . (string)\preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]) . $matches[3] . '>';
        },
        $html
    );

    if ($this->doRemoveSpacesBetweenTags === true) {
      // Remove spaces that are between > and <
      $html = (string)\preg_replace('/(>) (<)/', '>$2', $html);
    }

    // -------------------------------------------------------------------------
    // Restore protected HTML-code.
    // -------------------------------------------------------------------------

    // quote the placeholder tag name so it is always matched literally
    $placeholderTag = \preg_quote($this->protectedChildNodesHelper, '/');
    $html = (string)\preg_replace_callback(
        '/<(?<element>' . $placeholderTag . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $placeholderTag . '>/',
        array($this, 'restoreProtectedHtml'),
        $html
    );

    // -------------------------------------------------------------------------
    // Restore protected HTML-entities.
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser === true) {
      $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
    }

    // ------------------------------------
    // Final clean-up
    // ------------------------------------

    $html = UTF8::cleanup($html);

    // drop line breaks around the html / head tags and the helper value that
    // optimizeAttributes() assigns to boolean attributes
    $html = \str_replace(
        array(
            'html>' . "\n",
            "\n" . '<html',
            'html/>' . "\n",
            "\n" . '</html',
            'head>' . "\n",
            "\n" . '<head',
            'head/>' . "\n",
            "\n" . '</head',
            '="' . $this->booleanAttributesHelper . '"',
        ),
        array(
            'html>',
            '<html',
            'html/>',
            '</html',
            'head>',
            '<head',
            'head/>',
            '</head',
            '',
        ),
        $html
    );

    // collapse "<tag ...></tag>" into "<tag .../>" for self-closing tags
    $html = (string)\preg_replace('#<\b(' . $CACHE_SELF_CLOSING_TAGS . ')([^>]+)><\/\b\1>#', '<\\1\\2/>', $html);

    // ------------------------------------
    // check if compression worked
    // ------------------------------------

    if ($origHtmlLength < UTF8::strlen($html)) {
      $html = $origHtml;
    }

    return $html;
  }
576
577
  /**
   * Load the HTML into a HtmlDomParser, run the enabled dom-based optimizations
   * and return the dom converted back into a string.
   *
   * @param string $html
   * @param bool   $decodeUtf8Specials passed through to HtmlDomParser::html()
   *
   * @return string
   */
  private function minifyHtmlDom($html, $decodeUtf8Specials)
  {
    // set up the parser: no redundant white space, no indentation of the output
    $dom = new HtmlDomParser();
    $dom->getDocument()->preserveWhiteSpace = false;
    $dom->getDocument()->formatOutput = false;
    $dom->loadHtml($html);

    // protect HTML tags and conditional comments
    $dom = $this->protectTags($dom);

    // remove default HTML comments [protected html is still protected]
    if ($this->doRemoveComments === true) {
      $dom = $this->removeComments($dom);
    }

    // sum-up extra whitespace from the dom [protected html is still protected]
    if ($this->doSumUpWhitespace === true) {
      $dom = $this->sumUpWhitespace($dom);
    }

    // both switches stay the same for every element, so read them only once
    $withAttributeOptimization = ($this->doOptimizeAttributes === true);
    $withWhitespaceTrimming = ($this->doRemoveWhitespaceAroundTags === true);

    // NOTE(review): static analysis reports that the result of find() is not
    // guaranteed to be traversable - presumably a list of SimpleHtmlDom, confirm.
    foreach ($dom->find('*') as $domElement) {
      // optimize html attributes [protected html is still protected]
      if ($withAttributeOptimization) {
        $this->optimizeAttributes($domElement);
      }

      // remove whitespace around tags [protected html is still protected]
      if ($withWhitespaceTrimming) {
        $this->removeWhitespaceAroundTags($domElement);
      }
    }

    // convert the dom into a string
    return $dom->html($decodeUtf8Specials);
  }
642
643
  /**
   * Sort HTML-Attributes, so that gzip can do better work and remove some default attributes...
   *
   * Boolean attributes get the internal helper value (minify() strips it again),
   * removable attributes (see removeAttributeHelper()) are dropped and, if sorting
   * is enabled, all remaining attributes are re-added in alphabetical order.
   *
   * @param SimpleHtmlDom $element
   *
   * @return bool false if the element reports no attributes (null), true otherwise
   */
  private function optimizeAttributes(SimpleHtmlDom $element)
  {
    $attributes = $element->getAllAttributes();
    if ($attributes === null) {
      return false;
    }

    $attrs = array();
    foreach ((array)$attributes as $attrName => $attrValue) {

      if (isset(self::$booleanAttributes[$attrName])) {

        if ($this->doSortHtmlAttributes === true) {
          $attrs[$attrName] = $this->booleanAttributesHelper;
          $element->{$attrName} = null;
        }

        continue;
      }

      // -------------------------------------------------------------------------
      // Remove optional "http:"-prefix from attributes.
      // -------------------------------------------------------------------------

      if ($this->doRemoveHttpPrefixFromAttributes === true) {
        if (
            ($attrName === 'href' || $attrName === 'src' || $attrName === 'action')
            &&
            !(isset($attributes['rel']) && $attributes['rel'] === 'external')
            &&
            !(isset($attributes['target']) && $attributes['target'] === '_blank')
            &&
            // Only a leading scheme is a prefix: replacing every "http://" in the
            // value would corrupt URLs that embed another URL, e.g. in the query.
            \strpos($attrValue, 'http://') === 0
        ) {
          // cut off "http:" and keep the protocol-relative "//..."
          $attrValue = \substr($attrValue, 5);
        }
      }

      if ($this->removeAttributeHelper($element->tag, $attrName, $attrValue, $attributes)) {
        $element->{$attrName} = null;
        continue;
      }

      // -------------------------------------------------------------------------
      // Sort css-class-names, for better gzip results.
      // -------------------------------------------------------------------------

      if ($this->doSortCssClassNames === true) {
        $attrValue = $this->sortCssClassNames($attrName, $attrValue);
      }

      // NOTE(review): the changed $attrValue is only written back to the element
      // in the sorting step below - with sorting disabled the changes from above
      // look like they get lost; confirm whether this is intended.
      if ($this->doSortHtmlAttributes === true) {
        $attrs[$attrName] = $attrValue;
        $element->{$attrName} = null;
      }
    }

    // -------------------------------------------------------------------------
    // Sort html-attributes, for better gzip results.
    // -------------------------------------------------------------------------

    if ($this->doSortHtmlAttributes === true) {
      \ksort($attrs);
      foreach ($attrs as $attrName => $attrValue) {
        $attrValue = HtmlDomParser::replaceToPreserveHtmlEntities($attrValue);
        $element->setAttribute($attrName, $attrValue, true);
      }
    }

    return true;
  }
719
720
  /**
   * Prevent changes of inline "styles" and "scripts" and of conditional comments.
   *
   * The original content is stored in $this->protectedChildNodes and replaced in
   * the dom by a numbered placeholder tag.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function protectTags(HtmlDomParser $dom)
  {
    // "%1$s" = name of the placeholder tag, "%2$s" = index of the saved content
    $placeholderFormat = '<%1$s data-%1$s="%2$s"></%1$s>';
    $helperName = $this->protectedChildNodesHelper;
    $index = 0;

    // NOTE(review): static analysis reports that the result of find() is not
    // guaranteed to be traversable - confirm the return type of find().
    foreach ($dom->find('script, style') as $inlineElement) {

      // skip external links
      if ($inlineElement->tag === 'script' || $inlineElement->tag === 'style') {
        $inlineAttributes = $inlineElement->getAllAttributes();
        if (isset($inlineAttributes['src'])) {
          continue;
        }
      }

      $this->protectedChildNodes[$index] = $inlineElement->text();
      $inlineElement->getNode()->nodeValue = \sprintf($placeholderFormat, $helperName, $index);

      ++$index;
    }

    $dom->getDocument()->normalizeDocument();

    foreach ($dom->find('//comment()') as $commentElement) {
      $commentText = $commentElement->text();

      // only conditional comments are protected, normal comments are skipped
      if ($this->isConditionalComment($commentText) === false) {
        continue;
      }

      $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

      /* @var $commentNode \DOMComment */
      $commentNode = $commentElement->getNode();
      $placeholderNode = new \DOMText(\sprintf($placeholderFormat, $helperName, $index));
      $commentElement->getNode()->parentNode->replaceChild($placeholderNode, $commentNode);

      ++$index;
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
772
773
  /**
   * Decide whether an attribute can be dropped from a tag.
   *
   * Every rule is guarded by its own option of this class; the first rule that
   * matches wins.
   *
   * @param string $tag       name of the tag the attribute belongs to
   * @param string $attrName  name of the attribute
   * @param string $attrValue value of the attribute
   * @param array  $allAttr   all attributes of the tag (name => value)
   *
   * @return bool true if the attribute can be removed
   */
  private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr)
  {
    // remove defaults
    if (
        $this->doRemoveDefaultAttributes === true
        &&
        (
            ($tag === 'script' && $attrName === 'language' && $attrValue === 'javascript')
            ||
            ($tag === 'form' && $attrName === 'method' && $attrValue === 'get')
            ||
            ($tag === 'input' && $attrName === 'type' && $attrValue === 'text')
            ||
            ($tag === 'area' && $attrName === 'shape' && $attrValue === 'rect')
        )
    ) {
      return true;
    }

    // remove deprecated charset-attribute (the browser will use the charset from the HTTP-Header, anyway)
    if (
        $this->doRemoveDeprecatedScriptCharsetAttribute === true
        &&
        $tag === 'script' && $attrName === 'charset' && !isset($allAttr['src'])
    ) {
      return true;
    }

    // remove deprecated anchor-jump
    if (
        $this->doRemoveDeprecatedAnchorName === true
        &&
        $tag === 'a' && $attrName === 'name' && isset($allAttr['id']) && $allAttr['id'] === $attrValue
    ) {
      return true;
    }

    // remove "type=text/css" for css links
    if (
        $this->doRemoveDeprecatedTypeFromStylesheetLink === true
        &&
        $tag === 'link' && $attrName === 'type' && $attrValue === 'text/css'
        &&
        isset($allAttr['rel']) && $allAttr['rel'] === 'stylesheet'
    ) {
      return true;
    }

    // remove deprecated script-mime-types
    if (
        $this->doRemoveDeprecatedTypeFromScriptTag === true
        &&
        $tag === 'script' && $attrName === 'type'
        &&
        isset($allAttr['src'], self::$executableScriptsMimeTypes[$attrValue])
    ) {
      return true;
    }

    // remove 'value=""' from <input type="text">
    if (
        $this->doRemoveValueFromEmptyInput === true
        &&
        $tag === 'input' && $attrName === 'value' && $attrValue === ''
        &&
        isset($allAttr['type']) && $allAttr['type'] === 'text'
    ) {
      return true;
    }

    // remove some empty attributes
    if (
        $this->doRemoveEmptyAttributes === true
        &&
        \trim($attrValue) === ''
        &&
        \preg_match('/^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/', $attrName)
    ) {
      return true;
    }

    return false;
  }
849
850
  /**
   * Remove all comments from the dom, except comments that contain a "[".
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function removeComments(HtmlDomParser $dom)
  {
    // NOTE(review): static analysis reports that the result of find() is not
    // guaranteed to be traversable - confirm the return type of find().
    foreach ($dom->find('//comment()') as $foundComment) {
      $commentNode = $foundComment->getNode();
      $commentValue = $commentNode->nodeValue;

      // comments with a "[" are kept, presumably to keep conditional comments
      // like "[if ...]" alive
      if (\strpos($commentValue, '[') !== false) {
        continue;
      }

      $commentNode->parentNode->removeChild($commentNode);
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
871
872
  /**
   * Collapse whitespace in the text nodes directly around / inside a tag.
   *
   * Only elements whose tag name is a key of self::$trimWhitespaceFromTags and
   * that have at least one child node are processed. For those, the first child,
   * the last child, the previous sibling and the next sibling are inspected and,
   * if they are text nodes, every match of self::$regExSpace is replaced by a
   * single space.
   *
   * @param SimpleHtmlDom $element
   *
   * @return void
   */
  private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
  {
    if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
      return;
    }

    $node = $element->getNode();
    if ($node->childNodes->length < 1) {
      return;
    }

    $candidates = array(
        $node->firstChild,
        $node->lastChild,
        $node->previousSibling,
        $node->nextSibling,
    );

    // DOM nodes are object handles, so iterating by value is enough to modify
    // them (and avoids leaving a dangling reference behind).
    foreach ($candidates as $candidate) {
      if ($candidate === null || $candidate->nodeType !== \XML_TEXT_NODE) {
        continue;
      }

      $collapsed = \preg_replace(self::$regExSpace, ' ', $candidate->nodeValue);

      // preg_replace() returns null on a PCRE error; keep the original text
      // in that case instead of wiping the node.
      if ($collapsed !== null) {
        $candidate->nodeValue = $collapsed;
      }
    }
  }
903
904
  /**
   * Callback function for preg_replace_callback use.
   *
   * Reads the numeric id from the last double-quoted value in
   * $matches['attributes'] and returns the entry stored under that id in
   * $this->protectedChildNodes.
   *
   * @param array $matches PREG matches, expected to contain the named group "attributes"
   *
   * @return string the stored html, or an empty string if no id could be
   *                extracted or nothing is stored for it
   */
  private function restoreProtectedHtml($matches)
  {
    // Without an "attributes" group or a quoted id there is nothing to restore;
    // bail out instead of reading an undefined index below.
    if (
        !isset($matches['attributes'])
        ||
        \preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
    ) {
      return '';
    }

    $id = $matchesInner['id'];
    if (!isset($this->protectedChildNodes[$id])) {
      return '';
    }

    return (string)$this->protectedChildNodes[$id];
  }
922
923
  /**
   * Store the given value in $this->domainsToRemoveHttpPrefixFromAttributes.
   *
   * Presumably a list of domains whose http-prefix gets stripped from
   * attribute values elsewhere in this class -- the usage is not visible here.
   *
   * @param array $domainsToRemoveHttpPrefixFromAttributes
   *
   * @return $this for method chaining
   */
  public function setDomainsToRemoveHttpPrefixFromAttributes($domainsToRemoveHttpPrefixFromAttributes)
  {
    $this->domainsToRemoveHttpPrefixFromAttributes = $domainsToRemoveHttpPrefixFromAttributes;

    return $this;
  }
934
935
  /**
   * Sort and de-duplicate the class names of a "class" attribute.
   *
   * Any other attribute, and an empty / falsy value, is returned unchanged.
   * The value is split on HTML's ASCII whitespace (space, tab, LF, FF, CR),
   * so tokens separated by tabs or newlines are handled as well, and a class
   * token named "0" is kept.
   *
   * @param string $attrName
   * @param string $attrValue
   *
   * @return string the class names, unique, sorted and joined by a single space
   */
  private function sortCssClassNames($attrName, $attrValue)
  {
    if ($attrName !== 'class' || !$attrValue) {
      return $attrValue;
    }

    // PREG_SPLIT_NO_EMPTY drops the empty tokens produced by leading, trailing
    // or repeated whitespace, without dropping the valid token "0".
    $classes = \preg_split('/[ \t\n\f\r]+/', $attrValue, -1, PREG_SPLIT_NO_EMPTY);
    if ($classes === false) {
      // PCRE error: leave the attribute value as it is
      return $attrValue;
    }

    $classes = \array_unique($classes);
    \sort($classes);

    return \implode(' ', $classes);
  }
965
966
  /**
   * Sum-up extra whitespace from dom-nodes.
   *
   * Every text node of the dom gets each match of self::$regExSpace replaced
   * by a single space, unless the node path of the text node contains
   * "/<entry>" for one of the entries in self::$skipTagsForRemoveWhitespace.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser the same parser instance, after normalizing its document
   */
  private function sumUpWhitespace(HtmlDomParser $dom)
  {
    foreach ($dom->find('//text()') as $textnodeWrapper) {
      /* @var $textnode \DOMNode */
      $textnode = $textnodeWrapper->getNode();
      $xp = $textnode->getNodePath();

      // leave text below one of the "skip" tags untouched
      foreach (self::$skipTagsForRemoveWhitespace as $pattern) {
        if (\strpos($xp, "/$pattern") !== false) {
          continue 2;
        }
      }

      $collapsed = \preg_replace(self::$regExSpace, ' ', $textnode->nodeValue);

      // preg_replace() returns null on a PCRE error; keep the original text
      // in that case instead of wiping the node.
      if ($collapsed !== null) {
        $textnode->nodeValue = $collapsed;
      }
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
999
}
1000