Completed
Push — master ( 7e63c6...f7b5dd )
by Lars
02:39
created

HtmlMin::doOptimizeViaHtmlDomParser()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * Class HtmlMin
7
 *
8
 * Inspired by:
9
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
10
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
11
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
12
 * - PHP: https://github.com/zaininnari/html-minifier
13
 * - Java: https://code.google.com/archive/p/htmlcompressor/
14
 *
15
 * @package voku\helper
16
 */
17
class HtmlMin
18
{
19
  /**
20
   * // https://mathiasbynens.be/demo/javascript-mime-type
21
   * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
22
   *
23
   * @var array
24
   */
25
  private static $executableScriptsMimeTypes = array(
26
      'text/javascript'          => '',
27
      'text/ecmascript'          => '',
28
      'text/jscript'             => '',
29
      'application/javascript'   => '',
30
      'application/x-javascript' => '',
31
      'application/ecmascript'   => '',
32
  );
33
34
  private static $selfClosingTags = array(
35
      'area',
36
      'base',
37
      'basefont',
38
      'br',
39
      'col',
40
      'command',
41
      'embed',
42
      'frame',
43
      'hr',
44
      'img',
45
      'input',
46
      'isindex',
47
      'keygen',
48
      'link',
49
      'meta',
50
      'param',
51
      'source',
52
      'track',
53
      'wbr',
54
  );
55
56
  private static $trimWhitespaceFromTags = array(
57
      'article' => '',
58
      'br'      => '',
59
      'div'     => '',
60
      'footer'  => '',
61
      'hr'      => '',
62
      'nav'     => '',
63
      'p'       => '',
64
      'script'  => '',
65
  );
66
67
  /**
68
   * @var array
69
   */
70
  private static $booleanAttributes = array(
71
      'allowfullscreen' => '',
72
      'async'           => '',
73
      'autofocus'       => '',
74
      'autoplay'        => '',
75
      'checked'         => '',
76
      'compact'         => '',
77
      'controls'        => '',
78
      'declare'         => '',
79
      'default'         => '',
80
      'defaultchecked'  => '',
81
      'defaultmuted'    => '',
82
      'defaultselected' => '',
83
      'defer'           => '',
84
      'disabled'        => '',
85
      'enabled'         => '',
86
      'formnovalidate'  => '',
87
      'hidden'          => '',
88
      'indeterminate'   => '',
89
      'inert'           => '',
90
      'ismap'           => '',
91
      'itemscope'       => '',
92
      'loop'            => '',
93
      'multiple'        => '',
94
      'muted'           => '',
95
      'nohref'          => '',
96
      'noresize'        => '',
97
      'noshade'         => '',
98
      'novalidate'      => '',
99
      'nowrap'          => '',
100
      'open'            => '',
101
      'pauseonexit'     => '',
102
      'readonly'        => '',
103
      'required'        => '',
104
      'reversed'        => '',
105
      'scoped'          => '',
106
      'seamless'        => '',
107
      'selected'        => '',
108
      'sortable'        => '',
109
      'truespeed'       => '',
110
      'typemustmatch'   => '',
111
      'visible'         => '',
112
  );
113
  /**
114
   * @var array
115
   */
116
  private static $skipTagsForRemoveWhitespace = array(
117
      'code',
118
      'pre',
119
      'script',
120
      'style',
121
      'textarea',
122
  );
123
124
  /**
125
   * @var array
126
   */
127
  private $protectedChildNodes = array();
128
129
  /**
130
   * @var string
131
   */
132
  private $protectedChildNodesHelper = 'html-min--voku--saved-content';
133
134
  /**
135
   * @var string
136 23
   */
137
  private $booleanAttributesHelper = 'html-min--voku--delete-this';
138 23
139 23
  /**
140
   * @var bool
141 23
   */
142 23
  private $doOptimizeViaHtmlDomParser = true;
143 23
144
  /**
145
   * @var bool
146
   */
147
  private $doOptimizeAttributes = true;
148
149
  /**
150 23
   * @var bool
151
   */
152 23
  private $doRemoveComments = true;
153 23
154 1
  /**
155
   * @var bool
156
   */
157 23
  private $doRemoveWhitespaceAroundTags = true;
158 23
159 3
  /**
160
   * @var bool
161
   */
162
  private $doRemoveHttpPrefixFromAttributes = true;
163 20
164 20
  /**
165 20
   * @var bool
166
   */
167 20
  private $doSortCssClassNames = true;
168 20
169 20
  /**
170
   * @var bool
171 20
   */
172
  private $doSortHtmlAttributes = true;
173 20
174 20
  /**
175 11
   * @var bool
176 11
   */
177 11
  private $doRemoveDeprecatedScriptCharsetAttribute = true;
178
179 11
  /**
180
   * @var bool
181
   */
182
  private $doRemoveDefaultAttributes = true;
183
184
  /**
185
   * @var bool
186 11
   */
187
  private $doRemoveDeprecatedAnchorName = true;
188 11
189
  /**
190 11
   * @var bool
191
   */
192
  private $doRemoveDeprecatedTypeFromStylesheetLink = true;
193
194
  /**
195
   * @var bool
196 11
   */
197 11
  private $doRemoveDeprecatedTypeFromScriptTag = true;
198 11
199
  /**
200 11
   * @var bool
201 11
   */
202
  private $doRemoveValueFromEmptyInput = true;
203
204
  /**
205
   * @var bool
206
   */
207 11
  private $doRemoveEmptyAttributes = true;
208
209 11
  /**
210
   * @var bool
211 11
   */
212 11
  private $doSumUpWhitespace = true;
213 11
214 11
  /**
   * Create a minifier instance.
   *
   * No set-up is required here: every "do*" option already defaults to true
   * through its property declaration.
   */
  public function __construct()
  {
    // intentionally empty
  }
220 11
221 11
  /**
   * Enable or disable the per-element attribute optimisation pass
   * (optimizeAttributes()) of the DOM-based minification.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doOptimizeAttributes
   */
  public function doOptimizeAttributes($doOptimizeAttributes = true)
  {
    $this->doOptimizeAttributes = (bool)$doOptimizeAttributes;
  }
228
229 11
  /**
   * Enable or disable the DOM-based part of minify(), i.e. the call to
   * minifyHtmlDom() and the later restoring of preserved HTML entities.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doOptimizeViaHtmlDomParser
   */
  public function doOptimizeViaHtmlDomParser($doOptimizeViaHtmlDomParser = true)
  {
    $this->doOptimizeViaHtmlDomParser = (bool)$doOptimizeViaHtmlDomParser;
  }
236
237 11
  /**
   * Enable or disable the removal of HTML comments (removeComments()) during
   * the DOM-based minification.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveComments
   */
  public function doRemoveComments($doRemoveComments = true)
  {
    $this->doRemoveComments = (bool)$doRemoveComments;
  }
244
245
  /**
   * Enable or disable the removal of attributes that only repeat a default,
   * e.g. language="javascript" on <script>, method="get" on <form>,
   * type="text" on <input> and shape="rect" on <area>.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveDefaultAttributes
   */
  public function doRemoveDefaultAttributes($doRemoveDefaultAttributes = true)
  {
    $this->doRemoveDefaultAttributes = (bool)$doRemoveDefaultAttributes;
  }
252
253
  /**
   * Enable or disable the removal of the "name" attribute from <a> tags when
   * it carries the same value as the tag's "id" attribute.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveDeprecatedAnchorName
   */
  public function doRemoveDeprecatedAnchorName($doRemoveDeprecatedAnchorName = true)
  {
    $this->doRemoveDeprecatedAnchorName = (bool)$doRemoveDeprecatedAnchorName;
  }
260
261
  /**
   * Enable or disable the removal of the "charset" attribute from <script>
   * tags that have no "src" attribute.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveDeprecatedScriptCharsetAttribute
   */
  public function doRemoveDeprecatedScriptCharsetAttribute($doRemoveDeprecatedScriptCharsetAttribute = true)
  {
    $this->doRemoveDeprecatedScriptCharsetAttribute = (bool)$doRemoveDeprecatedScriptCharsetAttribute;
  }
268
269 9
  /**
   * Enable or disable the removal of the "type" attribute from <script> tags
   * that have a "src" attribute and whose type is one of the executable
   * JavaScript MIME types known to this class.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveDeprecatedTypeFromScriptTag
   */
  public function doRemoveDeprecatedTypeFromScriptTag($doRemoveDeprecatedTypeFromScriptTag = true)
  {
    $this->doRemoveDeprecatedTypeFromScriptTag = (bool)$doRemoveDeprecatedTypeFromScriptTag;
  }
276
277
  /**
   * Enable or disable the removal of type="text/css" from
   * <link rel="stylesheet"> tags.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveDeprecatedTypeFromStylesheetLink
   */
  public function doRemoveDeprecatedTypeFromStylesheetLink($doRemoveDeprecatedTypeFromStylesheetLink = true)
  {
    $this->doRemoveDeprecatedTypeFromStylesheetLink = (bool)$doRemoveDeprecatedTypeFromStylesheetLink;
  }
284 9
285
  /**
   * Enable or disable the removal of selected attributes whose value is empty
   * or whitespace-only (class, id, style, title, lang, dir and a set of
   * on*-event handlers).
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveEmptyAttributes
   */
  public function doRemoveEmptyAttributes($doRemoveEmptyAttributes = true)
  {
    $this->doRemoveEmptyAttributes = (bool)$doRemoveEmptyAttributes;
  }
292
293 9
  /**
   * Enable or disable the rewriting of "http://" to "//" in href, src and
   * action attributes (skipped for elements with rel="external" or
   * target="_blank").
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveHttpPrefixFromAttributes
   */
  public function doRemoveHttpPrefixFromAttributes($doRemoveHttpPrefixFromAttributes = true)
  {
    $this->doRemoveHttpPrefixFromAttributes = (bool)$doRemoveHttpPrefixFromAttributes;
  }
300 9
301 9
  /**
   * Enable or disable the removal of value="" from <input type="text"> tags.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveValueFromEmptyInput
   */
  public function doRemoveValueFromEmptyInput($doRemoveValueFromEmptyInput = true)
  {
    $this->doRemoveValueFromEmptyInput = (bool)$doRemoveValueFromEmptyInput;
  }
308
309
  /**
   * Enable or disable the trimming of text nodes around selected tags
   * (removeWhitespaceAroundTags()) during the DOM-based minification.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doRemoveWhitespaceAroundTags
   */
  public function doRemoveWhitespaceAroundTags($doRemoveWhitespaceAroundTags = true)
  {
    $this->doRemoveWhitespaceAroundTags = (bool)$doRemoveWhitespaceAroundTags;
  }
316
317
  /**
   * Enable or disable the sorting of the names inside "class" attributes.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doSortCssClassNames
   */
  public function doSortCssClassNames($doSortCssClassNames = true)
  {
    $this->doSortCssClassNames = (bool)$doSortCssClassNames;
  }
324
325
  /**
   * Enable or disable the sorting of each element's attributes by name.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doSortHtmlAttributes
   */
  public function doSortHtmlAttributes($doSortHtmlAttributes = true)
  {
    $this->doSortHtmlAttributes = (bool)$doSortHtmlAttributes;
  }
332
333
  /**
   * Enable or disable the collapsing of whitespace runs inside text nodes
   * (sumUpWhitespace()) during the DOM-based minification.
   *
   * The argument is cast to a real boolean because the flag is later tested
   * with a strict "=== true" comparison; without the cast a truthy value such
   * as 1 or "1" would silently switch the feature off.
   *
   * @param boolean $doSumUpWhitespace
   */
  public function doSumUpWhitespace($doSumUpWhitespace = true)
  {
    $this->doSumUpWhitespace = (bool)$doSumUpWhitespace;
  }
340
341 9
  /**
   * Tell whether a comment text is an IE conditional comment.
   *
   * The text counts as conditional when it begins with an "[if expression]"
   * marker or ends with "[endif]", which covers both notations:
   *
   * <!--[if expression]> HTML <![endif]-->
   * <![if expression]> HTML <![endif]>
   *
   * INFO: conditional comments are no longer honoured since IE 10.
   *
   * @param string $comment the comment text (protectTags() passes it without
   *                        the surrounding "<!--" / "-->")
   *
   * @return bool
   */
  private function isConditionalComment($comment)
  {
    $markers = array(
        '/^\[if [^\]]+\]/',
        '/\[endif\]$/',
    );

    foreach ($markers as $marker) {
      if (preg_match($marker, $comment)) {
        return true;
      }
    }

    return false;
  }
365
366 9
  /**
   * Minify an HTML string.
   *
   * Steps, in order:
   *  1. optional DOM-based minification via minifyHtmlDom(),
   *  2. whitespace clean-up between attributes and between "> <",
   *  3. restoring of the protected script/style/conditional-comment content,
   *  4. restoring of preserved HTML entities (only when step 1 ran),
   *  5. final string clean-up (line breaks around html/head tags, boolean
   *     attribute placeholder, self-closing tags).
   *
   * If the result is longer than the trimmed input, the trimmed input is
   * returned instead.
   *
   * @param string $html
   * @param bool   $decodeUtf8Specials <p>Use this only in special cases, e.g. for PHP 5.3</p>
   *
   * @return string the minified html, or '' for empty / whitespace-only input
   */
  public function minify($html, $decodeUtf8Specials = false)
  {
    // Compare against '' explicitly: a loose "!$html" check would treat the
    // valid input "0" as empty and wrongly return ''.
    $html = trim((string)$html);
    if ($html === '') {
      return '';
    }

    // init
    static $cacheSelfClosingTags = null;
    if ($cacheSelfClosingTags === null) {
      $cacheSelfClosingTags = implode('|', self::$selfClosingTags);
    }

    // reset
    $this->protectedChildNodes = array();

    // save old content
    $origHtml = $html;
    $origHtmlLength = UTF8::strlen($html);

    // -------------------------------------------------------------------------
    // Minify the HTML via "HtmlDomParser"
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser === true) {
      $html = $this->minifyHtmlDom($html, $decodeUtf8Specials);
    }

    // -------------------------------------------------------------------------
    // Trim whitespace from html-string. [protected html is still protected]
    // -------------------------------------------------------------------------

    // Remove extra white-space(s) between HTML attribute(s)
    $html = preg_replace_callback(
        '#<([^\/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(\/?)>#',
        function ($matches) {
          return '<' . $matches[1] . preg_replace('#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s', ' $1$2', $matches[2]) . $matches[3] . '>';
        },
        $html
    );

    // Remove spaces that are between > and <
    $html = preg_replace('/(>) (<)/', '>$2', $html);

    // -------------------------------------------------------------------------
    // Restore protected HTML-code.
    // -------------------------------------------------------------------------

    $html = preg_replace_callback(
        '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
        array($this, 'restoreProtectedHtml'),
        $html
    );

    // -------------------------------------------------------------------------
    // Restore protected HTML-entities.
    // -------------------------------------------------------------------------

    if ($this->doOptimizeViaHtmlDomParser === true) {
      $html = HtmlDomParser::putReplacedBackToPreserveHtmlEntities($html);
    }

    // ------------------------------------
    // Final clean-up
    // ------------------------------------

    $html = UTF8::cleanup($html);

    $html = str_replace(
        array(
            'html>' . "\n",
            "\n" . '<html',
            'html/>' . "\n",
            "\n" . '</html',
            'head>' . "\n",
            "\n" . '<head',
            'head/>' . "\n",
            "\n" . '</head',
            // placeholder value given to boolean attributes by optimizeAttributes()
            '="' . $this->booleanAttributesHelper . '"',
        ),
        array(
            'html>',
            '<html',
            'html/>',
            '</html',
            'head>',
            '<head',
            'head/>',
            '</head',
            '',
        ),
        $html
    );

    // collapse "<tag ...></tag>" of void elements into "<tag .../>"
    $html = preg_replace('#<\b(' . $cacheSelfClosingTags . ')([^>]+)><\/\b\1>#', '<\\1\\2/>', $html);

    // ------------------------------------
    // check if compression worked
    // ------------------------------------

    if ($origHtmlLength < UTF8::strlen($html)) {
      $html = $origHtml;
    }

    return $html;
  }
483
484
  /**
   * Run the DOM-based minification passes and serialise the result.
   *
   * Order of the passes: protect script/style content and conditional
   * comments, remove comments, collapse whitespace, then per element optimise
   * the attributes and trim whitespace around tags. Each optional pass is
   * governed by its "do*" flag.
   *
   * @param string $html
   * @param bool   $decodeUtf8Specials forwarded unchanged to HtmlDomParser::html()
   *
   * @return string
   */
  public function minifyHtmlDom($html, $decodeUtf8Specials)
  {
    // set up the parser: drop redundant white space, no indentation on output
    $dom = new HtmlDomParser();
    $dom->getDocument()->preserveWhiteSpace = false;
    $dom->getDocument()->formatOutput = false;

    $dom->loadHtml($html);

    // protect html tags and conditional comments before anything is stripped
    $dom = $this->protectTags($dom);

    // remove default html comments [protected html is still protected]
    if ($this->doRemoveComments === true) {
      $dom = $this->removeComments($dom);
    }

    // sum-up extra whitespace [protected html is still protected]
    if ($this->doSumUpWhitespace === true) {
      $dom = $this->sumUpWhitespace($dom);
    }

    $withAttributePass = ($this->doOptimizeAttributes === true);
    $withTrimPass = ($this->doRemoveWhitespaceAroundTags === true);

    foreach ($dom->find('*') as $tagElement) {
      // optimize html attributes [protected html is still protected]
      if ($withAttributePass) {
        $this->optimizeAttributes($tagElement);
      }

      // remove whitespace around tags [protected html is still protected]
      if ($withTrimPass) {
        $this->removeWhitespaceAroundTags($tagElement);
      }
    }

    // convert the dom back into a string
    return $dom->html($decodeUtf8Specials);
  }
549
550 3
  /**
   * Optimise the attributes of one element: drop removable attributes, strip
   * the optional "http:" prefix, sort css class names and (optionally) sort
   * the attributes by name so that gzip can do better work.
   *
   * When attribute sorting is enabled, every kept attribute is removed and
   * re-added in key order. When it is disabled, attributes stay where they
   * are and only values that were actually rewritten are written back.
   * Previously those rewritten values were silently discarded in that mode.
   *
   * @param SimpleHtmlDom $element
   *
   * @return bool false when the element reports no attributes (null), true otherwise
   */
  private function optimizeAttributes(SimpleHtmlDom $element)
  {
    $attributes = $element->getAllAttributes();
    if ($attributes === null) {
      return false;
    }

    $sortAttributes = ($this->doSortHtmlAttributes === true);

    $attrs = array();
    foreach ((array)$attributes as $attrName => $attrValue) {

      if (isset(self::$booleanAttributes[$attrName])) {

        if ($sortAttributes) {
          // placeholder value; minify() strips '="<placeholder>"' at the end
          $attrs[$attrName] = $this->booleanAttributesHelper;
          $element->{$attrName} = null;
        }

        continue;
      }

      $originalValue = $attrValue;

      // -------------------------------------------------------------------------
      // Remove optional "http:"-prefix from attributes.
      // -------------------------------------------------------------------------

      if ($this->doRemoveHttpPrefixFromAttributes === true) {
        if (
            ($attrName === 'href' || $attrName === 'src' || $attrName === 'action')
            &&
            !(isset($attributes['rel']) && $attributes['rel'] === 'external')
            &&
            !(isset($attributes['target']) && $attributes['target'] === '_blank')
        ) {
          $attrValue = str_replace('http://', '//', $attrValue);
        }
      }

      if ($this->removeAttributeHelper($element->tag, $attrName, $attrValue, $attributes)) {
        $element->{$attrName} = null;
        continue;
      }

      // -------------------------------------------------------------------------
      // Sort css-class-names, for better gzip results.
      // -------------------------------------------------------------------------

      if ($this->doSortCssClassNames === true) {
        $attrValue = $this->sortCssClassNames($attrName, $attrValue);
      }

      if ($sortAttributes) {
        $attrs[$attrName] = $attrValue;
        $element->{$attrName} = null;
      } elseif ($attrValue !== $originalValue) {
        // Sorting is off: write the rewritten value back in place, using the
        // same call as the sorted path below.
        // NOTE(review): assumes setAttribute() on an existing attribute keeps
        // its position and that the third argument behaves as in the sorted
        // path — confirm against SimpleHtmlDom.
        $element->setAttribute(
            $attrName,
            HtmlDomParser::replaceToPreserveHtmlEntities($attrValue),
            true
        );
      }
    }

    // -------------------------------------------------------------------------
    // Sort html-attributes, for better gzip results.
    // -------------------------------------------------------------------------

    if ($sortAttributes) {
      ksort($attrs);
      foreach ($attrs as $attrName => $attrValue) {
        $attrValue = HtmlDomParser::replaceToPreserveHtmlEntities($attrValue);
        $element->setAttribute($attrName, $attrValue, true);
      }
    }

    return true;
  }
626
627
  /**
   * Shield inline "script"/"style" content and conditional comments from the
   * later minification passes.
   *
   * Each protected fragment is stored in $this->protectedChildNodes under a
   * running index and replaced in the DOM by a placeholder tag carrying that
   * index; restoreProtectedHtml() swaps the content back in afterwards.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function protectTags(HtmlDomParser $dom)
  {
    $index = 0;
    $helperTag = $this->protectedChildNodesHelper;
    // "<helper data-helper="N"></helper>"
    $placeholderFormat = '<%1$s data-%1$s="%2$s"></%1$s>';

    foreach ($dom->find('script, style') as $codeElement) {

      // skip external links
      if ($codeElement->tag === 'script' || $codeElement->tag === 'style') {
        $attributes = $codeElement->getAllAttributes();
        if (isset($attributes['src'])) {
          continue;
        }
      }

      $this->protectedChildNodes[$index] = $codeElement->text();
      $codeElement->getNode()->nodeValue = sprintf($placeholderFormat, $helperTag, $index);

      ++$index;
    }

    $dom->getDocument()->normalizeDocument();

    foreach ($dom->find('//comment()') as $commentElement) {
      $text = $commentElement->text();

      // only conditional comments are protected; normal ones are left alone
      if ($this->isConditionalComment($text) === false) {
        continue;
      }

      $this->protectedChildNodes[$index] = '<!--' . $text . '-->';

      /* @var $commentNode \DOMComment */
      $commentNode = $commentElement->getNode();
      $placeholder = new \DOMText(sprintf($placeholderFormat, $helperTag, $index));
      $commentNode->parentNode->replaceChild($placeholder, $commentNode);

      ++$index;
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
679
680
  /**
   * Decide whether an attribute is redundant and can be dropped.
   *
   * Every rule is guarded by its own "do*" option and the first matching rule
   * wins.
   *
   * @param string $tag       tag name of the element
   * @param string $attrName  name of the attribute under test
   * @param string $attrValue value of the attribute under test
   * @param array  $allAttr   all attributes of the element (name => value)
   *
   * @return bool true when the attribute can be removed
   */
  private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr)
  {
    // attributes that merely repeat the default
    if (
        $this->doRemoveDefaultAttributes === true
        &&
        (
            ($tag === 'script' && $attrName === 'language' && $attrValue === 'javascript')
            ||
            ($tag === 'form' && $attrName === 'method' && $attrValue === 'get')
            ||
            ($tag === 'input' && $attrName === 'type' && $attrValue === 'text')
            ||
            ($tag === 'area' && $attrName === 'shape' && $attrValue === 'rect')
        )
    ) {
      return true;
    }

    // deprecated charset-attribute (the browser will use the charset from the HTTP-Header, anyway)
    if (
        $this->doRemoveDeprecatedScriptCharsetAttribute === true
        &&
        $tag === 'script' && $attrName === 'charset' && !isset($allAttr['src'])
    ) {
      return true;
    }

    // deprecated anchor-jump: "name" duplicating the "id"
    if (
        $this->doRemoveDeprecatedAnchorName === true
        &&
        $tag === 'a' && $attrName === 'name' && isset($allAttr['id']) && $allAttr['id'] === $attrValue
    ) {
      return true;
    }

    // "type=text/css" on css links
    if (
        $this->doRemoveDeprecatedTypeFromStylesheetLink === true
        &&
        $tag === 'link' && $attrName === 'type' && $attrValue === 'text/css' && isset($allAttr['rel']) && $allAttr['rel'] === 'stylesheet'
    ) {
      return true;
    }

    // deprecated script-mime-types on external scripts
    if (
        $this->doRemoveDeprecatedTypeFromScriptTag === true
        &&
        $tag === 'script' && $attrName === 'type' && isset($allAttr['src'], self::$executableScriptsMimeTypes[$attrValue])
    ) {
      return true;
    }

    // 'value=""' on <input type="text">
    if (
        $this->doRemoveValueFromEmptyInput === true
        &&
        $tag === 'input' && $attrName === 'value' && $attrValue === '' && isset($allAttr['type']) && $allAttr['type'] === 'text'
    ) {
      return true;
    }

    // some attributes are pointless when empty
    if (
        $this->doRemoveEmptyAttributes === true
        &&
        trim($attrValue) === '' && preg_match('/^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/', $attrName)
    ) {
      return true;
    }

    return false;
  }
756
757
  /**
   * Strip comment nodes from the dom.
   *
   * A comment whose text contains a "[" is kept; every other comment node is
   * detached from its parent.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function removeComments(HtmlDomParser $dom)
  {
    foreach ($dom->find('//comment()') as $wrappedComment) {
      $commentNode = $wrappedComment->getNode();

      // keep anything containing a "[" (e.g. "[if ...]" style markers)
      if (strpos($commentNode->nodeValue, '[') !== false) {
        continue;
      }

      $commentNode->parentNode->removeChild($commentNode);
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
778
779
  /**
   * Trim the text nodes directly inside and directly around an element whose
   * tag is listed in self::$trimWhitespaceFromTags.
   *
   * Only elements that have at least one child node are processed; for those
   * the first child, last child, previous sibling and next sibling are
   * trimmed when they are text nodes.
   *
   * NOTE(review): because of the child-node check, childless elements such as
   * "br" and "hr" are never processed although they are listed in
   * self::$trimWhitespaceFromTags — confirm whether that is intended.
   *
   * @param SimpleHtmlDom $element
   *
   * @return void
   */
  private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
  {
    if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
      return;
    }

    $node = $element->getNode();
    if ($node->childNodes->length < 1) {
      return;
    }

    $candidates = array(
        $node->firstChild,
        $node->lastChild,
        $node->previousSibling,
        $node->nextSibling,
    );

    // Iterate by value: DOM nodes are object handles, so no reference is
    // needed to modify them, and no dangling reference is left behind.
    foreach ($candidates as $candidate) {
      if ($candidate !== null && $candidate->nodeType === XML_TEXT_NODE) {
        $candidate->nodeValue = trim($candidate->nodeValue);
      }
    }
  }
810
811
  /**
   * Callback for preg_replace_callback(): swap a placeholder tag for the
   * protected content stored by protectTags().
   *
   * The index of the stored content is read from the quoted number inside the
   * placeholder's attributes. When no index can be found, or nothing is stored
   * under it, the placeholder is replaced by an empty string.
   *
   * @param array $matches PREG matches; the named group "attributes" is read
   *
   * @return string
   */
  private function restoreProtectedHtml($matches)
  {
    $attributes = isset($matches['attributes']) ? $matches['attributes'] : '';

    // Without this check a placeholder that has no attributes would lead to
    // an "undefined index: id" access below.
    if (!preg_match('/.*"(?<id>\d*)"/', $attributes, $matchesInner)) {
      return '';
    }

    $id = $matchesInner['id'];
    if (!isset($this->protectedChildNodes[$id])) {
      return '';
    }

    return (string)$this->protectedChildNodes[$id];
  }
829
830
  /**
   * Sort and de-duplicate the class names of a "class" attribute so that
   * equal class sets always serialise identically (better gzip results).
   *
   * Attributes other than "class" and falsy values are returned unchanged.
   * Class names are split on any whitespace (not just a single space), a
   * class named "0" is kept, and the names are ordered by plain string
   * comparison so that numeric-looking names sort deterministically.
   *
   * @param string $attrName
   * @param string $attrValue
   *
   * @return string
   */
  private function sortCssClassNames($attrName, $attrValue)
  {
    if ($attrName !== 'class' || !$attrValue) {
      return $attrValue;
    }

    $classes = preg_split('/\s+/', $attrValue, -1, PREG_SPLIT_NO_EMPTY);
    if ($classes === false) {
      // PCRE failure: leave the value untouched rather than lose it
      return $attrValue;
    }

    $classes = array_unique($classes);
    sort($classes, SORT_STRING);

    return implode(' ', $classes);
  }
860
861
  /**
   * Collapse every run of two or more whitespace characters in the dom's text
   * nodes into a single space.
   *
   * Text nodes whose node path contains "/<tag>" for one of the tags in
   * self::$skipTagsForRemoveWhitespace are left untouched.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser
   */
  private function sumUpWhitespace(HtmlDomParser $dom)
  {
    foreach ($dom->find('//text()') as $wrappedText) {
      /* @var $textNode \DOMNode */
      $textNode = $wrappedText->getNode();
      $nodePath = $textNode->getNodePath();

      foreach (self::$skipTagsForRemoveWhitespace as $tagName) {
        if (strpos($nodePath, '/' . $tagName) !== false) {
          // whitespace is significant here: move on to the next text node
          continue 2;
        }
      }

      $textNode->nodeValue = preg_replace("/\s{2,}/", ' ', $textNode->nodeValue);
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
894
}
895