Completed
Push — master ( d81673...325c5b )
by Lars
02:00
created

setDoRemoveDeprecatedScriptCharsetAttribute()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 0
cp 0
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * Class HtmlMin
7
 *
8
 * Inspired by:
9
 * - JS: https://github.com/kangax/html-minifier/blob/gh-pages/src/htmlminifier.js
10
 * - PHP: https://github.com/searchturbine/phpwee-php-minifier
11
 * - PHP: https://github.com/WyriHaximus/HtmlCompress
12
 * - PHP: https://github.com/zaininnari/html-minifier
13
 * - Java: https://code.google.com/archive/p/htmlcompressor/
14
 *
15
 * @package voku\helper
16
 */
17
class HtmlMin
18
{
19
  /**
20
   * // https://mathiasbynens.be/demo/javascript-mime-type
21
   * // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
22
   *
23
   * @var array
24
   */
25
  private static $executableScriptsMimeTypes = array(
26
      'text/javascript'          => '',
27
      'text/ecmascript'          => '',
28
      'text/jscript'             => '',
29
      'application/javascript'   => '',
30
      'application/x-javascript' => '',
31
      'application/ecmascript'   => '',
32
  );
33
34
  private static $selfClosingTags = array(
35
      'area',
36
      'base',
37
      'basefont',
38
      'br',
39
      'col',
40
      'command',
41
      'embed',
42
      'frame',
43
      'hr',
44
      'img',
45
      'input',
46
      'isindex',
47
      'keygen',
48
      'link',
49
      'meta',
50
      'param',
51
      'source',
52
      'track',
53
      'wbr',
54
  );
55
56
  private static $trimWhitespaceFromTags = array(
57
      'article' => '',
58
      'br'      => '',
59
      'div'     => '',
60
      'footer'  => '',
61
      'hr'      => '',
62
      'nav'     => '',
63
      'p'       => '',
64
      'script'  => '',
65
  );
66
67
  /**
68
   * @var array
69
   */
70
  private static $booleanAttributes = array(
71
      'allowfullscreen' => '',
72
      'async'           => '',
73
      'autofocus'       => '',
74
      'autoplay'        => '',
75
      'checked'         => '',
76
      'compact'         => '',
77
      'controls'        => '',
78
      'declare'         => '',
79
      'default'         => '',
80
      'defaultchecked'  => '',
81
      'defaultmuted'    => '',
82
      'defaultselected' => '',
83
      'defer'           => '',
84
      'disabled'        => '',
85
      'enabled'         => '',
86
      'formnovalidate'  => '',
87
      'hidden'          => '',
88
      'indeterminate'   => '',
89
      'inert'           => '',
90
      'ismap'           => '',
91
      'itemscope'       => '',
92
      'loop'            => '',
93
      'multiple'        => '',
94
      'muted'           => '',
95
      'nohref'          => '',
96
      'noresize'        => '',
97
      'noshade'         => '',
98
      'novalidate'      => '',
99
      'nowrap'          => '',
100
      'open'            => '',
101
      'pauseonexit'     => '',
102
      'readonly'        => '',
103
      'required'        => '',
104
      'reversed'        => '',
105
      'scoped'          => '',
106
      'seamless'        => '',
107
      'selected'        => '',
108
      'sortable'        => '',
109
      'truespeed'       => '',
110
      'typemustmatch'   => '',
111
      'visible'         => '',
112
  );
113
  /**
114
   * @var array
115
   */
116
  private static $skipTagsForRemoveWhitespace = array(
117
      'code',
118
      'pre',
119
      'script',
120
      'style',
121
      'textarea',
122
  );
123
124
  /**
125
   * @var array
126
   */
127
  private $protectedChildNodes = array();
128
129
  /**
130
   * @var string
131
   */
132
  private $protectedChildNodesHelper = 'html-min--voku--saved-content';
133
134
  /**
135
   * @var string
136 23
   */
137
  private $booleanAttributesHelper = 'html-min--voku--delete-this';
138 23
139 23
  /**
140
   * @var bool
141 23
   */
142 23
  private $doOptimizeAttributes = true;
143 23
144
  /**
145
   * @var bool
146
   */
147
  private $doRemoveComments = true;
148
149
  /**
150 23
   * @var bool
151
   */
152 23
  private $doRemoveWhitespaceAroundTags = true;
153 23
154 1
  /**
155
   * @var bool
156
   */
157 23
  private $doRemoveHttpPrefixFromAttributes = true;
158 23
159 3
  /**
160
   * @var bool
161
   */
162
  private $doSortCssClassNames = true;
163 20
164 20
  /**
165 20
   * @var bool
166
   */
167 20
  private $doSortHtmlAttributes = true;
168 20
169 20
  /**
170
   * @var bool
171 20
   */
172
  private $doRemoveDeprecatedScriptCharsetAttribute = true;
173 20
174 20
  /**
175 11
   * @var bool
176 11
   */
177 11
  private $doRemoveDefaultAttributes = true;
178
179 11
  /**
180
   * @var bool
181
   */
182
  private $doRemoveDeprecatedAnchorName = true;
183
184
  /**
185
   * @var bool
186 11
   */
187
  private $doRemoveDeprecatedTypeFromStylesheetLink = true;
188 11
189
  /**
190 11
   * @var bool
191
   */
192
  private $doRemoveDeprecatedTypeFromScriptTag = true;
193
194
  /**
195
   * @var bool
196 11
   */
197 11
  private $doRemoveValueFromEmptyInput = true;
198 11
199
  /**
200 11
   * @var bool
201 11
   */
202
  private $doRemoveEmptyAttributes = true;
203
204
  /**
205
   * @var bool
206
   */
207 11
  private $doSumUpWhitespace = true;
208
209 11
  /**
   * Create a new minifier instance.
   *
   * Every minification step starts out enabled (see the property defaults);
   * individual steps can be switched off via the setDo*() methods.
   */
  public function __construct()
  {
    // intentionally empty: the property defaults are the whole configuration
  }
215 11
216 11
  /**
   * Tell whether a comment text belongs to an IE conditional comment.
   *
   * INFO: conditional comments are no longer supported since IE 10.
   *
   * Recognised forms:
   * <!--[if expression]> HTML <![endif]-->
   * <![if expression]> HTML <![endif]>
   *
   * @param string $comment the comment text (callers in this class pass it
   *                        without the surrounding "<!--" / "-->")
   *
   * @return bool true if the text starts with "[if ...]" or ends with "[endif]"
   */
  private function isConditionalComment($comment)
  {
    // "||" short-circuits, so the closing pattern is only tried when the
    // opening pattern did not match
    return (bool)preg_match('/^\[if [^\]]+\]/', $comment)
           ||
           (bool)preg_match('/\[endif\]$/', $comment);
  }
240 1
241 11
  /**
   * Minify the given HTML.
   *
   * The input is trimmed, parsed into a DOM, run through the enabled
   * optimisation steps (see the setDo*() methods), serialised again and
   * finally cleaned up on string level. Inline <script> / <style> content and
   * conditional comments are swapped for placeholders while the DOM is being
   * processed and are restored afterwards.
   *
   * @param string $html the HTML to minify (cast to string)
   *
   * @return string the minified HTML; an empty string for empty or
   *                whitespace-only input; the trimmed input itself if the
   *                processed result turned out longer than the input
   */
  public function minify($html)
  {
    $html = trim((string)$html);
    // Compare against '' explicitly: a loose "!$html" check would also treat
    // the valid input "0" as empty and silently drop it.
    if ($html === '') {
      return '';
    }

    // init
    static $cacheSelfClosingTags = null;
    if ($cacheSelfClosingTags === null) {
      $cacheSelfClosingTags = implode('|', self::$selfClosingTags);
    }

    // reset
    $this->protectedChildNodes = array();

    // save old content
    $origHtml = $html;
    $origHtmlLength = UTF8::strlen($html);

    // init dom
    $dom = new HtmlDomParser();
    $dom->getDocument()->preserveWhiteSpace = false; // remove redundant white space
    $dom->getDocument()->formatOutput = false; // do not format the output with indentation

    // load dom
    $dom->loadHtml($html);

    // -------------------------------------------------------------------------
    // Protect HTML tags and conditional comments.
    // -------------------------------------------------------------------------

    $dom = $this->protectTags($dom);

    // -------------------------------------------------------------------------
    // Remove default HTML comments. [protected html is still protected]
    // -------------------------------------------------------------------------

    if ($this->doRemoveComments === true) {
      $dom = $this->removeComments($dom);
    }

    // -------------------------------------------------------------------------
    // Remove whitespace from the Dom. [protected html is still protected]
    // -------------------------------------------------------------------------

    if ($this->doSumUpWhitespace === true) {
      $dom = $this->sumUpWhitespace($dom);
    }

    foreach ($dom->find('*') as $element) {

      // -------------------------------------------------------------------------
      // Optimize html attributes. [protected html is still protected]
      // -------------------------------------------------------------------------

      if ($this->doOptimizeAttributes === true) {
        $this->optimizeAttributes($element);
      }

      if ($this->doRemoveWhitespaceAroundTags === true) {
        $this->removeWhitespaceAroundTags($element);
      }
    }

    // -------------------------------------------------------------------------
    // Convert the Dom into a string.
    // -------------------------------------------------------------------------

    $html = $dom->html();

    // -------------------------------------------------------------------------
    // Trim whitespace from html-string. [protected html is still protected]
    // -------------------------------------------------------------------------

    // Remove a space that is directly followed by ">"
    $html = preg_replace('/ (>)/', '$1', $html);
    // Remove a space that is directly preceded by "<"
    $html = preg_replace('/(<) /', '$1', $html);
    // Remove a single space between ">" and "<"
    $html = preg_replace('/(>) (<)/', '>$2', $html);

    // -------------------------------------------------------------------------
    // Restore protected HTML-code.
    // -------------------------------------------------------------------------

    $html = preg_replace_callback(
        '/<(?<element>' . $this->protectedChildNodesHelper . ')(?<attributes> [^>]*)?>(?<value>.*?)<\/' . $this->protectedChildNodesHelper . '>/',
        array($this, 'restoreProtectedHtml'),
        $html
    );
    $html = $dom::putReplacedBackToPreserveHtmlEntities($html);

    // ------------------------------------
    // Final clean-up
    // ------------------------------------

    $html = UTF8::cleanup($html);

    // Drop the line breaks next to the <html> / <head> tags and strip the
    // marker value that optimizeAttributes() assigned to boolean attributes.
    $html = str_replace(
        array(
            'html>' . "\n",
            "\n" . '<html',
            'html/>' . "\n",
            "\n" . '</html',
            'head>' . "\n",
            "\n" . '<head',
            'head/>' . "\n",
            "\n" . '</head',
            '="' . $this->booleanAttributesHelper . '"',
        ),
        array(
            'html>',
            '<html',
            'html/>',
            '</html',
            'head>',
            '<head',
            'head/>',
            '</head',
            '',
        ),
        $html
    );

    // Collapse "<tag ...></tag>" into "<tag .../>" for the self-closing tags.
    $html = preg_replace('#<\b(' . $cacheSelfClosingTags . ')([^>]+)><\/\b\1>#', '<\\1\\2/>', $html);

    // ------------------------------------
    // check if compression worked
    // ------------------------------------

    if ($origHtmlLength < UTF8::strlen($html)) {
      $html = $origHtml;
    }

    return $html;
  }
388
389
  /**
   * Optimise the attributes of one element.
   *
   * - boolean attributes are given a marker value that minify() strips again,
   * - a leading "http://" in href / src / action becomes protocol-relative,
   * - attributes reported by removeAttributeHelper() are removed,
   * - css class names are sorted,
   * - the attributes are re-added in alphabetical order, so that gzip can do
   *   better work.
   *
   * NOTE(review): modified values ($attrValue) are only written back to the
   * element when attribute sorting is enabled.
   *
   * @param SimpleHtmlDom $element
   *
   * @return bool false if the element reports no attributes (null), true otherwise
   */
  private function optimizeAttributes(SimpleHtmlDom $element)
  {
    $attributes = $element->getAllAttributes();
    if ($attributes === null) {
      return false;
    }

    $attrs = array();
    foreach ((array)$attributes as $attrName => $attrValue) {

      if (isset(self::$booleanAttributes[$attrName])) {

        if ($this->doSortHtmlAttributes === true) {
          $attrs[$attrName] = $this->booleanAttributesHelper;
          $element->{$attrName} = null;
        }

        continue;
      }

      if ($this->doRemoveHttpPrefixFromAttributes === true) {
        if (
            ($attrName === 'href' || $attrName === 'src' || $attrName === 'action')
            &&
            !(isset($attributes['rel']) && $attributes['rel'] === 'external')
            &&
            !(isset($attributes['target']) && $attributes['target'] === '_blank')
            &&
            // only touch a real prefix: a "http://" further inside the value
            // (e.g. a URL passed as query parameter) must stay intact
            strpos($attrValue, 'http://') === 0
        ) {
          // strip "http:" (5 chars) and keep the leading "//"
          $attrValue = substr($attrValue, 5);
        }
      }

      if ($this->removeAttributeHelper($element->tag, $attrName, $attrValue, $attributes)) {
        $element->{$attrName} = null;
        continue;
      }

      if ($this->doSortCssClassNames === true) {
        $attrValue = $this->sortCssClassNames($attrName, $attrValue);
      }

      if ($this->doSortHtmlAttributes === true) {
        // remove the attribute here, it is re-added in sorted order below
        $attrs[$attrName] = $attrValue;
        $element->{$attrName} = null;
      }
    }

    if ($this->doSortHtmlAttributes === true) {
      ksort($attrs);
      foreach ($attrs as $attrName => $attrValue) {
        // NOTE(review): static analysis reports replaceToPreserveHtmlEntities()
        // as protected in HtmlDomParser -- confirm its visibility.
        $attrValue = HtmlDomParser::replaceToPreserveHtmlEntities($attrValue);
        $element->setAttribute($attrName, $attrValue, true);
      }
    }

    return true;
  }
454
455
  /**
   * Shield inline "script" / "style" content and conditional comments from the
   * later minification steps.
   *
   * The original content is stored in $this->protectedChildNodes under a
   * running index and replaced in the DOM by a placeholder tag carrying that
   * index; restoreProtectedHtml() puts the content back afterwards.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser the same parser instance, for chaining
   */
  private function protectTags(HtmlDomParser $dom)
  {
    $placeholderTag = $this->protectedChildNodesHelper;
    $index = 0;

    foreach ($dom->find('script, style') as $element) {

      // elements that reference an external file via "src" are left alone
      $isScriptOrStyle = ($element->tag === 'script' || $element->tag === 'style');
      if ($isScriptOrStyle) {
        $attributes = $element->getAllAttributes();
        if (isset($attributes['src'])) {
          continue;
        }
      }

      $this->protectedChildNodes[$index] = $element->text();
      $element->getNode()->nodeValue = sprintf('<%1$s data-%1$s="%2$s"></%1$s>', $placeholderTag, $index);

      ++$index;
    }

    $dom->getDocument()->normalizeDocument();

    foreach ($dom->find('//comment()') as $element) {
      $commentText = $element->text();

      // plain comments are not protected
      if ($this->isConditionalComment($commentText) === false) {
        continue;
      }

      $this->protectedChildNodes[$index] = '<!--' . $commentText . '-->';

      // swap the comment node for a text node holding the placeholder
      /* @var $commentNode \DOMComment */
      $commentNode = $element->getNode();
      $placeholder = new \DOMText(sprintf('<%1$s data-%1$s="%2$s"></%1$s>', $placeholderTag, $index));
      $commentNode->parentNode->replaceChild($placeholder, $commentNode);

      ++$index;
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
507
508
  /**
   * Decide whether an attribute is redundant and can be dropped.
   *
   * Each rule is only evaluated if the matching "doRemove..." option is on.
   *
   * @param string $tag       tag name of the element
   * @param string $attrName  name of the attribute under test
   * @param string $attrValue value of the attribute under test
   * @param array  $allAttr   all attributes of the element (name => value)
   *
   * @return bool true if the attribute can be removed
   */
  private function removeAttributeHelper($tag, $attrName, $attrValue, $allAttr)
  {
    // attributes that merely repeat the default value
    if ($this->doRemoveDefaultAttributes === true) {
      $isDefaultValue =
          ($tag === 'script' && $attrName === 'language' && $attrValue === 'javascript')
          ||
          ($tag === 'form' && $attrName === 'method' && $attrValue === 'get')
          ||
          ($tag === 'input' && $attrName === 'type' && $attrValue === 'text')
          ||
          ($tag === 'area' && $attrName === 'shape' && $attrValue === 'rect');

      if ($isDefaultValue) {
        return true;
      }
    }

    // deprecated charset-attribute on scripts without "src"
    if (
        $this->doRemoveDeprecatedScriptCharsetAttribute === true
        &&
        $tag === 'script' && $attrName === 'charset' && !isset($allAttr['src'])
    ) {
      return true;
    }

    // deprecated anchor-jump: <a name="x"> that duplicates the id
    if (
        $this->doRemoveDeprecatedAnchorName === true
        &&
        $tag === 'a' && $attrName === 'name' && isset($allAttr['id']) && $allAttr['id'] === $attrValue
    ) {
      return true;
    }

    // "type=text/css" on stylesheet links
    if (
        $this->doRemoveDeprecatedTypeFromStylesheetLink === true
        &&
        $tag === 'link' && $attrName === 'type' && $attrValue === 'text/css' && isset($allAttr['rel']) && $allAttr['rel'] === 'stylesheet'
    ) {
      return true;
    }

    // javascript mime-types on scripts that have a "src"
    if (
        $this->doRemoveDeprecatedTypeFromScriptTag === true
        &&
        $tag === 'script' && $attrName === 'type' && isset($allAttr['src'], self::$executableScriptsMimeTypes[$attrValue])
    ) {
      return true;
    }

    // empty "value" on <input>
    if (
        $this->doRemoveValueFromEmptyInput === true
        &&
        $tag === 'input' && $attrName === 'value' && $attrValue === ''
    ) {
      return true;
    }

    // a fixed set of attributes that carry no information when empty
    if (
        $this->doRemoveEmptyAttributes === true
        &&
        $attrValue === ''
        &&
        preg_match('/^(?:class|id|style|title|lang|dir|on(?:focus|blur|change|click|dblclick|mouse(?:down|up|over|move|out)|key(?:press|down|up)))$/', $attrName)
    ) {
      return true;
    }

    return false;
  }
584
585
  /**
   * Strip comment nodes from the dom.
   *
   * Comments whose text contains a "[" are kept (presumably so that
   * conditional-comment-like content is never touched -- confirm).
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser the same parser instance, for chaining
   */
  private function removeComments(HtmlDomParser $dom)
  {
    foreach ($dom->find('//comment()') as $commentWrapper) {
      $commentNode = $commentWrapper->getNode();

      if (strpos($commentNode->nodeValue, '[') !== false) {
        continue;
      }

      $commentNode->parentNode->removeChild($commentNode);
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
606
607
  /**
   * Trim the text nodes at the edges of, and directly next to, an element.
   *
   * Only elements listed in self::$trimWhitespaceFromTags are handled, and
   * only if they have at least one child node. For those, the first / last
   * child and the previous / next sibling are trimmed if they are text nodes.
   *
   * @param SimpleHtmlDom $element
   *
   * @return void
   */
  private function removeWhitespaceAroundTags(SimpleHtmlDom $element)
  {
    if (!isset(self::$trimWhitespaceFromTags[$element->tag])) {
      return;
    }

    $node = $element->getNode();

    // Use the list's own length: count() on a DOMNodeList only works since
    // PHP 7.2 (before that it always yields 1 for the non-countable object).
    if ($node->childNodes->length < 1) {
      return;
    }

    $candidates = array(
        $node->firstChild,
        $node->lastChild,
        $node->previousSibling,
        $node->nextSibling,
    );

    // The candidates are objects, so no by-reference iteration is needed to
    // modify them.
    foreach ($candidates as $candidate) {
      if ($candidate === null) {
        continue;
      }

      if ($candidate->nodeType === \XML_TEXT_NODE) {
        $candidate->nodeValue = trim($candidate->nodeValue);
      }
    }
  }
639
640
  /**
   * Callback for preg_replace_callback(): turn a placeholder tag back into the
   * content that protectTags() saved for it.
   *
   * The index of the saved content is read from the quoted value inside the
   * "attributes" match.
   *
   * @param array $matches PREG matches (the "attributes" entry is used)
   *
   * @return string the saved content, or an empty string if the placeholder
   *                carries no index or nothing was saved under that index
   */
  private function restoreProtectedHtml($matches)
  {
    // Bail out before touching $matchesInner['id'] when there is nothing to
    // parse; reading the missing key would otherwise raise a notice / warning.
    if (
        !isset($matches['attributes'])
        ||
        preg_match('/.*"(?<id>\d*)"/', $matches['attributes'], $matchesInner) !== 1
    ) {
      return '';
    }

    $id = $matchesInner['id'];
    if (!isset($this->protectedChildNodes[$id])) {
      return '';
    }

    return (string)$this->protectedChildNodes[$id];
  }
658
659
  /**
   * Switch the attribute optimisation step on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doOptimizeAttributes
   */
  public function setDoOptimizeAttributes($doOptimizeAttributes)
  {
    $this->doOptimizeAttributes = (bool)$doOptimizeAttributes;
  }
666
667
  /**
   * Switch the removal of HTML comments on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveComments
   */
  public function setDoRemoveComments($doRemoveComments)
  {
    $this->doRemoveComments = (bool)$doRemoveComments;
  }
674
675
  /**
   * Switch the removal of attributes that only repeat a default value on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveDefaultAttributes
   */
  public function setDoRemoveDefaultAttributes($doRemoveDefaultAttributes)
  {
    $this->doRemoveDefaultAttributes = (bool)$doRemoveDefaultAttributes;
  }
682
683
  /**
   * Switch the removal of a deprecated <a name="..."> that duplicates the id
   * on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveDeprecatedAnchorName
   */
  public function setDoRemoveDeprecatedAnchorName($doRemoveDeprecatedAnchorName)
  {
    $this->doRemoveDeprecatedAnchorName = (bool)$doRemoveDeprecatedAnchorName;
  }
690
691
  /**
   * Switch the removal of the deprecated "charset" attribute on scripts
   * without "src" on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveDeprecatedScriptCharsetAttribute
   */
  public function setDoRemoveDeprecatedScriptCharsetAttribute($doRemoveDeprecatedScriptCharsetAttribute)
  {
    $this->doRemoveDeprecatedScriptCharsetAttribute = (bool)$doRemoveDeprecatedScriptCharsetAttribute;
  }
698
699
  /**
   * Switch the removal of javascript mime-types from <script src="..."> tags
   * on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveDeprecatedTypeFromScriptTag
   */
  public function setDoRemoveDeprecatedTypeFromScriptTag($doRemoveDeprecatedTypeFromScriptTag)
  {
    $this->doRemoveDeprecatedTypeFromScriptTag = (bool)$doRemoveDeprecatedTypeFromScriptTag;
  }
706
707
  /**
   * Switch the removal of type="text/css" from stylesheet links on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveDeprecatedTypeFromStylesheetLink
   */
  public function setDoRemoveDeprecatedTypeFromStylesheetLink($doRemoveDeprecatedTypeFromStylesheetLink)
  {
    $this->doRemoveDeprecatedTypeFromStylesheetLink = (bool)$doRemoveDeprecatedTypeFromStylesheetLink;
  }
714
715
  /**
   * Switch the removal of empty attributes (class, id, style, ...) on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveEmptyAttributes
   */
  public function setDoRemoveEmptyAttributes($doRemoveEmptyAttributes)
  {
    $this->doRemoveEmptyAttributes = (bool)$doRemoveEmptyAttributes;
  }
722
723
  /**
   * Switch the rewriting of "http://" to "//" in href / src / action
   * attributes on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveHttpPrefixFromAttributes
   */
  public function setDoRemoveHttpPrefixFromAttributes($doRemoveHttpPrefixFromAttributes)
  {
    $this->doRemoveHttpPrefixFromAttributes = (bool)$doRemoveHttpPrefixFromAttributes;
  }
730
731
  /**
   * Switch the removal of an empty "value" attribute from <input> on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveValueFromEmptyInput
   */
  public function setDoRemoveValueFromEmptyInput($doRemoveValueFromEmptyInput)
  {
    $this->doRemoveValueFromEmptyInput = (bool)$doRemoveValueFromEmptyInput;
  }
738
739
  /**
   * Switch the trimming of whitespace around selected tags on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doRemoveWhitespaceAroundTags
   */
  public function setDoRemoveWhitespaceAroundTags($doRemoveWhitespaceAroundTags)
  {
    $this->doRemoveWhitespaceAroundTags = (bool)$doRemoveWhitespaceAroundTags;
  }
746
747
  /**
   * Switch the sorting of css class names on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doSortCssClassNames
   */
  public function setDoSortCssClassNames($doSortCssClassNames)
  {
    $this->doSortCssClassNames = (bool)$doSortCssClassNames;
  }
754
755
  /**
   * Switch the alphabetical sorting of HTML attributes on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doSortHtmlAttributes
   */
  public function setDoSortHtmlAttributes($doSortHtmlAttributes)
  {
    $this->doSortHtmlAttributes = (bool)$doSortHtmlAttributes;
  }
762
763
  /**
   * Switch the collapsing of repeated whitespace in text nodes on or off.
   *
   * The value is cast to bool because the flag is checked with "=== true";
   * a truthy non-bool such as 1 would otherwise silently disable the step.
   *
   * @param bool $doSumUpWhitespace
   */
  public function setDoSumUpWhitespace($doSumUpWhitespace)
  {
    $this->doSumUpWhitespace = (bool)$doSumUpWhitespace;
  }
770
771
  /**
   * Sort the class names of a "class" attribute.
   *
   * The value is split on any whitespace, empty entries are dropped, the
   * names are sorted and joined again with single spaces. Values of other
   * attributes, and empty values, are returned unchanged.
   *
   * @param string $attrName  name of the attribute
   * @param string $attrValue value of the attribute
   *
   * @return string
   */
  private function sortCssClassNames($attrName, $attrValue)
  {
    if ($attrName !== 'class' || !$attrValue) {
      return $attrValue;
    }

    // PREG_SPLIT_NO_EMPTY only drops empty strings, so a class named "0"
    // survives (a loose "!$class" check would discard it).
    $classes = preg_split('/\s+/', $attrValue, -1, PREG_SPLIT_NO_EMPTY);
    if ($classes === false) {
      // splitting failed, leave the value as it is
      return $attrValue;
    }

    sort($classes);

    return implode(' ', $classes);
  }
800
801
  /**
   * Collapse runs of two or more whitespace characters in text nodes into a
   * single space.
   *
   * Text nodes whose node path contains one of the tags from
   * self::$skipTagsForRemoveWhitespace are left untouched.
   *
   * @param HtmlDomParser $dom
   *
   * @return HtmlDomParser the same parser instance, for chaining
   */
  private function sumUpWhitespace(HtmlDomParser $dom)
  {
    foreach ($dom->find('//text()') as $textnodeWrapper) {
      $textNode = $textnodeWrapper->getNode();
      $nodePath = $textNode->getNodePath();

      // look for a whitespace-sensitive tag in the node path
      $isInsideSkippedTag = false;
      foreach (self::$skipTagsForRemoveWhitespace as $tagName) {
        if (strpos($nodePath, '/' . $tagName) !== false) {
          $isInsideSkippedTag = true;
          break;
        }
      }

      if ($isInsideSkippedTag === false) {
        $textNode->nodeValue = preg_replace("/\s{2,}/", ' ', $textNode->nodeValue);
      }
    }

    $dom->getDocument()->normalizeDocument();

    return $dom;
  }
833
}
834