Completed
Push — master ( f8da28...d77e91 )
by Lars
02:16
created

CssToInlineStyles::doCleanup()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 22
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 22
ccs 11
cts 11
cp 1
rs 9.2
cc 2
eloc 10
nc 2
nop 1
crap 2
1
<?php
2
namespace voku\CssToInlineStyles;
3
4
use Symfony\Component\CssSelector\CssSelector;
5
use Symfony\Component\CssSelector\Exception\ExceptionInterface;
6
use voku\helper\UTF8;
7
8
/**
9
 * CSS to Inline Styles class
10
 *
11
 * @author     Tijs Verkoyen <[email protected]>
12
 */
13
class CssToInlineStyles
14
{
15
16
  /**
   * Pattern used to locate "@media" blocks inside a stylesheet.
   *
   * @var string
   */
  private static $cssMediaQueriesRegEx = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

  /**
   * Pattern for HTML comments that contain a "<style" tag
   * (conditional inline-style blocks).
   *
   * @var string
   */
  private static $excludeConditionalInlineStylesBlockRegEx = '/<!--.*<style.*?-->/is';

  /**
   * Pattern for "<style>" elements; group 1 captures the attributes,
   * group 2 the element content.
   *
   * @var string
   */
  private static $styleTagRegEx = '|<style(.*)>(.*)</style>|isU';

  /**
   * Pattern for CSS comments.
   *
   * @var string
   */
  private static $styleCommentRegEx = '/\\/\\*.*\\*\\//sU';

  /**
   * Stylesheet supplied through setCSS().
   *
   * @var  string
   */
  private $css;

  /**
   * Whether cleanupHTML() is run during convert().
   *
   * @var  bool
   */
  private $cleanup = false;

  /**
   * Encoding handed to the DOMDocument.
   *
   * @var  string
   */
  private $encoding = 'UTF-8';

  /**
   * Markup supplied through setHTML().
   *
   * @var  string
   */
  private $html;

  /**
   * Whether the content of "<style>" blocks in the HTML is added to the CSS.
   *
   * @var  bool
   */
  private $useInlineStylesBlock = false;

  /**
   * Whether the original "<style>" tags are stripped during convert().
   *
   * @var bool
   */
  private $stripOriginalStyleTags = false;

  /**
   * Whether conditional inline-style blocks are removed from the HTML
   * before the "<style>" blocks are read.
   *
   * @var bool
   */
  private $excludeConditionalInlineStylesBlock = true;

  /**
   * Whether media queries are removed from the CSS before inlining
   * (and kept when "<style>" tags are stripped).
   *
   * @var bool
   */
  private $excludeMediaQueries = true;
99
100
  /**
   * Build a converter; markup and stylesheet are optional here and can be
   * supplied later through setHTML() / setCSS().
   *
   * @param  null|string $html The HTML to process.
   * @param  null|string $css  The CSS to use.
   */
  public function __construct($html = null, $css = null)
  {
    // only forward the values that were actually supplied
    if (isset($html)) {
      $this->setHTML($html);
    }

    if (isset($css)) {
      $this->setCSS($css);
    }
  }
117
118
  /**
   * Store the markup that convert() will work on.
   *
   * Style elements carrying class="cleanup" are replaced by a single space
   * before the markup is stored.
   *
   * @param  string $html The HTML to process.
   */
  public function setHTML($html)
  {
    $cleanupStyleBlocks = '/<style[^>]+class="cleanup"[^>]*>.*<\/style>/Usi';
    $withoutCleanupBlocks = preg_replace($cleanupStyleBlocks, ' ', $html);

    $this->html = (string)$withoutCleanupBlocks;
  }
128
129
  /**
   * Store the stylesheet that convert() will inline.
   *
   * @param  string $css The CSS to use.
   */
  public function setCSS($css)
  {
    // always keep a string, whatever was passed in
    $stylesheet = (string)$css;

    $this->css = $stylesheet;
  }
138
139
  /**
   * usort() callback ordering two rule-sets.
   *
   * Each argument is a rule-set array holding a 'specificity' entry (an object
   * offering compareTo()) and an 'order' entry (position of appearance).
   *
   * @param array $e1 The first rule-set.
   * @param array $e2 The second rule-set.
   *
   * @return int result of the specificity comparison, or the difference of
   *             the 'order' entries when that comparison yields 0
   */
  private static function sortOnSpecificity($e1, $e2)
  {
    $bySpecificity = $e1['specificity']->compareTo($e2['specificity']);

    // a clear winner on specificity decides the ordering
    if (0 !== $bySpecificity) {
      return $bySpecificity;
    }

    // otherwise fall back to the order of appearance
    return $e1['order'] - $e2['order'];
  }
159
160
  /**
   * Inline the loaded CSS into the loaded HTML and return the resulting markup.
   *
   * @param  bool $outputXHTML When truthy the document is serialised with
   *                           saveXML() (XML header removed), otherwise with saveHTML().
   *
   * @return string
   *
   * @throws Exception when no HTML has been set
   */
  public function convert($outputXHTML = false)
  {
    // nothing to do without markup
    if (!$this->html) {
      throw new Exception('No HTML provided.');
    }

    $outputXHTML = (bool)$outputXHTML;

    // work on a local copy of the stylesheet
    $styles = $this->css;

    // optionally pull in the CSS from the "<style>" blocks of the markup
    if ($this->useInlineStylesBlock) {

      if (true === $this->excludeConditionalInlineStylesBlock) {
        $this->html = preg_replace(self::$excludeConditionalInlineStylesBlockRegEx, '', $this->html);
      }

      $styles .= $this->getCssFromInlineHtmlStyleBlock($this->html);
    }

    // parse the stylesheet, then build the document and apply the rules
    $cssRules = $this->processCSS($styles);
    $document = $this->createDOMDocument($this->html);
    $xPath = $this->createXPath($document, $cssRules);

    // optional post-processing steps
    if (true === $this->stripOriginalStyleTags) {
      $this->stripOriginalStyleTags($xPath);
    }

    if (true === $this->cleanup) {
      $this->cleanupHTML($xPath);
    }

    // plain HTML output is the default
    if (!$outputXHTML) {
      return $document->saveHTML();
    }

    // XHTML output: serialise as XML and drop the XML header
    $document->formatOutput = true;
    $xml = $document->saveXML(null, LIBXML_NOEMPTYTAG);

    return UTF8::ltrim(preg_replace('/<\?xml.*\?>/', '', $xml));
  }
226
227
  /**
   * Collect the content of every "<style>" block found in the given markup.
   *
   * @param string $html
   *
   * @return string the trimmed block contents, each followed by a newline;
   *                an empty string when no block content was found
   */
  public function getCssFromInlineHtmlStyleBlock($html)
  {
    $found = array();
    preg_match_all(self::$styleTagRegEx, $html, $found);

    // group 2 of the pattern holds the content of the style elements
    if (empty($found[2])) {
      return '';
    }

    $collected = '';
    foreach ($found[2] as $blockContent) {
      $collected .= trim($blockContent) . "\n";
    }

    return $collected;
  }
253
254
  /**
   * Normalise a stylesheet before it is split into rules.
   *
   * Line breaks become spaces, double quotes become single quotes, comments
   * are removed, whitespace runs are collapsed and, when media queries are
   * excluded, "@media" blocks are stripped.
   *
   * @param string $css
   *
   * @return string
   */
  private function doCleanup($css)
  {
    // Line breaks are ordinary whitespace in CSS, so they are turned into a
    // space instead of being dropped: dropping them would glue together tokens
    // that were only separated by the break (e.g. "0\nauto" or ".a\n.b").
    // Runs of spaces created here are collapsed again further down.
    $css = str_replace(
        array("\r", "\n", '"'),
        array(' ', ' ', '\''),
        $css
    );

    // remove comments
    $css = preg_replace(self::$styleCommentRegEx, '', $css);

    // collapse runs of whitespace into a single space
    $css = preg_replace('/\s\s+/', ' ', $css);

    // remove css media queries
    if (true === $this->excludeMediaQueries) {
      $css = $this->stripeMediaQueries($css);
    }

    return (string)$css;
  }
281
282
  /**
   * Parse a stylesheet into a sorted list of rule-sets.
   *
   * Every selector of every rule yields one entry with the keys 'selector',
   * 'properties', 'specificity' and 'order'; the list is sorted with
   * sortOnSpecificity().
   *
   * @param string $css
   *
   * @return array
   */
  private function processCSS($css)
  {
    $cleaned = $this->doCleanup((string)$css);

    $cssRules = array();

    // running counter remembering the order in which selectors appear
    $order = 1;

    // rules end with "}", selectors and declarations are separated by "{"
    foreach (explode('}', $cleaned) as $rule) {
      $parts = explode('{', $rule);

      // no declaration block: not a rule
      if (!isset($parts[1])) {
        continue;
      }

      $declarations = trim($parts[1]);

      // a rule may list several comma separated selectors
      foreach (explode(',', trim($parts[0])) as $selector) {
        $selector = trim($selector);

        $cssRules[] = array(
            'selector'    => $selector,
            'properties'  => $this->processCSSProperties($declarations),
            'specificity' => Specificity::fromSelector($selector),
            'order'       => $order++,
        );
      }
    }

    // sort based on specificity
    if (!empty($cssRules)) {
      usort($cssRules, array(__CLASS__, 'sortOnSpecificity'));
    }

    return $cssRules;
  }
360
361
  /**
   * Return the stylesheet without comments and without "@media" blocks.
   *
   * @param string $css
   *
   * @return string
   */
  private function stripeMediaQueries($css)
  {
    // comments go first so they cannot interfere with the media query match
    $withoutComments = preg_replace(self::$styleCommentRegEx, '', $css);

    $withoutMediaQueries = preg_replace(self::$cssMediaQueriesRegEx, '', $withoutComments);

    return (string)$withoutMediaQueries;
  }
375
376
  /**
   * Turn a declaration block into a map of property name => list of values.
   *
   * Declarations without a colon are ignored, a value is stored only once per
   * property, and the map is sorted by property name.
   *
   * @param  string $propertyString The CSS-properties.
   *
   * @return array
   */
  private function processCSSProperties($propertyString)
  {
    $pairs = array();

    foreach ($this->splitIntoProperties($propertyString) as $declaration) {
      // name and value are separated by the first colon
      $parts = explode(':', $declaration, 2);

      if (!isset($parts[1])) {
        continue;
      }

      $name = trim($parts[0]);
      $value = trim($parts[1]);

      // keep each distinct value once per property
      $alreadyStored = isset($pairs[$name]) && in_array($value, $pairs[$name], true);

      if (!$alreadyStored) {
        $pairs[$name][] = $value;
      }
    }

    ksort($pairs);

    return $pairs;
  }
421
422
  /**
   * Split a style string on ";" into its declarations.
   *
   * A piece that contains "base64," is appended (with the ";") to the piece in
   * front of it and its own slot is emptied, so the returned array can contain
   * empty strings.
   *
   * @param string $styles ex: 'color:blue;font-size:12px;'
   *
   * @return array an array of strings containing css property ex: array('color:blue','font-size:12px')
   */
  private function splitIntoProperties($styles)
  {
    $properties = explode(';', $styles);
    $total = count($properties);

    $index = 0;
    while ($index < $total) {
      $next = $index + 1;

      // the ";" in front of a "base64," piece belongs to the value, not to the list
      if (
          isset($properties[$next])
          &&
          false !== UTF8::strpos($properties[$next], 'base64,')
      ) {
        $properties[$index] .= ';' . $properties[$next];
        $properties[$next] = '';

        // the merged slot must not be inspected on its own
        $index = $next;
      }

      $index++;
    }

    return $properties;
  }
451
452
  /**
   * Build a DOMDocument from the given markup.
   *
   * The markup is loaded with a leading "<?xml encoding=...>" processing
   * instruction (encoding hack, see
   * http://php.net/manual/en/domdocument.loadhtml.php#95251); that
   * instruction is removed again afterwards. libxml errors are collected
   * internally while loading and the previous error mode is restored.
   *
   * @param string $html
   *
   * @return \DOMDocument
   */
  private function createDOMDocument($html)
  {
    $encoding = $this->getEncoding();

    // create new DOMDocument
    $document = new \DOMDocument('1.0', $encoding);

    // DOMDocument settings
    $document->preserveWhiteSpace = false;
    $document->formatOutput = true;

    // collect libxml errors internally, remember the previous mode
    $internalErrors = libxml_use_internal_errors(true);

    // load HTML, prefixed with the encoding hack
    $document->loadHTML('<?xml encoding="' . $encoding . '">' . $html);

    // Remove the "xml-encoding" hack again. The nodes are collected first:
    // childNodes is a live list, so removing while iterating it could skip nodes.
    $processingInstructions = array();
    foreach ($document->childNodes as $child) {
      if (XML_PI_NODE === $child->nodeType) {
        $processingInstructions[] = $child;
      }
    }

    foreach ($processingInstructions as $processingInstruction) {
      $document->removeChild($processingInstruction);
    }

    // set encoding
    $document->encoding = $encoding;

    // Drop the errors buffered during loading, but only when internal error
    // handling was switched on by this method; a caller that had already
    // enabled it keeps its own buffer untouched.
    if (false === $internalErrors) {
      libxml_clear_errors();
    }

    // restore error level
    libxml_use_internal_errors($internalErrors);

    return $document;
  }
492
493
  /**
   * Return the encoding currently stored on this instance.
   *
   * @return string
   */
  private function getEncoding()
  {
    $encoding = $this->encoding;

    return $encoding;
  }
502
503
  /**
   * Apply the given rule-sets to the document and return the XPath helper.
   *
   * Pass one walks the rules in the given order and writes their properties
   * into the "style" attribute of every matching element; the element's own
   * inline style is parked in a temporary attribute on first contact.
   * Pass two re-applies the parked inline styles through createPropertyChunks()
   * and removes the temporary attribute.
   *
   * @param \DOMDocument $document
   * @param array        $cssRules
   *
   * @return \DOMXPath
   */
  private function createXPath(\DOMDocument $document, array $cssRules)
  {
    $xPath = new \DOMXPath($document);

    // without rules there is nothing to apply
    if (0 === count($cssRules)) {
      return $xPath;
    }

    // temporary attribute holding an element's original inline style
    $placeholder = 'data-css-to-inline-styles-original-styles';

    foreach ($cssRules as $rule) {

      // selectors that cannot be converted are skipped
      try {
        $query = CssSelector::toXPath($rule['selector']);
      } catch (ExceptionInterface $e) {
        continue;
      }

      if (null === $query) {
        continue;
      }

      $matched = $xPath->query($query);

      if (false === $matched) {
        continue;
      }

      foreach ($matched as $element) {

        /**
         * @var $element \DOMElement
         */

        // first contact: park the original inline style and start from a clean slate
        if (null === $element->attributes->getNamedItem($placeholder)) {
          $inlineStyle = $element->attributes->getNamedItem('style');

          $element->setAttribute($placeholder, null === $inlineStyle ? '' : $inlineStyle->value);
          $element->setAttribute('style', '');
        }

        $merged = $this->createPropertyChunks($element, $rule['properties']);

        if ($merged != '') {
          $element->setAttribute('style', $merged);
        }
      }
    }

    // second pass: bring the parked inline styles back
    $parkedElements = $xPath->query('//*[@data-css-to-inline-styles-original-styles]');

    foreach ($parkedElements as $element) {
      $parkedStyle = $element->attributes->getNamedItem($placeholder)->value;

      if ($parkedStyle != '') {
        $ownProperties = $this->splitStyleIntoChunks(
            $this->splitIntoProperties($parkedStyle)
        );

        $restored = $this->createPropertyChunks($element, $ownProperties);

        if ($restored != '') {
          $element->setAttribute('style', $restored);
        }
      }

      // the temporary attribute must not reach the output
      $element->removeAttribute($placeholder);
    }

    return $xPath;
  }
601
602
  /**
   * Turn a list of "name: value" declarations into a name => value map.
   *
   * Empty entries and entries without a colon are ignored; when a name occurs
   * more than once the last value wins. Name and value are both trimmed, so
   * "color : red" and "color:red" end up under the same key "color" (the
   * same normalisation processCSSProperties() applies to rule properties).
   *
   * @param array $definedProperties list of declaration strings
   *
   * @return array map of property name => value string
   */
  private function splitStyleIntoChunks(array $definedProperties)
  {
    $properties = array();

    foreach ($definedProperties as $property) {
      // skip empty entries
      if (!$property) {
        continue;
      }

      // name and value are separated by the first colon
      $chunks = explode(':', $property, 2);

      if (!isset($chunks[1])) {
        continue;
      }

      // trim the name on both sides: whitespace in front of the colon must
      // not produce a key that differs from the plain property name
      $properties[trim($chunks[0])] = trim($chunks[1]);
    }

    return $properties;
  }
633
634
  /**
   * Merge rule properties into an element's current inline style and return
   * the resulting style string.
   *
   * A property already present on the element is overwritten unless the
   * present value contains "!important" and the new value does not.
   *
   * @param \DOMElement $element
   * @param array       $ruleProperties map of property name => value; a value
   *                                    is either a list of strings or a single string
   *
   * @return string declarations in the form "name: value;" joined by spaces
   */
  private function createPropertyChunks(\DOMElement $element, array $ruleProperties)
  {
    // init var
    $properties = array();

    // get current styles
    $stylesAttribute = $element->attributes->getNamedItem('style');

    // any styles defined before?
    if (null !== $stylesAttribute) {
      // get value for the styles attribute
      $definedStyles = (string)$stylesAttribute->value;

      // split into properties
      $definedProperties = $this->splitIntoProperties($definedStyles);

      $properties = $this->splitStyleIntoChunks($definedProperties);
    }

    // add new properties into the list
    foreach ($ruleProperties as $key => $value) {
      // If one of the rules is already set and is !important, don't apply it,
      // except if the new rule is also important.
      //
      // The new value can be a plain string (original inline styles) or a
      // list of strings (parsed rules); the cast keeps implode() valid for both.
      if (
          !isset($properties[$key])
          ||
          false === UTF8::stristr($properties[$key], '!important')
          ||
          false !== UTF8::stristr(implode('', (array)$value), '!important')
      ) {
        $properties[$key] = $value;
      }
    }

    // build string
    $propertyChunks = array();

    // build chunks, one per value
    foreach ($properties as $key => $values) {
      foreach ((array)$values as $value) {
        $propertyChunks[] = $key . ': ' . $value . ';';
      }
    }

    return implode(' ', $propertyChunks);
  }
686
687
  /**
   * Strip the "<style>" elements of the document.
   *
   * When media queries are excluded from inlining, each style element is kept
   * but reduced to the media queries it contains; otherwise the element is
   * removed from the document.
   *
   * @param  \DOMXPath $xPath The DOMXPath for the entire document.
   *
   * @return void
   */
  private function stripOriginalStyleTags(\DOMXPath $xPath)
  {
    $keepMediaQueries = (true === $this->excludeMediaQueries);

    foreach ($xPath->query('descendant-or-self::style') as $styleNode) {
      // remove the entire style tag
      if (!$keepMediaQueries) {
        $styleNode->parentNode->removeChild($styleNode);
        continue;
      }

      // comments are removed before the media queries are matched
      $styleNode->nodeValue = preg_replace(self::$styleCommentRegEx, '', $styleNode->nodeValue);

      // search for Media Queries
      preg_match_all(self::$cssMediaQueriesRegEx, $styleNode->nodeValue, $mediaQueries);

      // only the media queries stay inside the style element
      $styleNode->nodeValue = implode("\n", $mediaQueries[0]);
    }
  }
716
717
  /**
   * Remove every "id" and "class" attribute from the document.
   *
   * @param  \DOMXPath $xPath The DOMXPath for the entire document.
   *
   * @return void
   */
  private function cleanupHTML(\DOMXPath $xPath)
  {
    foreach ($xPath->query('//@class | //@id') as $attribute) {
      $owner = $attribute->ownerElement;
      $owner->removeAttributeNode($attribute);
    }
  }
731
732
  /**
   * Switch the removal of "id" and "class" attributes on or off.
   *
   * @param  bool $on Should we enable cleanup?
   */
  public function setCleanup($on = true)
  {
    $this->cleanup = $on ? true : false;
  }
741
742
  /**
   * Store the encoding to use with the DOMDocument.
   *
   * @param  string $encoding The encoding to use.
   *
   * @deprecated Doesn't have any effect
   *             NOTE(review): the stored value is still returned by
   *             getEncoding() - confirm whether "no effect" is accurate.
   */
  public function setEncoding($encoding)
  {
    $encodingName = (string)$encoding;

    $this->encoding = $encodingName;
  }
753
754
  /**
   * Switch the use of the "<style>" blocks found in the HTML on or off.
   * When enabled, their content is appended to the CSS during convert().
   *
   * @param  bool $on Should we process inline styles?
   */
  public function setUseInlineStylesBlock($on = true)
  {
    $this->useInlineStylesBlock = $on ? true : false;
  }
764
765
  /**
   * Switch the stripping of the original "<style>" tags on or off.
   * When enabled, convert() runs stripOriginalStyleTags() on the document.
   *
   * @param  bool $on Should the original style tags be stripped?
   */
  public function setStripOriginalStyleTags($on = true)
  {
    $this->stripOriginalStyleTags = $on ? true : false;
  }
775
776
  /**
   * Switch the exclusion of media queries on or off.
   *
   * If this is enabled the media queries will be removed before inlining the rules.
   *
   * WARNING: If you use inline styles block "<style>" then this option will keep the media queries.
   *
   * @param bool $on
   */
  public function setExcludeMediaQueries($on = true)
  {
    $this->excludeMediaQueries = $on ? true : false;
  }
789
790
  /**
   * Switch the exclusion of conditional inline-style blocks on or off,
   * e.g.: <!--[if gte mso 9]><style>.foo { bar } </style><![endif]-->
   *
   * @param bool $on
   */
  public function setExcludeConditionalInlineStylesBlock($on = true)
  {
    $this->excludeConditionalInlineStylesBlock = $on ? true : false;
  }
799
800
}
1 ignored issue
show
Coding Style introduced by
According to PSR2, the closing brace of classes should be placed on the next line directly after the body.

Below you find some examples:

// Incorrect placement according to PSR2
class MyClass
{
    public function foo()
    {

    }
    // This blank line is not allowed.

}

// Correct
class MyClass
{
    public function foo()
    {

    } // No blank lines after this line.
}
Loading history...
801