Completed
Push — master ( 89b559...765a44 )
by Lars
03:35
created

CssToInlineStyles::createDOMDocument()   B

Complexity

Conditions 3
Paths 3

Size

Total Lines 33
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3.0884

Importance

Changes 3
Bugs 1 Features 0
Metric Value
c 3
b 1
f 0
dl 0
loc 33
ccs 11
cts 14
cp 0.7856
rs 8.8571
cc 3
eloc 12
nc 3
nop 2
crap 3.0884
1
<?php
2
namespace voku\CssToInlineStyles;
3
4
use Symfony\Component\CssSelector\CssSelectorConverter;
5
use Symfony\Component\CssSelector\Exception\ExceptionInterface;
6
7
/**
 * CSS to Inline Styles class
 *
 * Applies the rules of a stylesheet (given explicitly and/or read from the
 * document's own <style> blocks) to the matching elements of an HTML document
 * as inline "style" attributes.
 *
 * @author     Tijs Verkoyen <[email protected]>
 */
class CssToInlineStyles
{

  /**
   * regular expression: css media queries
   *
   * @var string
   */
  private static $cssMediaQueriesRegEx = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

  /**
   * regular expression: conditional inline style tags
   *
   * @var string
   */
  private static $excludeConditionalInlineStylesBlockRegEx = '/<!--.*<style.*?-->/is';

  /**
   * regular expression: inline style tags
   *
   * @var string
   */
  private static $styleTagRegEx = '|<style(.*)>(.*)</style>|isU';

  /**
   * regular expression: css-comments
   *
   * @var string
   */
  private static $styleCommentRegEx = '/\\/\\*.*\\*\\//sU';

  /**
   * The CSS to use
   *
   * @var  string
   */
  private $css;

  /**
   * Should the generated HTML be cleaned
   *
   * @var  bool
   */
  private $cleanup = false;

  /**
   * The encoding to use.
   *
   * @var  string
   */
  private $encoding = 'UTF-8';

  /**
   * The HTML to process
   *
   * @var  string
   */
  private $html;

  /**
   * Use inline-styles block as CSS
   *
   * @var  bool
   */
  private $useInlineStylesBlock = false;

  /**
   * Strip original style tags
   *
   * @var bool
   */
  private $stripOriginalStyleTags = false;

  /**
   * Exclude conditional inline-style blocks
   *
   * @var bool
   */
  private $excludeConditionalInlineStylesBlock = true;

  /**
   * Exclude media queries from "$this->css" and keep media queries for inline-styles blocks
   *
   * @var bool
   */
  private $excludeMediaQueries = true;

  /**
   * Creates an instance, you could set the HTML and CSS here, or load it
   * later.
   *
   * @param  null|string $html The HTML to process.
   * @param  null|string $css  The CSS to use.
   */
  public function __construct($html = null, $css = null)
  {
    if (null !== $html) {
      $this->setHTML($html);
    }

    if (null !== $css) {
      $this->setCSS($css);
    }
  }

  /**
   * Set HTML to process
   *
   * Style elements carrying class="cleanup" are replaced by a single space
   * before the HTML is stored.
   *
   * @param  string $html The HTML to process.
   */
  public function setHTML($html)
  {
    // strip style definitions, if we use css-class "cleanup" on a style-element
    $stripped = preg_replace('/<style[^>]+class="cleanup"[^>]*>.*<\/style>/Usi', ' ', $html);

    // preg_replace() yields null on a PCRE error; keep the given HTML in that
    // case instead of silently storing an empty string
    $this->html = (string)(null === $stripped ? $html : $stripped);
  }

  /**
   * Set CSS to use
   *
   * @param  string $css The CSS to use.
   */
  public function setCSS($css)
  {
    $this->css = (string)$css;
  }

  /**
   * Comparison callback for usort(): orders rule sets by specificity and,
   * when the specificity is equal, by the order in which they appeared.
   *
   * @param array $e1 The first rule set (keys "specificity" and "order" are read).
   * @param array $e2 The second rule set (keys "specificity" and "order" are read).
   *
   * @return int
   */
  private static function sortOnSpecificity($e1, $e2)
  {
    // Compare the specificity
    $value = $e1['specificity']->compareTo($e2['specificity']);

    // if the specificity is the same, use the order in which the element appeared
    if (0 === $value) {
      $value = $e1['order'] - $e2['order'];
    }

    return $value;
  }

  /**
   * Converts the loaded HTML into an HTML-string with inline styles based on the loaded CSS
   *
   * The stored HTML is never modified by this method, so it can be called
   * repeatedly with different settings.
   *
   * @param  bool $outputXHTML   Should we output valid XHTML?
   * @param  int  $libXMLOptions [optional] Since PHP 5.4.0 and Libxml 2.6.0, you may also use the
   *                             options parameter to specify additional Libxml parameters.
   *                             Recommend these options: LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
   *
   * @return string
   *
   * @throws Exception if no HTML was provided
   */
  public function convert($outputXHTML = false, $libXMLOptions = 0)
  {
    // redefine
    $outputXHTML = (bool)$outputXHTML;

    // validate
    if (!$this->html) {
      throw new Exception('No HTML provided.');
    }

    // use local variables, so that a conversion never alters the stored input
    $css = $this->css;
    $html = $this->html;

    // should we use inline style-block
    if ($this->useInlineStylesBlock) {

      if (true === $this->excludeConditionalInlineStylesBlock) {
        $withoutConditionalBlocks = preg_replace(self::$excludeConditionalInlineStylesBlockRegEx, '', $html);

        // null signals a PCRE error: fall back to the unstripped HTML
        if (null !== $withoutConditionalBlocks) {
          $html = $withoutConditionalBlocks;
        }
      }

      $css .= $this->getCssFromInlineHtmlStyleBlock($html);
    }

    // process css
    $cssRules = $this->processCSS($css);

    // create new DOMDocument
    $document = $this->createDOMDocument($html, $libXMLOptions);

    // create new XPath (this also applies the rules to the document)
    $xPath = $this->createXPath($document, $cssRules);

    // strip original style tags if we need to
    if (true === $this->stripOriginalStyleTags) {
      $this->stripOriginalStyleTags($xPath);
    }

    // cleanup the HTML if we need to
    if (true === $this->cleanup) {
      $this->cleanupHTML($xPath);
    }

    // should we output XHTML?
    if (true === $outputXHTML) {
      // set formatting
      $document->formatOutput = true;

      // get the HTML as XML
      $xml = $document->saveXML(null, LIBXML_NOEMPTYTAG);

      // remove the XML-header; non-greedy, so that nothing between two "?>"
      // on the same line can be swallowed
      return ltrim(preg_replace('/<\?xml.*?\?>/', '', $xml));
    }

    // just regular HTML 4.01 as it should be used in newsletters
    return $document->saveHTML();
  }

  /**
   * get css from inline-html style-block
   *
   * The trimmed content of every <style> element is returned, each followed
   * by a newline.
   *
   * @param string $html
   *
   * @return string
   */
  public function getCssFromInlineHtmlStyleBlock($html)
  {
    // init var
    $css = '';
    $matches = array();

    // match the style blocks
    preg_match_all(self::$styleTagRegEx, $html, $matches);

    // any style-blocks found?
    if (!empty($matches[2])) {
      // add
      foreach ($matches[2] as $match) {
        $css .= trim($match) . "\n";
      }
    }

    return $css;
  }

  /**
   * Process the loaded CSS
   *
   * Splits the stylesheet into rule sets, one per selector, and sorts them
   * with sortOnSpecificity().
   *
   * @param string $css
   *
   * @return array list of rule sets with the keys "selector", "properties",
   *               "specificity" and "order"
   */
  private function processCSS($css)
  {
    // reset current set of rules
    $cssRules = array();

    // init vars
    $css = (string)$css;

    $css = $this->doCleanup($css);

    // rules are split by }
    $rules = (array)explode('}', $css);

    // init var
    $i = 1;

    // loop rules
    foreach ($rules as $rule) {
      // split into chunks
      $chunks = explode('{', $rule);

      // invalid rule?
      if (!isset($chunks[1])) {
        continue;
      }

      // set the selectors
      $selectors = trim($chunks[0]);

      // get cssProperties
      $cssProperties = trim($chunks[1]);

      // split multiple selectors
      $selectors = (array)explode(',', $selectors);

      // loop selectors
      foreach ($selectors as $selector) {
        // cleanup
        $selector = trim($selector);

        // build an array for each selector
        $ruleSet = array();

        // store selector
        $ruleSet['selector'] = $selector;

        // process the properties
        $ruleSet['properties'] = $this->processCSSProperties($cssProperties);

        // calculate specificity
        $ruleSet['specificity'] = Specificity::fromSelector($selector);

        // remember the order in which the rules appear
        $ruleSet['order'] = $i;

        // add into rules
        $cssRules[] = $ruleSet;

        // increment
        $i++;
      }
    }

    // sort based on specificity
    if (0 !== count($cssRules)) {
      usort($cssRules, array(__CLASS__, 'sortOnSpecificity'));
    }

    return $cssRules;
  }

  /**
   * Normalizes a stylesheet: removes line breaks, turns double quotes into
   * single quotes, removes comments, collapses whitespace runs and, if
   * enabled, removes media queries.
   *
   * @param string $css
   *
   * @return string
   */
  private function doCleanup($css)
  {
    // remove newlines & replace double quotes by single quotes
    $css = str_replace(
        array("\r", "\n", '"'),
        array('', '', '\''),
        $css
    );

    // remove comments
    $css = preg_replace(self::$styleCommentRegEx, '', $css);

    // remove spaces
    $css = preg_replace('/\s\s+/', ' ', $css);

    // remove css media queries
    if (true === $this->excludeMediaQueries) {
      $css = $this->stripeMediaQueries($css);
    }

    return (string)$css;
  }

  /**
   * remove css media queries from the string
   *
   * @param string $css
   *
   * @return string
   */
  private function stripeMediaQueries($css)
  {
    // remove comments previously to matching media queries
    $css = preg_replace(self::$styleCommentRegEx, '', $css);

    return (string)preg_replace(self::$cssMediaQueriesRegEx, '', $css);
  }

  /**
   * Process the CSS-properties
   *
   * @param  string $propertyString The CSS-properties.
   *
   * @return array property name => list of distinct values, sorted by name
   */
  private function processCSSProperties($propertyString)
  {
    // split into chunks
    $properties = $this->splitIntoProperties($propertyString);

    // init var
    $pairs = array();

    // loop properties
    foreach ($properties as $property) {
      // split into chunks
      $chunks = (array)explode(':', $property, 2);

      // validate
      if (!isset($chunks[1])) {
        continue;
      }

      // cleanup
      $chunks[0] = trim($chunks[0]);
      $chunks[1] = trim($chunks[1]);

      // add to pairs array, skipping values already recorded for this property
      if (
          !isset($pairs[$chunks[0]])
          ||
          !in_array($chunks[1], $pairs[$chunks[0]], true)
      ) {
        $pairs[$chunks[0]][] = $chunks[1];
      }
    }

    // sort the pairs
    ksort($pairs);

    // return
    return $pairs;
  }

  /**
   * Split a style string into an array of properties.
   * The returned array can contain empty strings.
   *
   * @param string $styles ex: 'color:blue;font-size:12px;'
   *
   * @return array an array of strings containing css property ex: array('color:blue','font-size:12px')
   */
  private function splitIntoProperties($styles)
  {
    $properties = (array)explode(';', $styles);
    $propertiesCount = count($properties);

    for ($i = 0; $i < $propertiesCount; $i++) {
      // If next property contains "base64,",
      // Then the ';' was part of this property (and we should not have split on it).
      if (
          isset($properties[$i + 1])
          &&
          strpos($properties[$i + 1], 'base64,') !== false
      ) {
        $properties[$i] .= ';' . $properties[$i + 1];
        $properties[$i + 1] = '';
        ++$i;
      }
    }

    return $properties;
  }

  /**
   * create DOMDocument from HTML
   *
   * @param string $html          The HTML to load.
   * @param int    $libXMLOptions Libxml options handed to DOMDocument::loadHTML().
   *
   * @return \DOMDocument
   */
  private function createDOMDocument($html, $libXMLOptions)
  {
    // create new DOMDocument
    $document = new \DOMDocument('1.0', $this->getEncoding());

    // DOMDocument settings
    $document->preserveWhiteSpace = false;
    $document->formatOutput = true;

    // set error level
    $internalErrors = libxml_use_internal_errors(true);

    // load HTML, prefixed with an xml-encoding declaration so that the
    // configured encoding is used
    // (see the user note #95251 on the DOMDocument::loadHTML manual page)
    $document->loadHTML('<?xml encoding="' . $this->getEncoding() . '">' . $html, $libXMLOptions);

    // remove the "xml-encoding" hack; iterate over a snapshot, because
    // "childNodes" is a live list and removing from it while looping can
    // skip siblings
    foreach (iterator_to_array($document->childNodes) as $child) {
      if (XML_PI_NODE === $child->nodeType) {
        $document->removeChild($child);
      }
    }

    // set encoding
    $document->encoding = $this->getEncoding();

    // drop the parser messages collected above, but only when buffering was
    // switched on by this method (otherwise the buffer belongs to the caller)
    if (false === $internalErrors) {
      libxml_clear_errors();
    }

    // restore error level
    libxml_use_internal_errors($internalErrors);

    return $document;
  }

  /**
   * Get the encoding to use
   *
   * @return string
   */
  private function getEncoding()
  {
    return $this->encoding;
  }

  /**
   * create XPath
   *
   * Besides creating the DOMXPath, this applies every rule to the elements it
   * matches and afterwards re-applies the styles the elements originally had.
   *
   * @param \DOMDocument $document
   * @param array        $cssRules
   *
   * @return \DOMXPath
   */
  private function createXPath(\DOMDocument $document, array $cssRules)
  {
    $xPath = new \DOMXPath($document);

    // any rules?
    if (0 !== count($cssRules)) {
      // one converter is enough for all rules
      $converter = new CssSelectorConverter();

      // loop rules
      foreach ($cssRules as $rule) {

        // selectors that cannot be converted are skipped
        try {
          $query = $converter->toXPath($rule['selector']);
        } catch (ExceptionInterface $e) {
          $query = null;
        }

        // validate query
        if (null === $query) {
          continue;
        }

        // search elements
        $elements = $xPath->query($query);

        // validate elements
        if (false === $elements) {
          continue;
        }

        // loop found elements
        foreach ($elements as $element) {

          /**
           * @var $element \DOMElement
           */

          // no styles stored?
          if (null === $element->attributes->getNamedItem('data-css-to-inline-styles-original-styles')) {

            // init var
            $originalStyle = '';

            if (null !== $element->attributes->getNamedItem('style')) {
              $originalStyle = $element->attributes->getNamedItem('style')->value;
            }

            // store original styles
            $element->setAttribute('data-css-to-inline-styles-original-styles', $originalStyle);

            // clear the styles
            $element->setAttribute('style', '');
          }

          $propertiesString = $this->createPropertyChunks($element, $rule['properties']);

          // set attribute
          if ('' !== $propertiesString) {
            $element->setAttribute('style', $propertiesString);
          }
        }
      }

      // reapply original styles
      // search elements
      $elements = $xPath->query('//*[@data-css-to-inline-styles-original-styles]');

      // loop found elements
      foreach ($elements as $element) {
        // get the original styles
        $originalStyle = $element->attributes->getNamedItem('data-css-to-inline-styles-original-styles')->value;

        if ('' !== $originalStyle) {
          $originalStyles = $this->splitIntoProperties($originalStyle);

          $originalProperties = $this->splitStyleIntoChunks($originalStyles);

          $propertiesString = $this->createPropertyChunks($element, $originalProperties);

          // set attribute
          if ('' !== $propertiesString) {
            $element->setAttribute('style', $propertiesString);
          }
        }

        // remove placeholder
        $element->removeAttribute('data-css-to-inline-styles-original-styles');
      }
    }

    return $xPath;
  }

  /**
   * Merges the given properties into the element's current "style" attribute
   * and builds the resulting style string. The element itself is not changed.
   *
   * @param \DOMElement $element
   * @param array       $ruleProperties property name => value or list of values
   *
   * @return string e.g. "color: red; font-size: 12px;"
   */
  private function createPropertyChunks(\DOMElement $element, array $ruleProperties)
  {
    // init var
    $properties = array();

    // get current styles
    $stylesAttribute = $element->attributes->getNamedItem('style');

    // any styles defined before?
    if (null !== $stylesAttribute) {
      // get value for the styles attribute
      $definedStyles = (string)$stylesAttribute->value;

      // split into properties
      $definedProperties = $this->splitIntoProperties($definedStyles);

      $properties = $this->splitStyleIntoChunks($definedProperties);
    }

    // add new properties into the list
    foreach ($ruleProperties as $key => $value) {
      // If one of the rules is already set and is !important, don't apply it,
      // except if the new rule is also important.
      if (
          !isset($properties[$key])
          ||
          false === stripos($properties[$key], '!important')
          ||
          false !== stripos(implode('', (array)$value), '!important')
      ) {
        $properties[$key] = $value;
      }
    }

    // build string
    $propertyChunks = array();

    // build chunks
    foreach ($properties as $key => $values) {
      foreach ((array)$values as $value) {
        $propertyChunks[] = $key . ': ' . $value . ';';
      }
    }

    return implode(' ', $propertyChunks);
  }

  /**
   * Turns a list of "name:value" strings into a map of name => value.
   * Entries that are empty or contain no colon are ignored.
   *
   * @param array $definedProperties
   *
   * @return array
   */
  private function splitStyleIntoChunks(array $definedProperties)
  {
    // init var
    $properties = array();

    // loop properties
    foreach ($definedProperties as $property) {
      // validate property
      if (
          !$property
          ||
          strpos($property, ':') === false
      ) {
        continue;
      }

      // split into chunks
      $chunks = (array)explode(':', trim($property), 2);

      // validate
      if (!isset($chunks[1])) {
        continue;
      }

      // store the value under the property name
      $properties[$chunks[0]] = trim($chunks[1]);
    }

    return $properties;
  }

  /**
   * Strip style tags from the generated HTML
   *
   * When media queries are excluded from inlining, the style elements are
   * kept but reduced to their media queries; otherwise they are removed.
   *
   * @param  \DOMXPath $xPath The DOMXPath for the entire document.
   *
   * @return void
   */
  private function stripOriginalStyleTags(\DOMXPath $xPath)
  {
    // get all style tags
    $nodes = $xPath->query('descendant-or-self::style');
    foreach ($nodes as $node) {
      if ($this->excludeMediaQueries === true) {

        // remove comments previously to matching media queries
        $node->nodeValue = preg_replace(self::$styleCommentRegEx, '', $node->nodeValue);

        // search for Media Queries
        preg_match_all(self::$cssMediaQueriesRegEx, $node->nodeValue, $mqs);

        // replace the nodeValue with just the Media Queries
        $node->nodeValue = implode("\n", $mqs[0]);

      } else {
        // remove the entire style tag
        $node->parentNode->removeChild($node);
      }
    }
  }

  /**
   * Remove id and class attributes.
   *
   * @param  \DOMXPath $xPath The DOMXPath for the entire document.
   *
   * @return void
   */
  private function cleanupHTML(\DOMXPath $xPath)
  {
    $nodes = $xPath->query('//@class | //@id');
    foreach ($nodes as $node) {
      $node->ownerElement->removeAttributeNode($node);
    }
  }

  /**
   * Should the IDs and classes be removed?
   *
   * @param  bool $on Should we enable cleanup?
   */
  public function setCleanup($on = true)
  {
    $this->cleanup = (bool)$on;
  }

  /**
   * Set the encoding to use with the DOMDocument
   *
   * @param  string $encoding The encoding to use.
   *
   * @deprecated Documented upstream as not having any effect.
   *             NOTE(review): the value is still read through getEncoding()
   *             when the DOMDocument is created - confirm the intended status.
   */
  public function setEncoding($encoding)
  {
    $this->encoding = (string)$encoding;
  }

  /**
   * Set use of inline styles block
   * If this is enabled the class will use the style-block in the HTML.
   *
   * @param  bool $on Should we process inline styles?
   */
  public function setUseInlineStylesBlock($on = true)
  {
    $this->useInlineStylesBlock = (bool)$on;
  }

  /**
   * Set strip original style tags
   * If this is enabled the class will remove all style tags in the HTML.
   *
   * @param  bool $on Should the original style tags be stripped?
   */
  public function setStripOriginalStyleTags($on = true)
  {
    $this->stripOriginalStyleTags = (bool)$on;
  }

  /**
   * Set exclude media queries
   *
   * If this is enabled the media queries will be removed before inlining the rules.
   *
   * WARNING: If you use inline styles block "<style>" then this option will keep the media queries.
   *
   * @param bool $on
   */
  public function setExcludeMediaQueries($on = true)
  {
    $this->excludeMediaQueries = (bool)$on;
  }

  /**
   * Set exclude conditional inline-style blocks e.g.: <!--[if gte mso 9]><style>.foo { bar } </style><![endif]-->
   *
   * @param bool $on
   */
  public function setExcludeConditionalInlineStylesBlock($on = true)
  {
    $this->excludeConditionalInlineStylesBlock = (bool)$on;
  }

}
809