Passed
Branch master (50908e)
by Stiofan
07:01
created

Emogrifier   F

Complexity

Total Complexity 82

Size/Duplication

Total Lines 776
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 274
c 1
b 0
f 0
dl 0
loc 776
rs 2
wmc 82

29 Methods

Rating   Name   Duplication   Size   Complexity  
A removeUnprocessableHtmlTag() 0 4 2
A setCss() 0 2 1
A parseCssDeclarationBlock() 0 19 4
A generateStyleStringFromDeclarationsArrays() 0 7 2
A translateCssToXpath() 0 55 2
A matchIdAttributes() 0 2 2
A matchClassAttributes() 0 6 2
A strtolower() 0 2 1
A clearAllCaches() 0 5 1
A _media_concat() 0 2 1
A __construct() 0 3 1
A translateNthOfType() 0 12 3
B parseNth() 0 32 8
A __destruct() 0 2 1
A copyCssWithMediaToStyleNode() 0 3 3
A addUnprocessableHtmlTag() 0 2 1
A getCssSelectorPrecedence() 0 21 4
A clearCache() 0 7 2
A addStyleElementToDocument() 0 8 1
A getOrCreateHeadElement() 0 10 2
A setHtml() 0 2 1
F emogrify() 0 125 21
A createXmlDocument() 0 12 1
A purgeVisitedNodes() 0 3 1
A translateNthChild() 0 12 3
A getUnifiedHtml() 0 12 3
A getCssFromAllStyleNodes() 0 15 3
A sortBySelectorPrecedence() 0 12 4
A splitCssAndMediaQuery() 0 23 1

How to fix   Complexity   

Complex Class

Complex classes like Emogrifier often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Emogrifier, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
 * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
 *
 * Typical usage: construct it with the HTML and the CSS (both UTF-8), then call emogrify() to get the HTML
 * with the CSS rules merged into "style" attributes. Rules inside "@media" blocks cannot be inlined and are
 * instead copied into a <style> element in the document head.
 *
 * For more information, please see the README.md file.
 *
 * @author Cameron Brooks
 * @author Jaime Prado
 * @author Roman Ožana <[email protected]>
 */
class Emogrifier {
	/**
	 * The encoding the HTML and CSS input is expected to be in.
	 *
	 * @var string
	 */
	const ENCODING = 'UTF-8';

	/**
	 * Cache bucket for the parsed and sorted rule list, keyed by the md5 of the CSS.
	 *
	 * @var integer
	 */
	const CACHE_KEY_CSS = 0;

	/**
	 * Cache bucket for selector precedence values, keyed by the md5 of the selector.
	 *
	 * @var integer
	 */
	const CACHE_KEY_SELECTOR = 1;

	/**
	 * Cache bucket for translated XPath expressions, keyed by the md5 of the selector.
	 *
	 * @var integer
	 */
	const CACHE_KEY_XPATH = 2;

	/**
	 * Cache bucket for parsed declaration blocks, keyed by the raw declaration block string.
	 *
	 * @var integer
	 */
	const CACHE_KEY_CSS_DECLARATION_BLOCK = 3;

	/**
	 * for calculating nth-of-type and nth-child selectors.
	 *
	 * @var integer
	 */
	const INDEX = 0;

	/**
	 * for calculating nth-of-type and nth-child selectors.
	 *
	 * @var integer
	 */
	const MULTIPLIER = 1;

	/**
	 * Matches "#id" with an optional leading element name.
	 *
	 * @var string
	 */
	const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';

	/**
	 * Matches one or more chained ".class" parts with an optional leading element name.
	 *
	 * @var string
	 */
	const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';

	/**
	 * Matches the structural pseudo-classes that translateCssToXpath() knows how to translate.
	 *
	 * @var string
	 */
	const SUPPORTED_PSEUDO_CLASS_MATCHER = '/:(?:first-child|last-child|nth-child\\([^)]*\\)|nth-of-type\\([^)]*\\))/i';

	/**
	 * @var string
	 */
	private $html = '';

	/**
	 * @var string
	 */
	private $css = '';

	/**
	 * the names of the tags that get stripped from the HTML before it is parsed.
	 *
	 * @var array<string>
	 */
	private $unprocessableHtmlTags = array('wbr');

	/**
	 * the caches, indexed by the CACHE_KEY_* constants.
	 *
	 * @var array<array>
	 */
	private $caches = array(
		self::CACHE_KEY_CSS => array(),
		self::CACHE_KEY_SELECTOR => array(),
		self::CACHE_KEY_XPATH => array(),
		self::CACHE_KEY_CSS_DECLARATION_BLOCK => array(),
	);

	/**
	 * the visited nodes with the XPath paths as array keys.
	 *
	 * @var array<\DOMNode>
	 */
	private $visitedNodes = array();

	/**
	 * the styles to apply to the nodes with the XPath paths as array keys for the outer array and the
	 * attribute names/values as key/value pairs for the inner array.
	 *
	 * @var array<array><string>
	 */
	private $styleAttributesForNodes = array();

	/**
	 * This attribute applies to the case where you want to preserve your original text encoding.
	 *
	 * By default, emogrifier translates your text into HTML entities for two reasons:
	 *
	 * 1. Because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email.
	 *
	 * 2. It translates any illegal XML characters that DOMDocument cannot work with.
	 *
	 * If you would like to preserve your original encoding, set this attribute to TRUE.
	 *
	 * @var boolean
	 */
	public $preserveEncoding = false;

	/**
	 * Accumulates the "@media" blocks found by splitCssAndMediaQuery(); filled by the _media_concat() regex callback.
	 *
	 * @var string
	 */
	public static $_media = '';

	/**
	 * The constructor.
	 *
	 * @param string $html the HTML to emogrify, must be UTF-8-encoded
	 * @param string $css the CSS to merge, must be UTF-8-encoded
	 */
	public function __construct($html = '', $css = '') {
		$this->setHtml($html);
		$this->setCss($css);
	}

	/**
	 * The destructor. Drops the references to the DOM nodes collected during the last run.
	 */
	public function __destruct() {
		$this->purgeVisitedNodes();
	}

	/**
	 * Sets the HTML to emogrify.
	 *
	 * @param string $html the HTML to emogrify, must be UTF-8-encoded
	 */
	public function setHtml($html = '') {
		$this->html = $html;
	}

	/**
	 * Sets the CSS to merge with the HTML.
	 *
	 * @param string $css the CSS to merge, must be UTF-8-encoded
	 */
	public function setCss($css = '') {
		$this->css = $css;
	}

	/**
	 * Clears all caches.
	 */
	private function clearAllCaches() {
		$this->clearCache(self::CACHE_KEY_CSS);
		$this->clearCache(self::CACHE_KEY_SELECTOR);
		$this->clearCache(self::CACHE_KEY_XPATH);
		$this->clearCache(self::CACHE_KEY_CSS_DECLARATION_BLOCK);
	}

	/**
	 * Clears a single cache by key.
	 *
	 * @param integer $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH or CACHE_KEY_CSS_DECLARATION_BLOCK
	 *
	 * @throws InvalidArgumentException if $key is none of the CACHE_KEY_* constants
	 */
	private function clearCache($key) {
		$allowedCacheKeys = array(self::CACHE_KEY_CSS, self::CACHE_KEY_SELECTOR, self::CACHE_KEY_XPATH, self::CACHE_KEY_CSS_DECLARATION_BLOCK);
		if (!in_array($key, $allowedCacheKeys, true)) {
			throw new InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
		}

		$this->caches[$key] = array();
	}

	/**
	 * Purges the visited nodes and the original inline styles remembered for them.
	 */
	private function purgeVisitedNodes() {
		$this->visitedNodes = array();
		$this->styleAttributesForNodes = array();
	}

	/**
	 * Marks a tag for removal.
	 *
	 * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
	 * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
	 *
	 * Note: Only the opening and closing tags themselves are stripped; any content between them is kept.
	 *
	 * @param string $tagName the tag name, e.g., "p"
	 */
	public function addUnprocessableHtmlTag($tagName) {
		$this->unprocessableHtmlTags[] = $tagName;
	}

	/**
	 * Drops a tag from the removal list.
	 *
	 * @param string $tagName the tag name, e.g., "p"
	 */
	public function removeUnprocessableHtmlTag($tagName) {
		$key = array_search($tagName, $this->unprocessableHtmlTags, true);
		if ($key !== false) {
			unset($this->unprocessableHtmlTags[$key]);
		}
	}

	/**
	 * Applies the CSS you submit to the HTML you submit.
	 *
	 * This method places the CSS inline. Styles already present in "style" attributes of the HTML win over
	 * the CSS rules, nodes ending up with "display: none" are removed, and "@media" blocks are copied into
	 * a <style> element in the head.
	 *
	 * @return string the resulting HTML
	 *
	 * @throws BadMethodCallException if no HTML has been set
	 */
	public function emogrify() {
		if ($this->html === '') {
			throw new BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
		}

		$xmlDocument = $this->createXmlDocument();
		$xpath = new DOMXPath($xmlDocument);
		$this->clearAllCaches();
		$this->purgeVisitedNodes();

		$this->normalizeInlineStyles($xpath);

		// grab any existing style blocks from the html and append them to the existing CSS
		// (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
		$allCss = $this->css . $this->getCssFromAllStyleNodes($xpath);

		$cssParts = $this->splitCssAndMediaQuery($allCss);
		self::$_media = ''; // reset

		foreach ($this->getSortedCssRules($cssParts['css']) as $rule) {
			$this->applyCssRule($xpath, $rule);
		}

		$this->restoreOriginalInlineStyles();
		$this->removeNodesWithDisplayNone($xpath);
		$this->copyCssWithMediaToStyleNode($cssParts, $xmlDocument);

		return $this->renderDocument($xmlDocument);
	}

	/**
	 * Lower-cases the property names of all existing inline styles (changes 'DISPLAY: none' to 'display: none')
	 * and remembers the original inline styles so that they can be re-applied on top of the CSS rules later.
	 *
	 * We wouldn't have to do this if DOMXPath supported XPath 2.0.
	 *
	 * @param DOMXPath $xpath
	 */
	private function normalizeInlineStyles(DOMXPath $xpath) {
		$nodesWithStyleAttributes = $xpath->query('//*[@style]');
		if ($nodesWithStyleAttributes === false) {
			return;
		}

		/** @var $node DOMElement */
		foreach ($nodesWithStyleAttributes as $node) {
			$normalizedOriginalStyle = preg_replace_callback('/[A-Za-z\\-]+(?=\\:)/S', array($this, 'strtolower'), $node->getAttribute('style'));

			// in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles
			$nodePath = $node->getNodePath();
			if (!isset($this->styleAttributesForNodes[$nodePath])) {
				$this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle);
				$this->visitedNodes[$nodePath] = $node;
			}

			$node->setAttribute('style', $normalizedOriginalStyle);
		}
	}

	/**
	 * Parses $css into a list of single-selector rules sorted by ascending precedence.
	 *
	 * Each entry has the keys "selector", "attributes" (the declaration block) and "line" (the position of the
	 * rule in the CSS, used as tie-breaker). Rules without declarations and selectors using pseudo-elements or
	 * pseudo-classes that cannot be translated to XPath are left out. The result is cached per CSS string.
	 *
	 * @param string $css the CSS without media queries
	 *
	 * @return array<array>
	 */
	private function getSortedCssRules($css) {
		$cssKey = md5($css);
		if (!isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
			// process the CSS file for selectors and definitions
			preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER);

			$allSelectors = array();
			foreach ($matches as $key => $selectorString) {
				$declarations = trim($selectorString[2]);
				// if there is a blank definition, skip
				if ($declarations === '') {
					continue;
				}

				// else split by commas and duplicate attributes so we can sort by selector precedence
				foreach (explode(',', $selectorString[1]) as $selector) {
					if ($this->containsUnsupportedPseudoSelector($selector)) {
						continue;
					}

					$allSelectors[] = array(
						'selector' => trim($selector),
						'attributes' => $declarations,
						// keep track of where it appears in the file, since order is important
						'line' => $key,
					);
				}
			}

			// now sort the selectors by precedence
			usort($allSelectors, array($this, 'sortBySelectorPrecedence'));

			$this->caches[self::CACHE_KEY_CSS][$cssKey] = $allSelectors;
		}

		return $this->caches[self::CACHE_KEY_CSS][$cssKey];
	}

	/**
	 * Checks whether $selector contains a colon that is not part of one of the structural pseudo-classes
	 * handled by translateCssToXpath() (:first-child, :last-child, :nth-child() and :nth-of-type()).
	 *
	 * Pseudo-elements and behavioral (dynamic) pseudo-classes cannot be expressed as inline styles.
	 *
	 * @param string $selector
	 *
	 * @return boolean
	 */
	private function containsUnsupportedPseudoSelector($selector) {
		if (strpos($selector, ':') === false) {
			return false;
		}

		$selectorWithoutSupportedPseudoClasses = preg_replace(self::SUPPORTED_PSEUDO_CLASS_MATCHER, '', $selector);

		return strpos($selectorWithoutSupportedPseudoClasses, ':') !== false;
	}

	/**
	 * Merges the declarations of a single rule into the style attribute of every node matching its selector.
	 *
	 * @param DOMXPath $xpath
	 * @param array $rule a rule as returned by getSortedCssRules()
	 */
	private function applyCssRule(DOMXPath $xpath, array $rule) {
		// query the body for the xpath selector
		$nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath($rule['selector']));
		// DOMXPath::query() returns false for selectors that could not be translated into a valid expression
		if ($nodesMatchingCssSelectors === false) {
			return;
		}

		$newStyleDeclarations = $this->parseCssDeclarationBlock($rule['attributes']);

		/** @var $node DOMElement */
		foreach ($nodesMatchingCssSelectors as $node) {
			// if it has a style attribute, get it, process it, and append (overwrite) new stuff
			$oldStyleDeclarations = $node->hasAttribute('style')
				? $this->parseCssDeclarationBlock($node->getAttribute('style'))
				: array();
			$node->setAttribute('style', $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations));
		}
	}

	/**
	 * Re-applies the inline styles from the original HTML on top of whatever the CSS rules have set.
	 */
	private function restoreOriginalInlineStyles() {
		foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
			$node = $this->visitedNodes[$nodePath];
			$currentStyleAttributes = $this->parseCssDeclarationBlock($node->getAttribute('style'));
			$node->setAttribute('style', $this->generateStyleStringFromDeclarationsArrays($currentStyleAttributes, $styleAttributesForNode));
		}
	}

	/**
	 * Removes all nodes whose style attribute contains display:none.
	 *
	 * We need a case-insensitive search. Since DOMDocument only supports XPath 1.0, lower-case() isn't available
	 * to us. We've thus far only set attributes to lowercase, not attribute values. Consequently, we need to
	 * translate() the letters that would be in 'NONE' ("NOE") to lowercase.
	 *
	 * @param DOMXPath $xpath
	 */
	private function removeNodesWithDisplayNone(DOMXPath $xpath) {
		$nodesWithStyleDisplayNone = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');
		if ($nodesWithStyleDisplayNone === false) {
			return;
		}

		/** @var $node \DOMNode */
		foreach ($nodesWithStyleDisplayNone as $node) {
			// The checks on parentNode and is_callable ensure that if we've deleted the parent node,
			// we don't try to call removeChild on a nonexistent child node
			if ($node->parentNode && is_callable(array($node->parentNode, 'removeChild'))) {
				$node->parentNode->removeChild($node);
			}
		}
	}

	/**
	 * Serializes the document to HTML, decoding the HTML entities again if $preserveEncoding is set.
	 *
	 * @param DOMDocument $xmlDocument
	 *
	 * @return string
	 */
	private function renderDocument(DOMDocument $xmlDocument) {
		$html = $xmlDocument->saveHTML();
		if (!$this->preserveEncoding) {
			return $html;
		}

		if (function_exists('mb_convert_encoding')) {
			return mb_convert_encoding($html, self::ENCODING, 'HTML-ENTITIES');
		}

		return htmlspecialchars_decode(utf8_encode(html_entity_decode($html, ENT_COMPAT, self::ENCODING)));
	}

	/**
	 * Callback for preg_replace_callback() that lower-cases the complete match.
	 *
	 * @param array $m the regular expression match
	 *
	 * @return string
	 */
	public function strtolower(array $m) {
		return strtolower($m[0]);
	}

	/**
	 * This method merges old or existing name/value array with new name/value array
	 * and then generates a string of the combined style suitable for placing inline.
	 * This becomes the single point for CSS string generation allowing for consistent
	 * CSS output no matter where the CSS originally came from.
	 *
	 * @param array $oldStyles
	 * @param array $newStyles the declarations that win over $oldStyles for the same property
	 *
	 * @return string
	 */
	private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles) {
		$combinedStyles = array_merge($oldStyles, $newStyles);
		$style = '';
		foreach ($combinedStyles as $attributeName => $attributeValue) {
			$style .= (strtolower(trim($attributeName)) . ': ' . trim($attributeValue) . '; ');
		}

		return trim($style);
	}

	/**
	 * Copies the media part from CSS array parts to $xmlDocument.
	 *
	 * @param array $cssParts the result of splitCssAndMediaQuery()
	 * @param DOMDocument $xmlDocument
	 */
	public function copyCssWithMediaToStyleNode(array $cssParts, DOMDocument $xmlDocument) {
		if (isset($cssParts['media']) && $cssParts['media'] !== '') {
			$this->addStyleElementToDocument($xmlDocument, $cssParts['media']);
		}
	}

	/**
	 * Returns the concatenated content of all style elements and removes these elements from the document.
	 *
	 * @param DOMXPath $xpath
	 *
	 * @return string
	 */
	private function getCssFromAllStyleNodes(DOMXPath $xpath) {
		$styleNodes = $xpath->query('//style');
		if ($styleNodes === false) {
			return '';
		}

		$css = '';
		/** @var $styleNode DOMNode */
		foreach ($styleNodes as $styleNode) {
			$css .= "\n\n" . $styleNode->nodeValue;
			$styleNode->parentNode->removeChild($styleNode);
		}

		return $css;
	}

	/**
	 * Adds a style element with $css to $document.
	 *
	 * @param DOMDocument $document
	 * @param string $css
	 */
	private function addStyleElementToDocument(DOMDocument $document, $css) {
		$styleElement = $document->createElement('style');
		// The value argument of createElement() is not escaped, so an ampersand in the CSS would be read
		// as a broken entity reference. A text node takes the CSS verbatim.
		$styleElement->appendChild($document->createTextNode($css));
		$styleElement->setAttribute('type', 'text/css');

		$head = $this->getOrCreateHeadElement($document);
		$head->appendChild($styleElement);
	}

	/**
	 * Returns the existing or creates a new head element in $document.
	 *
	 * A new head element is inserted into the html element, in front of the body.
	 *
	 * @param DOMDocument $document
	 *
	 * @return DOMNode the head element
	 */
	private function getOrCreateHeadElement(DOMDocument $document) {
		$head = $document->getElementsByTagName('head')->item(0);

		if ($head === null) {
			$head = $document->createElement('head');
			$html = $document->getElementsByTagName('html')->item(0);
			$html->insertBefore($head, $document->getElementsByTagName('body')->item(0));
		}

		return $head;
	}

	/**
	 * Splits input CSS code to an array where:
	 *
	 * - key "css" contains the clean CSS code.
	 * - key "media" contains all valuable media queries.
	 *
	 * Example:
	 *
	 * The CSS code
	 *
	 *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
	 *
	 * will be parsed into the following array:
	 *
	 *   "css" => "h1 { color:red; }"
	 *   "media" => "@media { h1 {}}"
	 *
	 * @param string $css
	 *
	 * @return array
	 */
	private function splitCssAndMediaQuery($css) {
		// start from a clean accumulator so that nothing left over from elsewhere leaks into the result
		self::$_media = '';
		$css = preg_replace_callback('#@media\\s+(?:only\\s)?(?:[\\s{\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU', array($this, '_media_concat'), $css);

		// filter the CSS
		$search = array(
			// get rid of css comment code
			'/\\/\\*.*\\*\\//sU',
			// strip out any import directives
			'/^\\s*@import\\s[^;]+;/misU',
			// strip remains media enclosures
			'/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
		);

		// clean CSS before output
		$css = preg_replace($search, '', $css);

		return array('css' => $css, 'media' => self::$_media);
	}

	/**
	 * Callback for splitCssAndMediaQuery(): collects the matched media query block and removes it from the CSS.
	 *
	 * @param array $matches the regular expression match
	 *
	 * @return string always the empty string, i.e. the match gets cut out
	 */
	private function _media_concat( $matches ) {
		self::$_media .= $matches[0];

		return '';
	}

	/**
	 * Creates a DOMDocument instance with the current HTML.
	 *
	 * libxml errors raised while loading are collected internally and discarded, and the previous
	 * libxml error handling state is restored afterwards.
	 *
	 * @return DOMDocument
	 */
	private function createXmlDocument() {
		$xmlDocument = new DOMDocument;
		$xmlDocument->encoding = self::ENCODING;
		$xmlDocument->strictErrorChecking = false;
		$xmlDocument->formatOutput = true;
		$libXmlState = libxml_use_internal_errors(true);
		$xmlDocument->loadHTML($this->getUnifiedHtml());
		libxml_clear_errors();
		libxml_use_internal_errors($libXmlState);
		$xmlDocument->normalizeDocument();

		return $xmlDocument;
	}

	/**
	 * Returns the HTML with the non-ASCII characters converted into HTML entities and the unprocessable HTML tags removed.
	 *
	 * @return string the unified HTML
	 */
	private function getUnifiedHtml() {
		if (!empty($this->unprocessableHtmlTags)) {
			$quotedTagNames = array();
			foreach ($this->unprocessableHtmlTags as $tagName) {
				$quotedTagNames[] = preg_quote($tagName, '/');
			}
			// The lookahead makes sure that only complete tag names match, e.g., that "p" does not strip "<pre>".
			$pattern = '/<\\/?(?:' . implode('|', $quotedTagNames) . ')(?=[\\s\\/>])[^>]*>/i';
			$bodyWithoutUnprocessableTags = preg_replace($pattern, '', $this->html);
		} else {
			$bodyWithoutUnprocessableTags = $this->html;
		}

		if (function_exists('mb_convert_encoding')) {
			return mb_convert_encoding($bodyWithoutUnprocessableTags, 'HTML-ENTITIES', self::ENCODING);
		}

		return htmlspecialchars_decode(utf8_decode(htmlentities($bodyWithoutUnprocessableTags, ENT_COMPAT, self::ENCODING, false)));
	}

	/**
	 * Comparison callback for usort(): orders rules by ascending selector precedence and, for equal
	 * precedence, by their position in the CSS, so that the rules which must win are applied last.
	 *
	 * @param array $a
	 * @param array $b
	 *
	 * @return integer
	 */
	private function sortBySelectorPrecedence(array $a, array $b) {
		$precedenceA = $this->getCssSelectorPrecedence($a['selector']);
		$precedenceB = $this->getCssSelectorPrecedence($b['selector']);

		// The parenthesis around the -1 are necessary to avoid a PHP_CodeSniffer warning about missing spaces around
		// arithmetic operators.
		// @see http://forge.typo3.org/issues/55605
		if ($precedenceA !== $precedenceB) {
			return ($precedenceA < $precedenceB) ? (-1) : 1;
		}
		// Selectors split from the same rule share a position and carry identical declarations.
		if ($a['line'] === $b['line']) {
			return 0;
		}

		return ($a['line'] < $b['line']) ? (-1) : 1;
	}

	/**
	 * Calculates the precedence of a selector: ids are worth 100, classes 10 and the remaining words 1 each.
	 *
	 * @param string $selector
	 *
	 * @return integer
	 */
	private function getCssSelectorPrecedence($selector) {
		$selectorKey = md5($selector);
		if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
			$precedence = 0;
			$value = 100;
			// ids: worth 100, classes: worth 10, elements: worth 1
			$search = array('\\#', '\\.', '');

			foreach ($search as $s) {
				// nothing left to count
				if (trim($selector) === '') {
					break;
				}
				$number = 0;
				// cut the counted parts out so that they are not counted again in the next round
				$selector = preg_replace('/' . $s . '\\w+/', '', $selector, -1, $number);
				$precedence += ($value * $number);
				$value /= 10;
			}
			$this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
		}

		return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
	}

	/**
	 * Translates a CSS selector into an XPath expression.
	 *
	 * Right now, we support all CSS 1 selectors and most CSS2/3 selectors.
	 *
	 * @see http://plasmasturm.org/log/444/
	 *
	 * @param string $paramCssSelector
	 *
	 * @return string
	 */
	private function translateCssToXpath($paramCssSelector) {
		// lower-case the stand-alone words (element names) of the selector
		$cssSelector = ' ' . $paramCssSelector . ' ';
		$cssSelector = preg_replace_callback('/\s+\w+\s+/', array($this, 'strtolower'), $cssSelector);
		$cssSelector = trim($cssSelector);

		$xpathKey = md5($cssSelector);
		if (isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
			return $this->caches[self::CACHE_KEY_XPATH][$xpathKey];
		}

		$search = array(
			// Matches any element that is a child of parent.
			'/\\s+>\\s+/',
			// Matches any element that is an adjacent sibling.
			'/\\s+\\+\\s+/',
			// Matches any element that is a descendant of a parent element.
			'/\\s+/',
			// first-child pseudo-selector
			'/([^\\/]+):first-child/i',
			// last-child pseudo-selector
			'/([^\\/]+):last-child/i',
			// Matches attribute only selector
			'/^\\[(\\w+)\\]/',
			// Matches element with attribute
			'/(\\w)\\[(\\w+)\\]/',
			// Matches element with EXACT attribute
			'/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
		);
		$replace = array(
			'/',
			'/following-sibling::*[1]/self::',
			'//',
			'*[1]/self::\\1',
			'*[last()]/self::\\1',
			'*[@\\1]',
			'\\1[@\\2]',
			'\\1[@\\2="\\3"]',
		);

		$xpath = '//' . preg_replace($search, $replace, $cssSelector);

		$xpath = preg_replace_callback(self::ID_ATTRIBUTE_MATCHER, array($this, 'matchIdAttributes'), $xpath);
		$xpath = preg_replace_callback(self::CLASS_ATTRIBUTE_MATCHER, array($this, 'matchClassAttributes'), $xpath);

		// Advanced selectors are going to require a bit more advanced emogrification.
		// When we required PHP 5.3, we could do this with closures.
		$xpath = preg_replace_callback(
			'/([^\\/]+):nth-child\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
			array($this, 'translateNthChild'), $xpath
		);
		$xpath = preg_replace_callback(
			'/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
			array($this, 'translateNthOfType'), $xpath
		);

		// The XPath expressions need their own bucket: the selector cache uses the very same keys
		// for the precedence values.
		$this->caches[self::CACHE_KEY_XPATH][$xpathKey] = $xpath;

		return $xpath;
	}

	/**
	 * Callback for ID_ATTRIBUTE_MATCHER: builds the XPath test for an id selector.
	 *
	 * @param array $match
	 *
	 * @return string
	 */
	private function matchIdAttributes(array $match) {
		return (strlen($match[1]) ? $match[1] : '*') . '[@id="' . $match[2] . '"]';
	}

	/**
	 * Callback for CLASS_ATTRIBUTE_MATCHER: builds one XPath "contains" test per class name.
	 *
	 * @param array $match
	 *
	 * @return string
	 */
	private function matchClassAttributes(array $match) {
		return (strlen($match[1]) ? $match[1] : '*') . '[contains(concat(" ",@class," "),concat(" ","' .
			implode(
				'"," "))][contains(concat(" ",@class," "),concat(" ","',
				explode('.', substr($match[2], 1))
			) . '"," "))]';
	}

	/**
	 * Callback for the :nth-child() matcher.
	 *
	 * @param array $match
	 *
	 * @return string
	 */
	private function translateNthChild(array $match) {
		$result = $this->parseNth($match);

		if (isset($result[self::MULTIPLIER])) {
			if ($result[self::MULTIPLIER] < 0) {
				$result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]);
				return sprintf('*[(last() - position()) mod %u = %u]/self::%s', $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);
			} else {
				return sprintf('*[position() mod %u = %u]/self::%s', $result[self::MULTIPLIER], $result[self::INDEX], $match[1]);
			}
		} else {
			return sprintf('*[%u]/self::%s', $result[self::INDEX], $match[1]);
		}
	}

	/**
	 * Callback for the :nth-of-type() matcher.
	 *
	 * @param array $match
	 *
	 * @return string
	 */
	private function translateNthOfType(array $match) {
		$result = $this->parseNth($match);

		if (isset($result[self::MULTIPLIER])) {
			if ($result[self::MULTIPLIER] < 0) {
				$result[self::MULTIPLIER] = abs($result[self::MULTIPLIER]);
				return sprintf('%s[(last() - position()) mod %u = %u]', $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);
			} else {
				return sprintf('%s[position() mod %u = %u]', $match[1], $result[self::MULTIPLIER], $result[self::INDEX]);
			}
		} else {
			return sprintf('%s[%u]', $match[1], $result[self::INDEX]);
		}
	}

	/**
	 * Parses the argument of an nth-child/nth-of-type selector ("odd", "even", "3", "2n", "2n+1", ...).
	 *
	 * @param array $match the regular expression match: [2] is the complete argument, [3] (if set) the
	 *                     trailing offset of an "an+b" argument, e.g. "+1"
	 *
	 * @return array the parsed values with the key INDEX and, unless the argument is a plain position, MULTIPLIER
	 */
	private function parseNth(array $match) {
		$keyword = strtolower($match[2]);
		if ($keyword === 'even' || $keyword === 'odd') {
			$index = ($keyword === 'even') ? 0 : 1;
			return array(self::MULTIPLIER => 2, self::INDEX => $index);
		}

		if (stripos($match[2], 'n') === false) {
			// there is no multiplier, just a position
			return array(self::INDEX => intval(preg_replace('/\\s+/', '', $match[2])));
		}

		$offset = isset($match[3]) ? $match[3] : '';
		if ($offset !== '') {
			// Cut the offset from the end only: removing every occurrence would turn "+2n+2" into "n".
			$multipleTerm = substr($match[2], 0, -strlen($offset));
			$index = intval(preg_replace('/\\s+/', '', $offset));
		} else {
			$multipleTerm = $match[2];
			$index = 0;
		}

		$multiplierTerm = str_ireplace('n', '', $multipleTerm);
		if ($multiplierTerm === '') {
			$multiplier = 1;
		} else {
			// intval() yields 0 for "0" as well as for a lone sign, on every PHP version. Such arguments are
			// treated as a plain position; a zero multiplier would also keep the loop below from terminating.
			$multiplier = intval($multiplierTerm);
			if ($multiplier === 0) {
				return array(self::INDEX => $index);
			}
		}

		while ($index < 0) {
			$index += abs($multiplier);
		}

		return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
	}

	/**
	 * Parses a CSS declaration block into property name/value pairs.
	 *
	 * Example:
	 *
	 * The declaration block
	 *
	 *   "color: #000; font-weight: bold;"
	 *
	 * will be parsed into the following array:
	 *
	 *   "color" => "#000"
	 *   "font-weight" => "bold"
	 *
	 * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty
	 *
	 * @return array the CSS declarations with the property names as array keys and the property values as array values
	 */
	private function parseCssDeclarationBlock($cssDeclarationBlock) {
		if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock])) {
			return $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock];
		}

		$properties = array();
		foreach (explode(';', $cssDeclarationBlock) as $declaration) {
			$matches = array();
			if (!preg_match('/ *([A-Za-z\\-]+) *: *([^;]+) */', $declaration, $matches)) {
				continue;
			}
			// a later declaration of the same property overrides an earlier one
			$properties[strtolower($matches[1])] = $matches[2];
		}
		$this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock] = $properties;

		return $properties;
	}
}
789