Total Complexity | 84 |
Total Lines | 796 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Emogrifier often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Emogrifier, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
11 | class Emogrifier { |
||
12 | /** |
||
13 | * @var string the character encoding used for the document and for all entity conversions |
||
14 | */ |
||
15 | const ENCODING = 'UTF-8'; |
||
16 | |||
17 | /** |
||
18 | * @var integer key in $caches for the parsed and sorted selectors of a CSS string |
||
19 | */ |
||
20 | const CACHE_KEY_CSS = 0; |
||
21 | |||
22 | /** |
||
23 | * @var integer key in $caches for the calculated selector precedences |
||
24 | */ |
||
25 | const CACHE_KEY_SELECTOR = 1; |
||
26 | |||
27 | /** |
||
28 | * @var integer key in $caches checked when translating CSS selectors to XPath |
||
29 | */ |
||
30 | const CACHE_KEY_XPATH = 2; |
||
31 | |||
32 | /** |
||
33 | * @var integer |
||
34 | */ |
||
35 | const CACHE_KEY_CSS_DECLARATION_BLOCK = 3; |
||
36 | |||
37 | /** |
||
38 | * Array key of the index (offset) term in the parsed result of an nth-of-type or nth-child selector. |
||
39 | * |
||
40 | * @var integer |
||
41 | */ |
||
42 | const INDEX = 0; |
||
43 | |||
44 | /** |
||
45 | * Array key of the multiplier term in the parsed result of an nth-of-type or nth-child selector. |
||
46 | * |
||
47 | * @var integer |
||
48 | */ |
||
49 | const MULTIPLIER = 1; |
||
50 | |||
51 | /** |
||
52 | * @var string regular expression matching an optional element name followed by "#" and an ID |
||
53 | */ |
||
54 | const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/'; |
||
55 | |||
56 | /** |
||
57 | * @var string regular expression matching an optional element name, "*" or "]" followed by one or more ".class" parts |
||
58 | */ |
||
59 | const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/'; |
||
60 | |||
61 | /** |
||
62 | * @var string the HTML to emogrify |
||
63 | */ |
||
64 | private $html = ''; |
||
65 | |||
66 | /** |
||
67 | * @var string the CSS to merge into the HTML |
||
68 | */ |
||
69 | private $css = ''; |
||
70 | |||
71 | /** |
||
72 | * @var array<string> names of the tags that are stripped from the HTML before it is parsed |
||
73 | */ |
||
74 | private $unprocessableHtmlTags = array('wbr'); |
||
75 | |||
76 | /** |
||
77 | * @var array<array> the caches, keyed by the CACHE_KEY_* constants |
||
78 | */ |
||
79 | private $caches = array( |
||
80 | self::CACHE_KEY_CSS => array(), |
||
81 | self::CACHE_KEY_SELECTOR => array(), |
||
82 | self::CACHE_KEY_XPATH => array(), |
||
83 | self::CACHE_KEY_CSS_DECLARATION_BLOCK => array(), |
||
84 | ); |
||
85 | |||
86 | /** |
||
87 | * The nodes that already had a style attribute in the original HTML, with their XPath node paths as array keys. |
||
88 | * |
||
89 | * @var array<\DOMNode> |
||
90 | */ |
||
91 | private $visitedNodes = array(); |
||
92 | |||
93 | /** |
||
94 | * The original inline styles of the visited nodes: the XPath node paths are the keys of the outer array, |
||
||
95 | * and the CSS property names/values are the key/value pairs of the inner arrays. |
||
96 | * |
||
97 | * @var array<string, array<string, string>> |
||
98 | */ |
||
99 | private $styleAttributesForNodes = array(); |
||
100 | |||
101 | /** |
||
102 | * This attribute applies to the case where you want to preserve your original text encoding. |
||
103 | * |
||
104 | * By default, emogrifier translates your text into HTML entities for two reasons: |
||
105 | * |
||
106 | * 1. Because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email. |
||
107 | * |
||
108 | * 2. It translates any illegal XML characters that DOMDocument cannot work with. |
||
109 | * |
||
110 | * If you would like to preserve your original encoding, set this attribute to TRUE. |
||
111 | * |
||
112 | * @var boolean |
||
113 | */ |
||
114 | public $preserveEncoding = false; |
||
115 | |||
116 | public static $_media = ''; |
||
117 | |||
/**
 * Creates an Emogrifier, optionally primed with the HTML and the CSS to work on.
 *
 * Both values can also be provided (or replaced) later via setHtml() and setCss().
 *
 * @param string $html the HTML to emogrify, must be UTF-8-encoded
 * @param string $css the CSS to merge, must be UTF-8-encoded
 */
public function __construct($html = '', $css = '') {
    // Go through the public setters so that construction and later updates share one code path.
    $this->setHtml($html);
    $this->setCss($css);
}
||
128 | |||
/**
 * Releases the references to the DOM nodes collected during emogrification.
 */
public function __destruct() {
    $this->purgeVisitedNodes();
}
||
135 | |||
/**
 * Stores the HTML that the next call to emogrify() will process.
 *
 * @param string $html the HTML to emogrify, must be UTF-8-encoded
 */
public function setHtml($html = '') {
    $this->html = $html;
}
||
144 | |||
/**
 * Stores the CSS that the next call to emogrify() will merge into the HTML.
 *
 * @param string $css the CSS to merge, must be UTF-8-encoded
 */
public function setCss($css = '') {
    $this->css = $css;
}
||
153 | |||
/**
 * Empties every cache this class maintains.
 */
private function clearAllCaches() {
    $cacheKeys = array(
        self::CACHE_KEY_CSS,
        self::CACHE_KEY_SELECTOR,
        self::CACHE_KEY_XPATH,
        self::CACHE_KEY_CSS_DECLARATION_BLOCK,
    );

    foreach ($cacheKeys as $cacheKey) {
        $this->clearCache($cacheKey);
    }
}
||
163 | |||
/**
 * Empties the cache stored under the given key.
 *
 * @param integer $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH or CACHE_KEY_CSS_DECLARATION_BLOCK
 *
 * @throws InvalidArgumentException if $key is not one of the cache key constants
 */
private function clearCache($key) {
    // Strict comparisons on purpose: '0' or true must not pass as a cache key.
    $isKnownKey = $key === self::CACHE_KEY_CSS
        || $key === self::CACHE_KEY_SELECTOR
        || $key === self::CACHE_KEY_XPATH
        || $key === self::CACHE_KEY_CSS_DECLARATION_BLOCK;

    if (!$isKnownKey) {
        throw new InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
    }

    $this->caches[$key] = array();
}
||
179 | |||
/**
 * Forgets the nodes visited so far together with their original inline styles.
 */
private function purgeVisitedNodes() {
    $this->styleAttributesForNodes = array();
    $this->visitedNodes = array();
}
||
187 | |||
/**
 * Adds a tag name to the list of tags that are stripped from the HTML before it is parsed.
 *
 * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
 * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
 *
 * @param string $tagName the tag name, e.g., "p"
 */
public function addUnprocessableHtmlTag($tagName) {
    array_push($this->unprocessableHtmlTags, $tagName);
}
||
201 | |||
202 | /** |
||
203 | * Drops a tag from the removal list. |
||
204 | * |
||
205 | * @param string $tagName the tag name, e.g., "p" |
||
206 | */ |
||
207 | public function removeUnprocessableHtmlTag($tagName) { |
||
211 | } |
||
212 | } |
||
213 | |||
/**
 * Applies the CSS you submit to the HTML you submit.
 *
 * This method places the CSS inline. The steps are:
 *
 * 1. Normalize the property names of the existing style attributes and remember the original inline styles.
 * 2. Collect the CSS from setCss() and from all style elements (which are removed from the document).
 * 3. Split off the media queries, then parse the remaining rules and sort them by selector precedence.
 * 4. Merge the declarations of each rule into the style attribute of every matching node.
 * 5. Re-apply the original inline styles on top so that they win over the style sheet.
 * 6. Remove the nodes that end up with "display: none" and re-add the media queries as a style element.
 *
 * @return string the HTML with the CSS inlined
 *
 * @throws BadMethodCallException if no HTML has been set
 */
public function emogrify() {
    if ($this->html === '') {
        throw new BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
    }

    $xmlDocument = $this->createXmlDocument();
    $xpath = new DOMXPath($xmlDocument);
    $this->clearAllCaches();

    // Before we begin processing the CSS file, parse the document and normalize all existing CSS attributes
    // (changes 'DISPLAY: none' to 'display: none'); we wouldn't have to do this if DOMXPath supported XPath 2.0.
    // Also store a reference of nodes with existing inline styles so we don't overwrite them.
    $this->purgeVisitedNodes();

    $nodesWithStyleAttributes = $xpath->query('//*[@style]');
    if ($nodesWithStyleAttributes !== false) {
        /** @var $node DOMElement */
        foreach ($nodesWithStyleAttributes as $node) {
            $normalizedOriginalStyle = preg_replace_callback('/[A-z\\-]+(?=\\:)/S', array($this, 'strtolower'), $node->getAttribute('style'));

            // in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles
            $nodePath = $node->getNodePath();
            if (!isset($this->styleAttributesForNodes[$nodePath])) {
                $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle);
                $this->visitedNodes[$nodePath] = $node;
            }

            $node->setAttribute('style', $normalizedOriginalStyle);
        }
    }

    // Grab any existing style blocks from the HTML and append them to the existing CSS
    // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS).
    $allCss = $this->css;
    $allCss .= $this->getCssFromAllStyleNodes($xpath);

    $cssParts = $this->splitCssAndMediaQuery($allCss);
    // splitCssAndMediaQuery() collects the media queries in this static; reset it for the next run.
    self::$_media = '';

    $cssKey = md5($cssParts['css']);
    if (!isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
        // process the CSS file for selectors and definitions
        preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $cssParts['css'], $matches, PREG_SET_ORDER);

        $allSelectors = array();
        foreach ($matches as $key => $selectorString) {
            // if there is a blank definition, skip
            if (!strlen(trim($selectorString[2]))) {
                continue;
            }

            // else split by commas and duplicate attributes so we can sort by selector precedence
            $selectors = explode(',', $selectorString[1]);
            foreach ($selectors as $selector) {
                // don't process pseudo-elements and behavioral (dynamic) pseudo-classes; ONLY allow structural pseudo-classes
                if (strpos($selector, ':') !== false && !preg_match('/:\\S+\\-(child|type)\\(/i', $selector)) {
                    continue;
                }

                $allSelectors[] = array(
                    'selector' => trim($selector),
                    'attributes' => trim($selectorString[2]),
                    // keep track of where it appears in the file, since order is important
                    'line' => $key,
                );
            }
        }

        // now sort the selectors by precedence
        usort($allSelectors, array($this, 'sortBySelectorPrecedence'));

        $this->caches[self::CACHE_KEY_CSS][$cssKey] = $allSelectors;
    }

    foreach ($this->caches[self::CACHE_KEY_CSS][$cssKey] as $value) {
        // query the body for the xpath selector
        $nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath($value['selector']));
        if ($nodesMatchingCssSelectors === false) {
            // The selector could not be translated into a valid XPath expression; skip the rule
            // instead of iterating over false.
            continue;
        }

        /** @var $node DOMElement */
        foreach ($nodesMatchingCssSelectors as $node) {
            // if it has a style attribute, get it, process it, and append (overwrite) new stuff
            if ($node->hasAttribute('style')) {
                // break it up into an associative array
                $oldStyleDeclarations = $this->parseCssDeclarationBlock($node->getAttribute('style'));
            } else {
                $oldStyleDeclarations = array();
            }
            $newStyleDeclarations = $this->parseCssDeclarationBlock($value['attributes']);
            $node->setAttribute('style', $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations));
        }
    }

    // now iterate through the nodes that contained inline styles in the original HTML
    foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
        $node = $this->visitedNodes[$nodePath];
        $currentStyleAttributes = $this->parseCssDeclarationBlock($node->getAttribute('style'));
        $node->setAttribute('style', $this->generateStyleStringFromDeclarationsArrays($currentStyleAttributes, $styleAttributesForNode));
    }

    // This removes styles from your email that contain display:none.
    // We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only supports XPath 1.0,
    // lower-case() isn't available to us. We've thus far only set attributes to lowercase, not attribute values. Consequently, we need
    // to translate() the letters that would be in 'NONE' ("NOE") to lowercase.
    $nodesWithStyleDisplayNone = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');
    // The checks on parentNode and is_callable below ensure that if we've deleted the parent node,
    // we don't try to call removeChild on a nonexistent child node
    if ($nodesWithStyleDisplayNone->length > 0) {
        /** @var $node \DOMNode */
        foreach ($nodesWithStyleDisplayNone as $node) {
            if ($node->parentNode && is_callable(array($node->parentNode, 'removeChild'))) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    $this->copyCssWithMediaToStyleNode($cssParts, $xmlDocument);

    $emogrifiedHtml = $xmlDocument->saveHTML();
    if (!$this->preserveEncoding) {
        return $emogrifiedHtml;
    }

    // The 'HTML-ENTITIES' pseudo-encoding of mbstring is deprecated since PHP 8.2.
    if (version_compare(PHP_VERSION, '8.2', '<') && function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($emogrifiedHtml, self::ENCODING, 'HTML-ENTITIES');
    }

    return $this->decodeNonAsciiEntities($emogrifiedHtml);
}

/**
 * Converts the entities for non-ASCII characters in $html back into characters in self::ENCODING.
 *
 * This replaces the deprecated mb_convert_encoding($html, self::ENCODING, 'HTML-ENTITIES') call.
 * The entities "&amp;", "&lt;" and "&gt;", quote entities and numeric entities below 0x80 are left
 * untouched so that the markup itself is not altered.
 *
 * @param string $html HTML in which non-ASCII characters are written as named or numeric entities
 *
 * @return string the HTML with those entities replaced by the characters they stand for
 */
private function decodeNonAsciiEntities($html) {
    // Named entities first: build an entity => character map from the HTML 4.01 table
    // and drop the entries that are significant for the markup.
    $charactersByEntity = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES | ENT_HTML401, self::ENCODING));
    unset($charactersByEntity['&amp;'], $charactersByEntity['&lt;'], $charactersByEntity['&gt;']);
    $html = strtr($html, $charactersByEntity);

    // Then the numeric entities, restricted to the code points from 0x80 upwards.
    return mb_decode_numericentity($html, array(0x80, 0x10FFFF, 0, ~0), self::ENCODING);
}
||
352 | |||
/**
 * Callback for preg_replace_callback() that lower-cases the complete match.
 *
 * @since 2.0.0
 *
 * @param array $m the match array; only the full match at index 0 is used
 * @return string the full match in lower case
 */
public function strtolower(array $m) {
    $fullMatch = $m[0];

    return strtolower($fullMatch);
}
||
364 | |||
365 | |||
/**
 * Merges two sets of CSS declarations and renders the result as an inline style string.
 *
 * Declarations in $newStyles replace declarations of the same property in $oldStyles.
 * This is the single point of CSS string generation, which keeps the output format
 * ("name: value; name: value;") consistent no matter where the CSS originally came from.
 *
 * @param array $oldStyles the existing declarations, property name => value
 * @param array $newStyles the declarations to merge in, property name => value
 * @return string the combined declarations, suitable for a style attribute
 */
private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles) {
    $declarations = array();
    foreach (array_merge($oldStyles, $newStyles) as $propertyName => $propertyValue) {
        $declarations[] = strtolower(trim($propertyName)) . ': ' . trim($propertyValue) . ';';
    }

    return implode(' ', $declarations);
}
||
383 | |||
384 | |||
/**
 * Adds the media query part of $cssParts to $xmlDocument as a style element.
 *
 * Nothing is added if the "media" key is missing or holds an empty string.
 *
 * @param array $cssParts the split CSS; only the "media" key is read
 * @param DOMDocument $xmlDocument the document to add the style element to
 */
public function copyCssWithMediaToStyleNode(array $cssParts, DOMDocument $xmlDocument) {
    if (!isset($cssParts['media']) || $cssParts['media'] === '') {
        return;
    }

    $this->addStyleElementToDocument($xmlDocument, $cssParts['media']);
}
||
396 | |||
/**
 * Collects the CSS of all style elements and removes these elements from the document.
 *
 * @param DOMXPath $xpath the XPath instance bound to the document to search
 * @return string the concatenated CSS, each part prefixed with two line feeds; empty if there are no style elements
 */
private function getCssFromAllStyleNodes(DOMXPath $xpath) {
    $collectedCss = '';

    $styleNodes = $xpath->query('//style');
    if ($styleNodes !== false) {
        /** @var $styleNode DOMNode */
        foreach ($styleNodes as $styleNode) {
            $collectedCss .= "\n\n" . $styleNode->nodeValue;
            // The rules get inlined, so the style element itself must not stay in the document.
            $styleNode->parentNode->removeChild($styleNode);
        }
    }

    return $collectedCss;
}
||
419 | |||
/**
 * Appends a style element of type "text/css" containing $css to the head of $document.
 *
 * @param DOMDocument $document the document to modify
 * @param string $css the CSS to put into the new style element
 */
private function addStyleElementToDocument(DOMDocument $document, $css) {
    // NOTE(review): the value passed to createElement() is not escaped, so a bare "&" in $css
    // is presumably parsed as the start of an entity reference - verify with CSS that contains "&".
    $styleElement = $document->createElement('style', $css);
    $styleElement->setAttribute('type', 'text/css');

    $this->getOrCreateHeadElement($document)->appendChild($styleElement);
}
||
435 | |||
/**
 * Returns the first head element of $document, creating one if there is none.
 *
 * A newly created head element is inserted into the first html element, in front of the first body element.
 *
 * @param DOMDocument $document the document to search or extend
 * @return DOMNode the head element
 */
private function getOrCreateHeadElement(DOMDocument $document) {
    $existingHead = $document->getElementsByTagName('head')->item(0);
    if ($existingHead !== null) {
        return $existingHead;
    }

    $newHead = $document->createElement('head');
    $htmlElement = $document->getElementsByTagName('html')->item(0);
    $bodyElement = $document->getElementsByTagName('body')->item(0);
    $htmlElement->insertBefore($newHead, $bodyElement);

    return $newHead;
}
||
453 | |||
/**
 * Splits the given CSS into an array with two keys:
 *
 * - "css" contains the cleaned CSS (without comments, import directives and media blocks),
 * - "media" contains all media queries that are kept.
 *
 * Example:
 *
 * The CSS code
 *
 *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
 *
 * will be parsed into the following array:
 *
 *   "css" => "h1 { color:red; }"
 *   "media" => "@media { h1 {}}"
 *
 * The kept media queries are accumulated in self::$_media by the _media_concat() callback;
 * this method does not reset that static property.
 *
 * @param string $css the CSS to split
 * @return array the parts, with the keys "css" and "media"
 */
private function splitCssAndMediaQuery($css) {
    // Cut the media queries to keep out of the CSS; the callback stores them in self::$_media.
    $cssWithoutKeptMedia = preg_replace_callback(
        '#@media\\s+(?:only\\s)?(?:[\\s{\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
        array($this, '_media_concat'),
        $css
    );

    $patternsToStrip = array(
        // CSS comments
        '/\\/\\*.*\\*\\//sU',
        // import directives
        '/^\\s*@import\\s[^;]+;/misU',
        // the remaining media enclosures
        '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
    );
    $cleanCss = preg_replace($patternsToStrip, '', $cssWithoutKeptMedia);

    return array('css' => $cleanCss, 'media' => self::$_media);
}
||
498 | |||
/**
 * Callback for preg_replace_callback() that collects a matched media query.
 *
 * The complete match is appended to self::$_media, and the match is replaced
 * with an empty string, i.e., removed from the CSS being processed.
 *
 * @since 2.0.0
 *
 * @param array $matches Matches result array.
 * @return string the replacement for the match, always an empty string
 */
private function _media_concat( $matches ) {
    self::$_media .= $matches[0];

    // Return the replacement explicitly instead of relying on null being cast to ''.
    return '';
}
||
509 | |||
/**
 * Builds a DOMDocument from the current (unified) HTML.
 *
 * libxml errors raised while loading the HTML are collected internally and discarded;
 * the previous libxml error handling mode is restored afterwards.
 *
 * @return DOMDocument the normalized document
 */
private function createXmlDocument() {
    $document = new DOMDocument();
    $document->encoding = self::ENCODING;
    $document->strictErrorChecking = false;
    $document->formatOutput = true;

    $previousLibXmlState = libxml_use_internal_errors(true);
    // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
    /** @scrutinizer ignore-unhandled */ @$document->loadHTML($this->getUnifiedHtml());
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibXmlState);

    $document->normalizeDocument();

    return $document;
}
||
529 | |||
/**
 * Returns the HTML with the non-ASCII characters converted into HTML entities and the unprocessable HTML tags removed.
 *
 * Only the opening and closing tags themselves are removed; any content between them stays in place.
 *
 * @return string the unified HTML
 */
private function getUnifiedHtml() {
    $bodyWithoutUnprocessableTags = $this->html;

    // Quote the tag names so that they are matched literally, and skip empty names,
    // which would make the pattern match every tag.
    $quotedTagNames = array();
    foreach ($this->unprocessableHtmlTags as $tagName) {
        if ((string) $tagName !== '') {
            $quotedTagNames[] = preg_quote($tagName, '/');
        }
    }

    if (!empty($quotedTagNames)) {
        // The negative lookahead ends the tag name, so "p" does not also strip <pre>, and "b" does not strip <body> or <br>.
        $pattern = '/<\\/?(?:' . implode('|', $quotedTagNames) . ')(?![\\w\\-:])[^>]*>/i';
        $bodyWithoutUnprocessableTags = preg_replace($pattern, '', $this->html);
    }

    // The 'HTML-ENTITIES' pseudo-encoding of mbstring is deprecated since PHP 8.2.
    if ( version_compare( PHP_VERSION, '8.2', '<' ) && function_exists( 'mb_convert_encoding' ) ) {
        return mb_convert_encoding( $bodyWithoutUnprocessableTags, 'HTML-ENTITIES', self::ENCODING );
    }

    // Encode all characters from 0x80 upwards as numeric entities.
    return mb_encode_numericentity( $bodyWithoutUnprocessableTags, [0x80, 0x10FFFF, 0, ~0], self::ENCODING );
}
||
553 | |||
/**
 * Comparison callback for usort() that orders selectors by ascending precedence.
 *
 * Selectors with lesser precedence come first so that they get processed first and selectors
 * with greater precedence get processed last. Selectors with the same precedence are ordered
 * by their "line" value. This method never returns 0.
 *
 * @param array $a selector record with the keys "selector" and "line"
 * @param array $b selector record with the keys "selector" and "line"
 *
 * @return integer -1 if $a sorts before $b, 1 otherwise
 */
private function sortBySelectorPrecedence(array $a, array $b) {
    $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
    $precedenceB = $this->getCssSelectorPrecedence($b['selector']);

    // The parentheses around the -1 avoid a PHP_CodeSniffer warning about missing spaces around
    // arithmetic operators.
    // @see http://forge.typo3.org/issues/55605
    if ($precedenceA === $precedenceB) {
        return ($a['line'] < $b['line']) ? (-1) : 1;
    }

    return ($precedenceA < $precedenceB) ? (-1) : 1;
}
||
573 | |||
/**
 * Calculates the precedence of a CSS selector.
 *
 * IDs are worth 100, classes are worth 10, and elements are worth 1. The result is cached per selector.
 *
 * @param string $selector the CSS selector
 *
 * @return integer the precedence; a higher value means that the selector wins over selectors with a lower value
 */
private function getCssSelectorPrecedence($selector) {
    $selectorKey = md5($selector);
    if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
        $precedence = 0;
        $value = 100;
        // ids: worth 100, classes: worth 10, elements: worth 1
        $search = array('\\#', '\\.', '');

        foreach ($search as $s) {
            // nothing left to count once only whitespace remains
            if (trim($selector) === '') {
                break;
            }
            $number = 0;
            // Names may contain hyphens (as in ID_ATTRIBUTE_MATCHER and CLASS_ATTRIBUTE_MATCHER); with \w+ alone,
            // the rest of ".btn-primary" would be left over and counted again as an element.
            $selector = preg_replace('/' . $s . '[\\w\\-]+/', '', $selector, -1, $number);
            $precedence += ($value * $number);
            $value /= 10;
        }
        $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
    }

    return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
}
||
601 | |||
/**
 * Translates a CSS selector into an XPath expression.
 *
 * Right now, we support all CSS 1 selectors and most CSS2/3 selectors.
 * The result is cached per (normalized) selector under CACHE_KEY_XPATH.
 *
 * @see http://plasmasturm.org/log/444/
 *
 * @param string $paramCssSelector the CSS selector
 *
 * @return string the XPath expression, starting with "//"
 */
private function translateCssToXpath($paramCssSelector) {
    // Lower-case words that are surrounded by whitespace; the padding makes the first and last word match, too.
    $cssSelector = ' ' . $paramCssSelector . ' ';
    $cssSelector = preg_replace_callback( '/\s+\w+\s+/', array( $this, 'strtolower' ), $cssSelector );
    $cssSelector = trim($cssSelector);
    $xpathKey = md5($cssSelector);
    if (!isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
        // returns an Xpath selector
        $search = array(
            // Matches any element that is a child of parent.
            '/\\s+>\\s+/',
            // Matches any element that is an adjacent sibling.
            '/\\s+\\+\\s+/',
            // Matches any element that is a descendant of a parent element.
            '/\\s+/',
            // first-child pseudo-selector
            '/([^\\/]+):first-child/i',
            // last-child pseudo-selector
            '/([^\\/]+):last-child/i',
            // Matches attribute only selector
            '/^\\[(\\w+)\\]/',
            // Matches element with attribute
            '/(\\w)\\[(\\w+)\\]/',
            // Matches element with EXACT attribute
            '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
        );
        $replace = array(
            '/',
            '/following-sibling::*[1]/self::',
            '//',
            '*[1]/self::\\1',
            '*[last()]/self::\\1',
            '*[@\\1]',
            '\\1[@\\2]',
            '\\1[@\\2="\\3"]',
        );

        $cssSelector = '//' . preg_replace($search, $replace, $cssSelector);

        $cssSelector = preg_replace_callback(self::ID_ATTRIBUTE_MATCHER, array($this, 'matchIdAttributes'), $cssSelector);
        $cssSelector = preg_replace_callback(self::CLASS_ATTRIBUTE_MATCHER, array($this, 'matchClassAttributes'), $cssSelector);

        // Advanced selectors are going to require a bit more advanced emogrification.
        $cssSelector = preg_replace_callback(
            '/([^\\/]+):nth-child\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
            array($this, 'translateNthChild'), $cssSelector
        );
        $cssSelector = preg_replace_callback(
            '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
            array($this, 'translateNthOfType'), $cssSelector
        );

        // Store the result under the same key that is checked above. Writing it to the selector cache
        // would never produce a cache hit and would overwrite the precedence cached for the same selector.
        $this->caches[self::CACHE_KEY_XPATH][$xpathKey] = $cssSelector;
    }

    return $this->caches[self::CACHE_KEY_XPATH][$xpathKey];
}
||
667 | |||
/**
 * Callback that turns an ID selector match into an XPath node test with an id predicate.
 *
 * @param array $match the match of ID_ATTRIBUTE_MATCHER: index 1 is the (possibly empty) element name, index 2 the ID
 *
 * @return string the XPath fragment, e.g. 'p[@id="intro"]', or '*[@id="intro"]' if there is no element name
 */
private function matchIdAttributes(array $match) {
    $elementName = strlen($match[1]) ? $match[1] : '*';

    return $elementName . '[@id="' . $match[2] . '"]';
}
||
676 | |||
/**
 * Callback that turns a class selector match into an XPath node test with one class predicate per class name.
 *
 * @param array $match the match of CLASS_ATTRIBUTE_MATCHER: index 1 is the (possibly empty) element name,
 *                     index 2 the chain of class names, e.g. ".a.b"
 *
 * @return string the XPath fragment; "*" is used as the node test if there is no element name
 */
private function matchClassAttributes(array $match) {
    $xpathFragment = strlen($match[1]) ? $match[1] : '*';

    // Drop the leading dot, then add one predicate for each class name in the chain.
    $classNames = explode('.', substr($match[2], 1));
    foreach ($classNames as $className) {
        $xpathFragment .= '[contains(concat(" ",@class," "),concat(" ","' . $className . '"," "))]';
    }

    return $xpathFragment;
}
||
689 | |||
/**
 * Callback that translates an nth-child() pseudo-class into an XPath position predicate.
 *
 * @param array $match the regular expression match: index 1 is the selector part in front of ":nth-child",
 *                     index 2 the expression between the parentheses
 *
 * @return string the XPath fragment
 */
private function translateNthChild(array $match) {
    $nth = $this->parseNth($match);

    // a plain number: select by position only
    if (!isset($nth[self::MULTIPLIER])) {
        return sprintf('*[%u]/self::%s', $nth[self::INDEX], $match[1]);
    }

    $multiplier = $nth[self::MULTIPLIER];
    if ($multiplier < 0) {
        return sprintf('*[(last() - position()) mod %u = %u]/self::%s', abs($multiplier), $nth[self::INDEX], $match[1]);
    }

    return sprintf('*[position() mod %u = %u]/self::%s', $multiplier, $nth[self::INDEX], $match[1]);
}
||
709 | |||
/**
 * Callback that translates an nth-of-type() pseudo-class into an XPath position predicate.
 *
 * @param array $match the regular expression match: index 1 is the selector part in front of ":nth-of-type",
 *                     index 2 the expression between the parentheses
 *
 * @return string the XPath fragment
 */
private function translateNthOfType(array $match) {
    $nth = $this->parseNth($match);

    // a plain number: select by position only
    if (!isset($nth[self::MULTIPLIER])) {
        return sprintf('%s[%u]', $match[1], $nth[self::INDEX]);
    }

    $multiplier = $nth[self::MULTIPLIER];
    if ($multiplier < 0) {
        return sprintf('%s[(last() - position()) mod %u = %u]', $match[1], abs($multiplier), $nth[self::INDEX]);
    }

    return sprintf('%s[position() mod %u = %u]', $match[1], $multiplier, $nth[self::INDEX]);
}
||
729 | |||
/**
 * Parses the expression of an nth-child() or nth-of-type() pseudo-class.
 *
 * Supported forms are "odd", "even", a plain number, and "an+b" expressions (e.g. "2n+1", "n", "-n+3").
 *
 * @param array $match the regular expression match: index 2 is the complete expression,
 *                     index 3 (optional) the signed offset of an "an+b" expression
 *
 * @return array the parsed terms: always the key self::INDEX, plus the key self::MULTIPLIER
 *               unless the expression selects a single position
 */
private function parseNth(array $match) {
    if (in_array(strtolower($match[2]), array('even', 'odd'), true)) {
        $index = strtolower($match[2]) === 'even' ? 0 : 1;
        return array(self::MULTIPLIER => 2, self::INDEX => $index);
    }

    if (stripos($match[2], 'n') === false) {
        // a plain number, i.e., there is no multiplier
        $index = intval(str_replace(' ', '', $match[2]));
        return array(self::INDEX => $index);
    }

    if (isset($match[3])) {
        $multipleTerm = str_replace($match[3], '', $match[2]);
        $index = intval(str_replace(' ', '', $match[3]));
    } else {
        $multipleTerm = $match[2];
        $index = 0;
    }

    $multiplier = str_ireplace('n', '', $multipleTerm);

    // "n" and "+n" mean 1n, "-n" means -1n. A bare sign must not go through a loose comparison with 0:
    // its result depends on the PHP version, and on PHP 8 the multiplier ended up as 0, which made
    // the loop below run forever for a negative index.
    if ($multiplier === '' || $multiplier === '+') {
        $multiplier = 1;
    } elseif ($multiplier === '-') {
        $multiplier = -1;
    } else {
        $multiplier = intval($multiplier);
    }

    if ($multiplier === 0) {
        // "0n+b" selects the single position b
        return array(self::INDEX => $index);
    }

    // shift a negative index into the non-negative range; this terminates as the multiplier is not 0 here
    while ($index < 0) {
        $index += abs($multiplier);
    }

    return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
}
||
769 | |||
770 | /** |
||
771 | * Parses a CSS declaration block into property name/value pairs. |
||
772 | * |
||
773 | * Example: |
||
774 | * |
||
775 | * The declaration block. |
||
776 | * |
||
777 | * "color: #000; font-weight: bold;". |
||
778 | * |
||
779 | * will be parsed into the following array: |
||
780 | * |
||
781 | * "color" => "#000" |
||
782 | * "font-weight" => "bold" |
||
783 | * |
||
784 | * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty |
||
785 | * |
||
786 | * @return array the CSS declarations with the property names as array keys and the property values as array values |
||
787 | */ |
||
788 | private function parseCssDeclarationBlock($cssDeclarationBlock) { |
||
807 | } |
||
808 | } |
||
809 |