This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
<?php
/**
 * This class provides functions for converting CSS styles into inline style attributes in your HTML code
 *
 * For more information, please see the README.md file.
 *
 * @author Cameron Brooks
 * @author Jaime Prado
 * @author Roman Ožana <[email protected]>
 */
class Emogrifier {
    /**
     * Character encoding used when building the DOM document and when converting to/from HTML entities.
     *
     * @var string
     */
    const ENCODING = 'UTF-8';

    /**
     * Index into $caches: parsed and sorted selector lists, keyed by the MD5 hash of the CSS.
     *
     * @var integer
     */
    const CACHE_KEY_CSS = 0;

    /**
     * Index into $caches: selector precedence values (see getCssSelectorPrecedence()).
     *
     * @var integer
     */
    const CACHE_KEY_SELECTOR = 1;

    /**
     * Index into $caches: consulted by translateCssToXpath() for already translated selectors.
     *
     * @var integer
     */
    const CACHE_KEY_XPATH = 2;

    /**
     * Index into $caches: parsed CSS declaration blocks, keyed by the raw declaration block string.
     *
     * @var integer
     */
    const CACHE_KEY_CSS_DECLARATION_BLOCK = 3;

    /**
     * for calculating nth-of-type and nth-child selectors.
     *
     * Array key of the index (offset) part in the result of parseNth().
     *
     * @var integer
     */
    const INDEX = 0;

    /**
     * for calculating nth-of-type and nth-child selectors.
     *
     * Array key of the multiplier part in the result of parseNth().
     *
     * @var integer
     */
    const MULTIPLIER = 1;

    /**
     * Regular expression matching an optional element name followed by "#id".
     *
     * @var string
     */
    const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';

    /**
     * Regular expression matching an optional element name followed by one or more ".class" parts.
     *
     * @var string
     */
    const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';

    /**
     * The HTML to emogrify.
     *
     * @var string
     */
    private $html = '';

    /**
     * The CSS to merge into the HTML.
     *
     * @var string
     */
    private $css = '';

    /**
     * Names of the tags that getUnifiedHtml() strips from the HTML before it is parsed.
     *
     * @var array<string>
     */
    private $unprocessableHtmlTags = array('wbr');

    /**
     * The per-run caches, indexed by the CACHE_KEY_* constants.
     *
     * @var array<array>
     */
    private $caches = array(
        self::CACHE_KEY_CSS => array(),
        self::CACHE_KEY_SELECTOR => array(),
        self::CACHE_KEY_XPATH => array(),
        self::CACHE_KEY_CSS_DECLARATION_BLOCK => array(),
    );

    /**
     * the visited nodes with the XPath paths as array keys.
     *
     * @var array<\DOMNode>
     */
    private $visitedNodes = array();

    /**
     * the styles to apply to the nodes with the XPath paths as array keys for the outer array and the attribute names/values.
     * as key/value pairs for the inner array.
     *
     * @var array<string, array<string, string>>
     */
    private $styleAttributesForNodes = array();

    /**
     * This attribute applies to the case where you want to preserve your original text encoding.
     *
     * By default, emogrifier translates your text into HTML entities for two reasons:
     *
     * 1. Because of client incompatibilities, it is better practice to send out HTML entities rather than unicode over email.
     *
     * 2. It translates any illegal XML characters that DOMDocument cannot work with.
     *
     * If you would like to preserve your original encoding, set this attribute to TRUE.
     *
     * @var boolean
     */
    public $preserveEncoding = false;

    /**
     * Accumulator for the @media blocks collected by _media_concat() while splitCssAndMediaQuery() runs.
     *
     * It is shared by all instances (static) and is reset to an empty string in emogrify().
     *
     * @var string
     */
    public static $_media = '';
||||
117 | |||||
/**
 * Creates a new instance and stores the initial HTML and CSS via the setters.
 *
 * @param string $html the HTML to emogrify, must be UTF-8-encoded
 * @param string $css the CSS to merge, must be UTF-8-encoded
 */
public function __construct($html = '', $css = '') {
    // The two setters are independent of each other.
    $this->setCss($css);
    $this->setHtml($html);
}
||||
128 | |||||
/**
 * Releases the references to visited DOM nodes and their saved styles when the instance is destroyed.
 */
public function __destruct() {
    // Same two resets that purgeVisitedNodes() performs.
    $this->visitedNodes = array();
    $this->styleAttributesForNodes = array();
}
||||
135 | |||||
/**
 * Sets the HTML to emogrify.
 *
 * The value is cast to a string so that null or other non-string input is caught by the
 * empty-string check in emogrify() instead of reaching the HTML parser.
 *
 * @param string $html the HTML to emogrify, must be UTF-8-encoded
 */
public function setHtml($html = '') {
    $this->html = (string) $html;
}
||||
144 | |||||
/**
 * Sets the CSS to merge with the HTML.
 *
 * The value is cast to a string, mirroring setHtml(), so the stored property always has the documented type.
 *
 * @param string $css the CSS to merge, must be UTF-8-encoded
 */
public function setCss($css = '') {
    $this->css = (string) $css;
}
||||
153 | |||||
/**
 * Empties every cache held in $caches, one key at a time.
 */
private function clearAllCaches() {
    $cacheKeys = array(
        self::CACHE_KEY_CSS,
        self::CACHE_KEY_SELECTOR,
        self::CACHE_KEY_XPATH,
        self::CACHE_KEY_CSS_DECLARATION_BLOCK,
    );

    foreach ($cacheKeys as $cacheKey) {
        $this->clearCache($cacheKey);
    }
}
||||
163 | |||||
/**
 * Empties the cache stored under the given key.
 *
 * @param integer $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH or CACHE_KEY_CSS_DECLARATION_BLOCK
 *
 * @throws InvalidArgumentException if $key is not one of the known cache keys (strict comparison)
 */
private function clearCache($key) {
    $isKnownKey = $key === self::CACHE_KEY_CSS
        || $key === self::CACHE_KEY_SELECTOR
        || $key === self::CACHE_KEY_XPATH
        || $key === self::CACHE_KEY_CSS_DECLARATION_BLOCK;

    if ($isKnownKey) {
        $this->caches[$key] = array();
        return;
    }

    throw new InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
}
||||
179 | |||||
/**
 * Forgets the visited nodes and the original inline styles saved for them.
 */
private function purgeVisitedNodes() {
    $this->styleAttributesForNodes = array();
    $this->visitedNodes = array();
}
||||
187 | |||||
/**
 * Adds a tag name to the list of tags that are stripped from the HTML before it is parsed.
 *
 * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it encounters them.
 * In particular, DOMDocument will complain if you try to use HTML5 tags in an XHTML document.
 *
 * Note: only the tags themselves are stripped; any content between them is kept.
 *
 * @param string $tagName the tag name, e.g., "p"
 */
public function addUnprocessableHtmlTag($tagName) {
    array_push($this->unprocessableHtmlTags, $tagName);
}
||||
201 | |||||
/**
 * Drops a tag from the removal list.
 *
 * Every occurrence of the tag name is removed, so a tag that was added more than once
 * (e.g. re-adding the default "wbr") no longer stays on the list after a single call.
 *
 * @param string $tagName the tag name, e.g., "p"
 */
public function removeUnprocessableHtmlTag($tagName) {
    // array_keys() with the strict flag returns the keys of all identical entries.
    foreach (array_keys($this->unprocessableHtmlTags, $tagName, true) as $key) {
        unset($this->unprocessableHtmlTags[$key]);
    }
}
||||
213 | |||||
/**
 * Applies the CSS you submit to the HTML you submit.
 *
 * This method places the CSS inline: the CSS set via setCss() plus the content of all <style> elements
 * found in the HTML is split into rules, each selector is translated to XPath, and the declarations are
 * merged into the "style" attribute of the matching nodes. Inline styles already present in the HTML
 * are re-applied last so that they win. Nodes whose resulting style contains "display:none" are removed,
 * and collected @media blocks are written back into a <style> element.
 *
 * Selectors that cannot be translated into a valid XPath expression are skipped.
 *
 * @return string the resulting HTML
 *
 * @throws BadMethodCallException if no HTML has been set
 */
public function emogrify() {
    if ($this->html === '') {
        throw new BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
    }

    $xmlDocument = $this->createXmlDocument();
    $xpath = new DOMXPath($xmlDocument);
    $this->clearAllCaches();

    // Before we begin processing the CSS file, parse the document and normalize all existing CSS attributes
    // (changes 'DISPLAY: none' to 'display: none');
    // we wouldn't have to do this if DOMXPath supported XPath 2.0.
    // Also store a reference of nodes with existing inline styles so we don't overwrite them.
    $this->purgeVisitedNodes();

    $nodesWithStyleAttributes = $xpath->query('//*[@style]');
    if ($nodesWithStyleAttributes !== false) {
        /** @var DOMElement $node */
        foreach ($nodesWithStyleAttributes as $node) {
            $normalizedOriginalStyle = preg_replace_callback( '/[A-z\\-]+(?=\\:)/S', array( $this, 'strtolower' ), $node->getAttribute('style') );

            // in order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles
            $nodePath = $node->getNodePath();
            if (!isset($this->styleAttributesForNodes[$nodePath])) {
                $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle);
                $this->visitedNodes[$nodePath] = $node;
            }

            $node->setAttribute('style', $normalizedOriginalStyle);
        }
    }

    // grab any existing style blocks from the html and append them to the existing CSS
    // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
    $allCss = $this->css;

    $allCss .= $this->getCssFromAllStyleNodes($xpath);

    $cssParts = $this->splitCssAndMediaQuery($allCss);
    self::$_media = ''; // reset

    $cssKey = md5($cssParts['css']);
    if (!isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
        // process the CSS file for selectors and definitions
        preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $cssParts['css'], $matches, PREG_SET_ORDER);

        $allSelectors = array();
        foreach ($matches as $key => $selectorString) {
            // if there is a blank definition, skip
            if (!strlen(trim($selectorString[2]))) {
                continue;
            }

            // else split by commas and duplicate attributes so we can sort by selector precedence
            $selectors = explode(',', $selectorString[1]);
            foreach ($selectors as $selector) {
                // don't process pseudo-elements and behavioral (dynamic) pseudo-classes; ONLY allow structural pseudo-classes
                if (strpos($selector, ':') !== false && !preg_match('/:\\S+\\-(child|type)\\(/i', $selector)) {
                    continue;
                }

                $allSelectors[] = array('selector' => trim($selector),
                    'attributes' => trim($selectorString[2]),
                    // keep track of where it appears in the file, since order is important
                    'line' => $key,
                );
            }
        }

        // now sort the selectors by precedence
        usort($allSelectors, array($this,'sortBySelectorPrecedence'));

        $this->caches[self::CACHE_KEY_CSS][$cssKey] = $allSelectors;
    }

    foreach ($this->caches[self::CACHE_KEY_CSS][$cssKey] as $value) {
        // query the body for the xpath selector
        $nodesMatchingCssSelectors = $xpath->query($this->translateCssToXpath($value['selector']));

        // DOMXPath::query() returns false when the translated selector is not a valid XPath expression;
        // skip such selectors instead of iterating over a boolean.
        if ($nodesMatchingCssSelectors === false) {
            continue;
        }

        /** @var DOMElement $node */
        foreach ($nodesMatchingCssSelectors as $node) {
            // if it has a style attribute, get it, process it, and append (overwrite) new stuff
            if ($node->hasAttribute('style')) {
                // break it up into an associative array
                $oldStyleDeclarations = $this->parseCssDeclarationBlock($node->getAttribute('style'));
            } else {
                $oldStyleDeclarations = array();
            }
            $newStyleDeclarations = $this->parseCssDeclarationBlock($value['attributes']);
            $node->setAttribute('style', $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations));
        }
    }

    // now iterate through the nodes that contained inline styles in the original HTML
    foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
        $node = $this->visitedNodes[$nodePath];
        $currentStyleAttributes = $this->parseCssDeclarationBlock($node->getAttribute('style'));
        $node->setAttribute('style', $this->generateStyleStringFromDeclarationsArrays($currentStyleAttributes, $styleAttributesForNode));
    }

    // This removes styles from your email that contain display:none.
    // We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only supports XPath 1.0,
    // lower-case() isn't available to us. We've thus far only set attributes to lowercase, not attribute values. Consequently, we need
    // to translate() the letters that would be in 'NONE' ("NOE") to lowercase.
    $nodesWithStyleDisplayNone = $xpath->query('//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]');
    // The checks on parentNode and is_callable below ensure that if we've deleted the parent node,
    // we don't try to call removeChild on a nonexistent child node
    if ($nodesWithStyleDisplayNone !== false && $nodesWithStyleDisplayNone->length > 0) {
        /** @var DOMNode $node */
        foreach ($nodesWithStyleDisplayNone as $node) {
            if ($node->parentNode && is_callable(array($node->parentNode,'removeChild'))) {
                $node->parentNode->removeChild($node);
            }
        }
    }

    $this->copyCssWithMediaToStyleNode($cssParts, $xmlDocument);

    if ($this->preserveEncoding) {
        // The 'HTML-ENTITIES' pseudo-encoding is deprecated since PHP 8.2.
        if ( version_compare( PHP_VERSION, '8.2', '<' ) && function_exists( 'mb_convert_encoding' ) ) {
            return mb_convert_encoding( $xmlDocument->saveHTML(), self::ENCODING, 'HTML-ENTITIES' );
        } else {
            // NOTE(review): the legacy branch above converts FROM HTML entities TO UTF-8, whereas this
            // call ENCODES non-ASCII characters as numeric entities. With $preserveEncoding enabled the
            // two PHP version paths therefore look inconsistent - confirm the intended behavior before
            // changing it (a decoding replacement would also have to handle named entities).
            return mb_encode_numericentity( $xmlDocument->saveHTML(), [0x80, 0x10FFFF, 0, ~0], self::ENCODING );
        }
    } else {
        return $xmlDocument->saveHTML();
    }
}
||||
352 | |||||
/**
 * Regex callback that lower-cases the whole match.
 *
 * Used with preg_replace_callback(), which passes the match array.
 *
 * @since 2.0.0
 *
 * @param array $m the match array; only element 0 (the full match) is used
 * @return string the lower-cased full match
 */
public function strtolower(array $m) {
    $wholeMatch = $m[0];

    return strtolower($wholeMatch);
}
||||
364 | |||||
365 | |||||
/**
 * Merges two property name/value arrays and renders them as one inline style string.
 *
 * Entries in $newStyles override entries with the same key in $oldStyles. Each declaration is
 * rendered as "name: value;" with the name lower-cased and both parts trimmed; declarations are
 * separated by a single space. This is the single point of CSS string generation, so the output
 * format is the same no matter where the CSS originally came from.
 *
 * @param array $oldStyles the existing declarations
 * @param array $newStyles the declarations that take precedence
 * @return string the combined style string, empty if there are no declarations
 */
private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles) {
    $renderedDeclarations = array();
    foreach (array_merge($oldStyles, $newStyles) as $propertyName => $propertyValue) {
        $renderedDeclarations[] = strtolower(trim($propertyName)) . ': ' . trim($propertyValue) . ';';
    }

    return implode(' ', $renderedDeclarations);
}
||||
383 | |||||
384 | |||||
/**
 * Writes the "media" part of the split CSS into a style element of $xmlDocument.
 *
 * Nothing happens when the "media" key is missing or holds an empty string.
 *
 * @param array $cssParts the split CSS as returned by splitCssAndMediaQuery()
 * @param DOMDocument $xmlDocument the document that receives the style element
 */
public function copyCssWithMediaToStyleNode(array $cssParts, DOMDocument $xmlDocument) {
    if (!isset($cssParts['media']) || $cssParts['media'] === '') {
        return;
    }

    $this->addStyleElementToDocument($xmlDocument, $cssParts['media']);
}
||||
396 | |||||
/**
 * Collects the content of all style elements in the document and removes those elements.
 *
 * The content of each style element is appended with two leading line breaks.
 *
 * @param DOMXPath $xpath the XPath instance bound to the document
 * @return string the concatenated CSS, empty if the query fails or finds nothing
 */
private function getCssFromAllStyleNodes(DOMXPath $xpath) {
    $collectedCss = '';
    $styleNodes = $xpath->query('//style');

    if ($styleNodes !== false) {
        /** @var DOMNode $styleNode */
        foreach ($styleNodes as $styleNode) {
            $collectedCss .= "\n\n" . $styleNode->nodeValue;
            $styleNode->parentNode->removeChild($styleNode);
        }
    }

    return $collectedCss;
}
||||
419 | |||||
/**
 * Adds a style element with $css to $document.
 *
 * The element gets type="text/css" and is appended to the head element, which is created if needed.
 * The CSS is attached as a text node: the value argument of DOMDocument::createElement() is not
 * escaped, so CSS containing "&" (e.g. in a URL) would otherwise raise a warning and be cut off.
 *
 * @param DOMDocument $document the document to modify
 * @param string $css the CSS to put into the new style element
 */
private function addStyleElementToDocument(DOMDocument $document, $css) {
    $styleElement = $document->createElement('style');
    $styleElement->setAttribute('type', 'text/css');
    $styleElement->appendChild($document->createTextNode($css));

    $head = $this->getOrCreateHeadElement($document);
    $head->appendChild($styleElement);
}
||||
435 | |||||
/**
 * Returns the existing or creates a new head element in $document.
 *
 * A new head element is inserted before the body element (or appended when there is no body).
 * If the document has no html element, the document element is used as the container; if the
 * document is empty, an html element is created first. This avoids a fatal call on null.
 *
 * @param DOMDocument $document the document to inspect and, if necessary, modify
 * @return DOMNode the head element
 */
private function getOrCreateHeadElement(DOMDocument $document) {
    $head = $document->getElementsByTagName('head')->item(0);
    if ($head !== null) {
        return $head;
    }

    $head = $document->createElement('head');

    $html = $document->getElementsByTagName('html')->item(0);
    if ($html === null) {
        // Fall back to the root element, or create one for a document without any element.
        $html = $document->documentElement;
    }
    if ($html === null) {
        $html = $document->createElement('html');
        $document->appendChild($html);
    }

    // insertBefore() with a null reference node appends the new node.
    $html->insertBefore($head, $document->getElementsByTagName('body')->item(0));

    return $head;
}
||||
453 | |||||
/**
 * Splits the given CSS into plain rules and media queries.
 *
 * The returned array has two keys:
 *
 * - "css" holds the CSS with the matched @media blocks, comments, @import directives and
 *   any remaining @media enclosures removed
 * - "media" holds the @media blocks collected by _media_concat() in the static $_media property
 *
 * Example:
 *
 * The CSS code
 *
 *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
 *
 * will be parsed into the following array:
 *
 *   "css" => "h1 { color:red; }"
 *   "media" => "@media { h1 {}}"
 *
 * @param string $css the CSS to split
 * @return array the split CSS with the keys "css" and "media"
 */
private function splitCssAndMediaQuery($css) {
    // Move the supported media blocks out of the CSS; the callback stores them in self::$_media.
    $cssWithoutMedia = preg_replace_callback( '#@media\\s+(?:only\\s)?(?:[\\s{\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU', array( $this, '_media_concat' ), $css );

    $patternsToStrip = array(
        // get rid of css comment code
        '/\\/\\*.*\\*\\//sU',
        // strip out any import directives
        '/^\\s*@import\\s[^;]+;/misU',
        // strip remains media enclosures
        '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
    );

    // A single replacement string is applied to every pattern.
    $cleanCss = preg_replace($patternsToStrip, '', $cssWithoutMedia);

    return array('css' => $cleanCss, 'media' => self::$_media);
}
||||
498 | |||||
/**
 * Regex callback that collects a matched @media block.
 *
 * The full match is appended to the static $_media property. The empty string returned here
 * is what preg_replace_callback() puts in place of the match, i.e. the block is removed from
 * the CSS being processed. (Previously the callback returned nothing and relied on the
 * implicit null being converted to an empty string.)
 *
 * @since 2.0.0
 *
 * @param array $matches Matches result array.
 * @return string always an empty string
 */
private function _media_concat( $matches ) {
    self::$_media .= $matches[0];

    return '';
}
||||
509 | |||||
/**
 * Builds a DOMDocument from the unified version of the current HTML.
 *
 * libxml errors are collected internally while loading and discarded afterwards; the previous
 * libxml error handling setting is restored before the document is normalized and returned.
 *
 * @return DOMDocument the parsed document
 */
private function createXmlDocument() {
    $document = new DOMDocument();
    $document->encoding = self::ENCODING;
    $document->strictErrorChecking = false;
    $document->formatOutput = true;

    $previousErrorSetting = libxml_use_internal_errors(true);
    // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
    /** @scrutinizer ignore-unhandled */ @$document->loadHTML($this->getUnifiedHtml());
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorSetting);

    $document->normalizeDocument();

    return $document;
}
||||
529 | |||||
/**
 * Returns the HTML with the non-ASCII characters converted into HTML entities and the unprocessable HTML tags removed.
 *
 * Only the opening and closing tags are removed; content between them is kept. Tag names are
 * quoted before they are put into the regular expression, and a name must be followed by
 * whitespace, "/" or ">" so that e.g. the tag "p" does not also strip "<pre>".
 *
 * @return string the unified HTML
 */
private function getUnifiedHtml() {
    $bodyWithoutUnprocessableTags = $this->html;

    if (!empty($this->unprocessableHtmlTags)) {
        $quotedTagNames = array();
        foreach ($this->unprocessableHtmlTags as $tagName) {
            $quotedTagNames[] = preg_quote($tagName, '/');
        }

        $strippedHtml = preg_replace(
            '/<\\/?(?:' . implode('|', $quotedTagNames) . ')(?=[\\s\\/>])[^>]*>/i',
            '',
            $this->html
        );
        // preg_replace() returns null on failure; keep the unmodified HTML in that case.
        if ($strippedHtml !== null) {
            $bodyWithoutUnprocessableTags = $strippedHtml;
        }
    }

    // The 'HTML-ENTITIES' pseudo-encoding is deprecated since PHP 8.2.
    if ( version_compare( PHP_VERSION, '8.2', '<' ) && function_exists( 'mb_convert_encoding' ) ) {
        return mb_convert_encoding( $bodyWithoutUnprocessableTags, 'HTML-ENTITIES', self::ENCODING );
    } else {
        // Encode every character from U+0080 upwards as a numeric entity.
        return mb_encode_numericentity( $bodyWithoutUnprocessableTags, [0x80, 0x10FFFF, 0, ~0], self::ENCODING );
    }
}
||||
553 | |||||
/**
 * Comparison callback for usort() that orders selector entries by ascending precedence.
 *
 * Selectors with lesser precedence are processed first and selectors with greater precedence
 * last. Entries with equal precedence are ordered by their "line" value. The callback never
 * returns 0.
 *
 * @see http://forge.typo3.org/issues/55605
 *
 * @param array $a selector entry with the keys "selector" and "line"
 * @param array $b selector entry with the keys "selector" and "line"
 *
 * @return integer -1 if $a sorts before $b, 1 otherwise
 */
private function sortBySelectorPrecedence(array $a, array $b) {
    $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
    $precedenceB = $this->getCssSelectorPrecedence($b['selector']);

    if ($precedenceA === $precedenceB) {
        // Same precedence: keep the order in which the rules appeared.
        if ($a['line'] < $b['line']) {
            return -1;
        }
        return 1;
    }

    if ($precedenceA < $precedenceB) {
        return -1;
    }
    return 1;
}
||||
573 | |||||
/**
 * Calculates the precedence of a CSS selector.
 *
 * Each id counts 100, each class 10 and each remaining word (element name) 1. The matched
 * parts are removed from the selector step by step, and counting stops as soon as nothing
 * but whitespace is left. Results are cached per selector.
 *
 * @param string $selector the CSS selector
 *
 * @return integer the precedence value
 */
private function getCssSelectorPrecedence($selector) {
    $selectorKey = md5($selector);
    if (!isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
        $precedence = 0;
        $value = 100;
        // ids: worth 100, classes: worth 10, elements: worth 1
        $search = array('\\#','\\.','');

        foreach ($search as $s) {
            // Trim the selector itself (the original trimmed the boolean result of the comparison).
            if (trim($selector) === '') {
                break;
            }
            $number = 0;
            $selector = preg_replace('/' . $s . '\\w+/', '', $selector, -1, $number);
            $precedence += ($value * $number);
            $value /= 10;
        }
        $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
    }

    return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
}
||||
601 | |||||
/**
 * Translates a CSS selector into an XPath expression.
 *
 * Right now, we support all CSS 1 selectors and most CSS2/3 selectors.
 *
 * Results are cached under CACHE_KEY_XPATH. (The original checked that cache but stored and read
 * the result under CACHE_KEY_SELECTOR, so the XPath cache never hit and the precedence cache of
 * getCssSelectorPrecedence() received XPath strings.)
 *
 * @see http://plasmasturm.org/log/444/
 *
 * @param string $paramCssSelector the CSS selector
 *
 * @return string the XPath expression
 */
private function translateCssToXpath($paramCssSelector) {
    // Lower-case all whitespace-delimited words (element names) of the selector.
    $cssSelector = ' ' . $paramCssSelector . ' ';
    $cssSelector = preg_replace_callback( '/\s+\w+\s+/', array( $this, 'strtolower' ), $cssSelector );
    $cssSelector = trim($cssSelector);
    $xpathKey = md5($cssSelector);
    if (!isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
        // returns an Xpath selector
        $search = array(
            // Matches any element that is a child of parent.
            '/\\s+>\\s+/',
            // Matches any element that is an adjacent sibling.
            '/\\s+\\+\\s+/',
            // Matches any element that is a descendant of a parent element.
            '/\\s+/',
            // first-child pseudo-selector
            '/([^\\/]+):first-child/i',
            // last-child pseudo-selector
            '/([^\\/]+):last-child/i',
            // Matches attribute only selector
            '/^\\[(\\w+)\\]/',
            // Matches element with attribute
            '/(\\w)\\[(\\w+)\\]/',
            // Matches element with EXACT attribute
            '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
        );
        $replace = array(
            '/',
            '/following-sibling::*[1]/self::',
            '//',
            '*[1]/self::\\1',
            '*[last()]/self::\\1',
            '*[@\\1]',
            '\\1[@\\2]',
            '\\1[@\\2="\\3"]',
        );

        $cssSelector = '//' . preg_replace($search, $replace, $cssSelector);

        $cssSelector = preg_replace_callback(self::ID_ATTRIBUTE_MATCHER, array($this, 'matchIdAttributes'), $cssSelector);
        $cssSelector = preg_replace_callback(self::CLASS_ATTRIBUTE_MATCHER, array($this, 'matchClassAttributes'), $cssSelector);

        // Advanced selectors are going to require a bit more advanced emogrification.
        $cssSelector = preg_replace_callback(
            '/([^\\/]+):nth-child\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
            array($this, 'translateNthChild'), $cssSelector
        );
        $cssSelector = preg_replace_callback(
            '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
            array($this, 'translateNthOfType'), $cssSelector
        );

        $this->caches[self::CACHE_KEY_XPATH][$xpathKey] = $cssSelector;
    }

    return $this->caches[self::CACHE_KEY_XPATH][$xpathKey];
}
||||
667 | |||||
/**
 * Regex callback for ID_ATTRIBUTE_MATCHER that turns "element#id" into an XPath id test.
 *
 * @param array $match the regex match: [1] is the (possibly empty) element name, [2] the id
 *
 * @return string the XPath fragment, with "*" as element name if none was given
 */
private function matchIdAttributes(array $match) {
    $elementName = $match[1];
    if (!strlen($elementName)) {
        $elementName = '*';
    }

    return $elementName . '[@id="' . $match[2] . '"]';
}
||||
676 | |||||
/**
 * Regex callback for CLASS_ATTRIBUTE_MATCHER that turns "element.a.b" into XPath class tests.
 *
 * One "contains" predicate is emitted per class name, each testing for the name as a
 * space-delimited word within the class attribute.
 *
 * @param array $match the regex match: [1] is the (possibly empty) element name, [2] the ".a.b" part
 *
 * @return string the XPath fragment, with "*" as element name if none was given
 */
private function matchClassAttributes(array $match) {
    $xpathFragment = strlen($match[1]) ? $match[1] : '*';

    // Drop the leading dot, then handle each class name separately.
    $classNames = explode('.', substr($match[2], 1));
    foreach ($classNames as $className) {
        $xpathFragment .= '[contains(concat(" ",@class," "),concat(" ","' . $className . '"," "))]';
    }

    return $xpathFragment;
}
||||
689 | |||||
/**
 * Regex callback that translates an ":nth-child(...)" selector part into XPath.
 *
 * The argument is parsed by parseNth(). Without a multiplier a plain position test is produced;
 * with a multiplier a "mod" test on position() is produced, counted from the end
 * (last() - position()) when the multiplier is negative.
 *
 * @param array $match the regex match: [1] is the selector part before ":nth-child", [2] the argument
 *
 * @return string the XPath fragment
 */
private function translateNthChild(array $match) {
    $parsed = $this->parseNth($match);

    if (!isset($parsed[self::MULTIPLIER])) {
        return sprintf('*[%u]/self::%s', $parsed[self::INDEX], $match[1]);
    }

    if ($parsed[self::MULTIPLIER] < 0) {
        return sprintf(
            '*[(last() - position()) mod %u = %u]/self::%s',
            abs($parsed[self::MULTIPLIER]),
            $parsed[self::INDEX],
            $match[1]
        );
    }

    return sprintf('*[position() mod %u = %u]/self::%s', $parsed[self::MULTIPLIER], $parsed[self::INDEX], $match[1]);
}
||||
709 | |||||
/**
 * Regex callback that translates an ":nth-of-type(...)" selector part into XPath.
 *
 * The argument is parsed by parseNth(). Without a multiplier a plain position predicate is
 * appended to the selector part; with a multiplier a "mod" test on position() is appended,
 * counted from the end (last() - position()) when the multiplier is negative.
 *
 * @param array $match the regex match: [1] is the selector part before ":nth-of-type", [2] the argument
 *
 * @return string the XPath fragment
 */
private function translateNthOfType(array $match) {
    $parsed = $this->parseNth($match);

    if (!isset($parsed[self::MULTIPLIER])) {
        return sprintf('%s[%u]', $match[1], $parsed[self::INDEX]);
    }

    if ($parsed[self::MULTIPLIER] < 0) {
        return sprintf(
            '%s[(last() - position()) mod %u = %u]',
            $match[1],
            abs($parsed[self::MULTIPLIER]),
            $parsed[self::INDEX]
        );
    }

    return sprintf('%s[position() mod %u = %u]', $match[1], $parsed[self::MULTIPLIER], $parsed[self::INDEX]);
}
||||
729 | |||||
/**
 * Parses the argument of an nth-child/nth-of-type selector into index and multiplier.
 *
 * - "even"/"odd" give a multiplier of 2 and an index of 0/1.
 * - A plain number gives only an index.
 * - "an+b" gives multiplier a and index b; a bare "n"/"+n" means 1 and "-n" means -1.
 *   A multiplier of 0 gives only an index. A negative index is raised by the absolute
 *   multiplier until it is no longer negative.
 *
 * Fixes over the previous version: "-n"/"+n" no longer end up as multiplier 0 (which made the
 * normalisation loop below run forever for e.g. "-n-1" on PHP 8), and the offset is cut off the
 * end of the expression instead of being replaced everywhere (which broke e.g. "+3n+3").
 *
 * @param array $match the regex match: [2] is the whole argument, [3] (optional) the "+b"/"-b" offset at its end
 *
 * @return array the parsed values, keyed by self::INDEX and (if present) self::MULTIPLIER
 */
private function parseNth(array $match) {
    $expression = strtolower($match[2]);

    if ($expression === 'even' || $expression === 'odd') {
        $index = $expression === 'even' ? 0 : 1;
        return array(self::MULTIPLIER => 2, self::INDEX => $index);
    }

    if (strpos($expression, 'n') === false) {
        // a plain position without multiplier
        $index = intval(preg_replace('/\\s+/', '', $match[2]));
        return array(self::INDEX => $index);
    }

    if (isset($match[3]) && $match[3] !== '') {
        // The offset is the tail of the expression, so cut it off by length.
        $multipleTerm = substr($match[2], 0, -strlen($match[3]));
        $index = intval(preg_replace('/\\s+/', '', $match[3]));
    } else {
        $multipleTerm = $match[2];
        $index = 0;
    }

    $multiplier = trim(str_ireplace('n', '', $multipleTerm));

    if ($multiplier === '' || $multiplier === '+') {
        $multiplier = 1;
    } elseif ($multiplier === '-') {
        $multiplier = -1;
    } else {
        $multiplier = intval($multiplier);
    }

    if ($multiplier === 0) {
        return array(self::INDEX => $index);
    }

    // $multiplier is non-zero here, so this loop always terminates.
    while ($index < 0) {
        $index += abs($multiplier);
    }

    return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
}
||||
769 | |||||
/**
 * Turns a CSS declaration block into an array of property names and values.
 *
 * Example:
 *
 * The declaration block
 *
 *   "color: #000; font-weight: bold;"
 *
 * will be parsed into the following array:
 *
 *   "color" => "#000"
 *   "font-weight" => "bold"
 *
 * The block is split at semicolons; parts that do not look like "name: value" are ignored and
 * property names are lower-cased. Results are cached per declaration block string.
 *
 * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty
 *
 * @return array the CSS declarations with the property names as array keys and the property values as array values
 */
private function parseCssDeclarationBlock($cssDeclarationBlock) {
    if (!isset($this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock])) {
        $parsedProperties = array();

        foreach (explode(';', $cssDeclarationBlock) as $singleDeclaration) {
            $parts = array();
            if (preg_match('/ *([A-Za-z\\-]+) *: *([^;]+) */', $singleDeclaration, $parts)) {
                $parsedProperties[strtolower($parts[1])] = $parts[2];
            }
        }

        $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock] = $parsedProperties;
    }

    return $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock];
}
||||
808 | } |
||||
809 |