@@ -3,48 +3,48 @@ |
||
| 3 | 3 | class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector |
| 4 | 4 | { |
| 5 | 5 | |
| 6 | - private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; |
|
| 6 | + private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions; |
|
| 7 | 7 | |
| 8 | - public function prepare($config, $context) { |
|
| 9 | - parent::prepare($config, $context); |
|
| 10 | - $this->config = $config; |
|
| 11 | - $this->context = $context; |
|
| 12 | - $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); |
|
| 13 | - $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); |
|
| 14 | - $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 15 | - } |
|
| 8 | + public function prepare($config, $context) { |
|
| 9 | + parent::prepare($config, $context); |
|
| 10 | + $this->config = $config; |
|
| 11 | + $this->context = $context; |
|
| 12 | + $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); |
|
| 13 | + $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); |
|
| 14 | + $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 15 | + } |
|
| 16 | 16 | |
| 17 | - public function handleElement(&$token) { |
|
| 18 | - if (!$token instanceof HTMLPurifier_Token_Start) return; |
|
| 19 | - $next = false; |
|
| 20 | - for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { |
|
| 21 | - $next = $this->inputTokens[$i]; |
|
| 22 | - if ($next instanceof HTMLPurifier_Token_Text) { |
|
| 23 | - if ($next->is_whitespace) continue; |
|
| 24 | - if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { |
|
| 25 | - $plain = str_replace("\xC2\xA0", "", $next->data); |
|
| 26 | - $isWsOrNbsp = $plain === '' || ctype_space($plain); |
|
| 27 | - if ($isWsOrNbsp) continue; |
|
| 28 | - } |
|
| 29 | - } |
|
| 30 | - break; |
|
| 31 | - } |
|
| 32 | - if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { |
|
| 33 | - if ($token->name == 'colgroup') return; |
|
| 34 | - $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | - $token->armor['ValidateAttributes'] = true; |
|
| 36 | - if (isset($token->attr['id']) || isset($token->attr['name'])) return; |
|
| 37 | - $token = $i - $this->inputIndex + 1; |
|
| 38 | - for ($b = $this->inputIndex - 1; $b > 0; $b--) { |
|
| 39 | - $prev = $this->inputTokens[$b]; |
|
| 40 | - if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; |
|
| 41 | - break; |
|
| 42 | - } |
|
| 43 | - // This is safe because we removed the token that triggered this. |
|
| 44 | - $this->rewind($b - 1); |
|
| 45 | - return; |
|
| 46 | - } |
|
| 47 | - } |
|
| 17 | + public function handleElement(&$token) { |
|
| 18 | + if (!$token instanceof HTMLPurifier_Token_Start) return; |
|
| 19 | + $next = false; |
|
| 20 | + for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { |
|
| 21 | + $next = $this->inputTokens[$i]; |
|
| 22 | + if ($next instanceof HTMLPurifier_Token_Text) { |
|
| 23 | + if ($next->is_whitespace) continue; |
|
| 24 | + if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { |
|
| 25 | + $plain = str_replace("\xC2\xA0", "", $next->data); |
|
| 26 | + $isWsOrNbsp = $plain === '' || ctype_space($plain); |
|
| 27 | + if ($isWsOrNbsp) continue; |
|
| 28 | + } |
|
| 29 | + } |
|
| 30 | + break; |
|
| 31 | + } |
|
| 32 | + if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { |
|
| 33 | + if ($token->name == 'colgroup') return; |
|
| 34 | + $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | + $token->armor['ValidateAttributes'] = true; |
|
| 36 | + if (isset($token->attr['id']) || isset($token->attr['name'])) return; |
|
| 37 | + $token = $i - $this->inputIndex + 1; |
|
| 38 | + for ($b = $this->inputIndex - 1; $b > 0; $b--) { |
|
| 39 | + $prev = $this->inputTokens[$b]; |
|
| 40 | + if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; |
|
| 41 | + break; |
|
| 42 | + } |
|
| 43 | + // This is safe because we removed the token that triggered this. |
|
| 44 | + $this->rewind($b - 1); |
|
| 45 | + return; |
|
| 46 | + } |
|
| 47 | + } |
|
| 48 | 48 | |
| 49 | 49 | } |
| 50 | 50 | |
@@ -15,29 +15,41 @@ |
||
| 15 | 15 | } |
| 16 | 16 | |
| 17 | 17 | public function handleElement(&$token) { |
| 18 | - if (!$token instanceof HTMLPurifier_Token_Start) return; |
|
| 18 | + if (!$token instanceof HTMLPurifier_Token_Start) { |
|
| 19 | + return; |
|
| 20 | + } |
|
| 19 | 21 | $next = false; |
| 20 | 22 | for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { |
| 21 | 23 | $next = $this->inputTokens[$i]; |
| 22 | 24 | if ($next instanceof HTMLPurifier_Token_Text) { |
| 23 | - if ($next->is_whitespace) continue; |
|
| 25 | + if ($next->is_whitespace) { |
|
| 26 | + continue; |
|
| 27 | + } |
|
| 24 | 28 | if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { |
| 25 | 29 | $plain = str_replace("\xC2\xA0", "", $next->data); |
| 26 | 30 | $isWsOrNbsp = $plain === '' || ctype_space($plain); |
| 27 | - if ($isWsOrNbsp) continue; |
|
| 31 | + if ($isWsOrNbsp) { |
|
| 32 | + continue; |
|
| 33 | + } |
|
| 28 | 34 | } |
| 29 | 35 | } |
| 30 | 36 | break; |
| 31 | 37 | } |
| 32 | 38 | if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { |
| 33 | - if ($token->name == 'colgroup') return; |
|
| 39 | + if ($token->name == 'colgroup') { |
|
| 40 | + return; |
|
| 41 | + } |
|
| 34 | 42 | $this->attrValidator->validateToken($token, $this->config, $this->context); |
| 35 | 43 | $token->armor['ValidateAttributes'] = true; |
| 36 | - if (isset($token->attr['id']) || isset($token->attr['name'])) return; |
|
| 44 | + if (isset($token->attr['id']) || isset($token->attr['name'])) { |
|
| 45 | + return; |
|
| 46 | + } |
|
| 37 | 47 | $token = $i - $this->inputIndex + 1; |
| 38 | 48 | for ($b = $this->inputIndex - 1; $b > 0; $b--) { |
| 39 | 49 | $prev = $this->inputTokens[$b]; |
| 40 | - if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; |
|
| 50 | + if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) { |
|
| 51 | + continue; |
|
| 52 | + } |
|
| 41 | 53 | break; |
| 42 | 54 | } |
| 43 | 55 | // This is safe because we removed the token that triggered this. |
@@ -5,56 +5,56 @@ |
||
| 5 | 5 | */ |
| 6 | 6 | class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector |
| 7 | 7 | { |
| 8 | - public $name = 'RemoveSpansWithoutAttributes'; |
|
| 9 | - public $needed = array('span'); |
|
| 10 | - |
|
| 11 | - private $attrValidator; |
|
| 12 | - |
|
| 13 | - /** |
|
| 14 | - * Used by AttrValidator |
|
| 15 | - */ |
|
| 16 | - private $config; |
|
| 17 | - private $context; |
|
| 18 | - |
|
| 19 | - public function prepare($config, $context) { |
|
| 20 | - $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 21 | - $this->config = $config; |
|
| 22 | - $this->context = $context; |
|
| 23 | - return parent::prepare($config, $context); |
|
| 24 | - } |
|
| 25 | - |
|
| 26 | - public function handleElement(&$token) { |
|
| 27 | - if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { |
|
| 28 | - return; |
|
| 29 | - } |
|
| 30 | - |
|
| 31 | - // We need to validate the attributes now since this doesn't normally |
|
| 32 | - // happen until after MakeWellFormed. If all the attributes are removed |
|
| 33 | - // the span needs to be removed too. |
|
| 34 | - $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | - $token->armor['ValidateAttributes'] = true; |
|
| 36 | - |
|
| 37 | - if (!empty($token->attr)) { |
|
| 38 | - return; |
|
| 39 | - } |
|
| 40 | - |
|
| 41 | - $nesting = 0; |
|
| 42 | - $spanContentTokens = array(); |
|
| 43 | - while ($this->forwardUntilEndToken($i, $current, $nesting)) {} |
|
| 44 | - |
|
| 45 | - if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { |
|
| 46 | - // Mark closing span tag for deletion |
|
| 47 | - $current->markForDeletion = true; |
|
| 48 | - // Delete open span tag |
|
| 49 | - $token = false; |
|
| 50 | - } |
|
| 51 | - } |
|
| 52 | - |
|
| 53 | - public function handleEnd(&$token) { |
|
| 54 | - if ($token->markForDeletion) { |
|
| 55 | - $token = false; |
|
| 56 | - } |
|
| 57 | - } |
|
| 8 | + public $name = 'RemoveSpansWithoutAttributes'; |
|
| 9 | + public $needed = array('span'); |
|
| 10 | + |
|
| 11 | + private $attrValidator; |
|
| 12 | + |
|
| 13 | + /** |
|
| 14 | + * Used by AttrValidator |
|
| 15 | + */ |
|
| 16 | + private $config; |
|
| 17 | + private $context; |
|
| 18 | + |
|
| 19 | + public function prepare($config, $context) { |
|
| 20 | + $this->attrValidator = new HTMLPurifier_AttrValidator(); |
|
| 21 | + $this->config = $config; |
|
| 22 | + $this->context = $context; |
|
| 23 | + return parent::prepare($config, $context); |
|
| 24 | + } |
|
| 25 | + |
|
| 26 | + public function handleElement(&$token) { |
|
| 27 | + if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) { |
|
| 28 | + return; |
|
| 29 | + } |
|
| 30 | + |
|
| 31 | + // We need to validate the attributes now since this doesn't normally |
|
| 32 | + // happen until after MakeWellFormed. If all the attributes are removed |
|
| 33 | + // the span needs to be removed too. |
|
| 34 | + $this->attrValidator->validateToken($token, $this->config, $this->context); |
|
| 35 | + $token->armor['ValidateAttributes'] = true; |
|
| 36 | + |
|
| 37 | + if (!empty($token->attr)) { |
|
| 38 | + return; |
|
| 39 | + } |
|
| 40 | + |
|
| 41 | + $nesting = 0; |
|
| 42 | + $spanContentTokens = array(); |
|
| 43 | + while ($this->forwardUntilEndToken($i, $current, $nesting)) {} |
|
| 44 | + |
|
| 45 | + if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') { |
|
| 46 | + // Mark closing span tag for deletion |
|
| 47 | + $current->markForDeletion = true; |
|
| 48 | + // Delete open span tag |
|
| 49 | + $token = false; |
|
| 50 | + } |
|
| 51 | + } |
|
| 52 | + |
|
| 53 | + public function handleEnd(&$token) { |
|
| 54 | + if ($token->markForDeletion) { |
|
| 55 | + $token = false; |
|
| 56 | + } |
|
| 57 | + } |
|
| 58 | 58 | } |
| 59 | 59 | |
| 60 | 60 | // vim: et sw=4 sts=4 |
@@ -6,85 +6,85 @@ |
||
| 6 | 6 | */ |
| 7 | 7 | class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector |
| 8 | 8 | { |
| 9 | - public $name = 'SafeObject'; |
|
| 10 | - public $needed = array('object', 'param'); |
|
| 9 | + public $name = 'SafeObject'; |
|
| 10 | + public $needed = array('object', 'param'); |
|
| 11 | 11 | |
| 12 | - protected $objectStack = array(); |
|
| 13 | - protected $paramStack = array(); |
|
| 12 | + protected $objectStack = array(); |
|
| 13 | + protected $paramStack = array(); |
|
| 14 | 14 | |
| 15 | - // Keep this synchronized with AttrTransform/SafeParam.php |
|
| 16 | - protected $addParam = array( |
|
| 17 | - 'allowScriptAccess' => 'never', |
|
| 18 | - 'allowNetworking' => 'internal', |
|
| 19 | - ); |
|
| 20 | - protected $allowedParam = array( |
|
| 21 | - 'wmode' => true, |
|
| 22 | - 'movie' => true, |
|
| 23 | - 'flashvars' => true, |
|
| 24 | - 'src' => true, |
|
| 25 | - 'allowFullScreen' => true, // if omitted, assume to be 'false' |
|
| 26 | - ); |
|
| 15 | + // Keep this synchronized with AttrTransform/SafeParam.php |
|
| 16 | + protected $addParam = array( |
|
| 17 | + 'allowScriptAccess' => 'never', |
|
| 18 | + 'allowNetworking' => 'internal', |
|
| 19 | + ); |
|
| 20 | + protected $allowedParam = array( |
|
| 21 | + 'wmode' => true, |
|
| 22 | + 'movie' => true, |
|
| 23 | + 'flashvars' => true, |
|
| 24 | + 'src' => true, |
|
| 25 | + 'allowFullScreen' => true, // if omitted, assume to be 'false' |
|
| 26 | + ); |
|
| 27 | 27 | |
| 28 | - public function prepare($config, $context) { |
|
| 29 | - parent::prepare($config, $context); |
|
| 30 | - } |
|
| 28 | + public function prepare($config, $context) { |
|
| 29 | + parent::prepare($config, $context); |
|
| 30 | + } |
|
| 31 | 31 | |
| 32 | - public function handleElement(&$token) { |
|
| 33 | - if ($token->name == 'object') { |
|
| 34 | - $this->objectStack[] = $token; |
|
| 35 | - $this->paramStack[] = array(); |
|
| 36 | - $new = array($token); |
|
| 37 | - foreach ($this->addParam as $name => $value) { |
|
| 38 | - $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value)); |
|
| 39 | - } |
|
| 40 | - $token = $new; |
|
| 41 | - } elseif ($token->name == 'param') { |
|
| 42 | - $nest = count($this->currentNesting) - 1; |
|
| 43 | - if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') { |
|
| 44 | - $i = count($this->objectStack) - 1; |
|
| 45 | - if (!isset($token->attr['name'])) { |
|
| 46 | - $token = false; |
|
| 47 | - return; |
|
| 48 | - } |
|
| 49 | - $n = $token->attr['name']; |
|
| 50 | - // We need this fix because YouTube doesn't supply a data |
|
| 51 | - // attribute, which we need if a type is specified. This is |
|
| 52 | - // *very* Flash specific. |
|
| 53 | - if (!isset($this->objectStack[$i]->attr['data']) && |
|
| 54 | - ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { |
|
| 55 | - $this->objectStack[$i]->attr['data'] = $token->attr['value']; |
|
| 56 | - } |
|
| 57 | - // Check if the parameter is the correct value but has not |
|
| 58 | - // already been added |
|
| 59 | - if ( |
|
| 60 | - !isset($this->paramStack[$i][$n]) && |
|
| 61 | - isset($this->addParam[$n]) && |
|
| 62 | - $token->attr['name'] === $this->addParam[$n] |
|
| 63 | - ) { |
|
| 64 | - // keep token, and add to param stack |
|
| 65 | - $this->paramStack[$i][$n] = true; |
|
| 66 | - } elseif (isset($this->allowedParam[$n])) { |
|
| 67 | - // keep token, don't do anything to it |
|
| 68 | - // (could possibly check for duplicates here) |
|
| 69 | - } else { |
|
| 70 | - $token = false; |
|
| 71 | - } |
|
| 72 | - } else { |
|
| 73 | - // not directly inside an object, DENY! |
|
| 74 | - $token = false; |
|
| 75 | - } |
|
| 76 | - } |
|
| 77 | - } |
|
| 32 | + public function handleElement(&$token) { |
|
| 33 | + if ($token->name == 'object') { |
|
| 34 | + $this->objectStack[] = $token; |
|
| 35 | + $this->paramStack[] = array(); |
|
| 36 | + $new = array($token); |
|
| 37 | + foreach ($this->addParam as $name => $value) { |
|
| 38 | + $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value)); |
|
| 39 | + } |
|
| 40 | + $token = $new; |
|
| 41 | + } elseif ($token->name == 'param') { |
|
| 42 | + $nest = count($this->currentNesting) - 1; |
|
| 43 | + if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') { |
|
| 44 | + $i = count($this->objectStack) - 1; |
|
| 45 | + if (!isset($token->attr['name'])) { |
|
| 46 | + $token = false; |
|
| 47 | + return; |
|
| 48 | + } |
|
| 49 | + $n = $token->attr['name']; |
|
| 50 | + // We need this fix because YouTube doesn't supply a data |
|
| 51 | + // attribute, which we need if a type is specified. This is |
|
| 52 | + // *very* Flash specific. |
|
| 53 | + if (!isset($this->objectStack[$i]->attr['data']) && |
|
| 54 | + ($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) { |
|
| 55 | + $this->objectStack[$i]->attr['data'] = $token->attr['value']; |
|
| 56 | + } |
|
| 57 | + // Check if the parameter is the correct value but has not |
|
| 58 | + // already been added |
|
| 59 | + if ( |
|
| 60 | + !isset($this->paramStack[$i][$n]) && |
|
| 61 | + isset($this->addParam[$n]) && |
|
| 62 | + $token->attr['name'] === $this->addParam[$n] |
|
| 63 | + ) { |
|
| 64 | + // keep token, and add to param stack |
|
| 65 | + $this->paramStack[$i][$n] = true; |
|
| 66 | + } elseif (isset($this->allowedParam[$n])) { |
|
| 67 | + // keep token, don't do anything to it |
|
| 68 | + // (could possibly check for duplicates here) |
|
| 69 | + } else { |
|
| 70 | + $token = false; |
|
| 71 | + } |
|
| 72 | + } else { |
|
| 73 | + // not directly inside an object, DENY! |
|
| 74 | + $token = false; |
|
| 75 | + } |
|
| 76 | + } |
|
| 77 | + } |
|
| 78 | 78 | |
| 79 | - public function handleEnd(&$token) { |
|
| 80 | - // This is the WRONG way of handling the object and param stacks; |
|
| 81 | - // we should be inserting them directly on the relevant object tokens |
|
| 82 | - // so that the global stack handling handles it. |
|
| 83 | - if ($token->name == 'object') { |
|
| 84 | - array_pop($this->objectStack); |
|
| 85 | - array_pop($this->paramStack); |
|
| 86 | - } |
|
| 87 | - } |
|
| 79 | + public function handleEnd(&$token) { |
|
| 80 | + // This is the WRONG way of handling the object and param stacks; |
|
| 81 | + // we should be inserting them directly on the relevant object tokens |
|
| 82 | + // so that the global stack handling handles it. |
|
| 83 | + if ($token->name == 'object') { |
|
| 84 | + array_pop($this->objectStack); |
|
| 85 | + array_pop($this->paramStack); |
|
| 86 | + } |
|
| 87 | + } |
|
| 88 | 88 | |
| 89 | 89 | } |
| 90 | 90 | |
@@ -5,7 +5,7 @@ |
||
| 5 | 5 | $fallback = 'en'; |
| 6 | 6 | |
| 7 | 7 | $messages = array( |
| 8 | - 'HTMLPurifier' => 'HTML Purifier X' |
|
| 8 | + 'HTMLPurifier' => 'HTML Purifier X' |
|
| 9 | 9 | ); |
| 10 | 10 | |
| 11 | 11 | // vim: et sw=4 sts=4 |
@@ -6,7 +6,7 @@ |
||
| 6 | 6 | $fallback = 'en'; |
| 7 | 7 | |
| 8 | 8 | $messages = array( |
| 9 | - 'HTMLPurifier' => 'HTML Purifier XNone' |
|
| 9 | + 'HTMLPurifier' => 'HTML Purifier XNone' |
|
| 10 | 10 | ); |
| 11 | 11 | |
| 12 | 12 | // vim: et sw=4 sts=4 |
@@ -55,9 +55,9 @@ |
||
| 55 | 55 | ); |
| 56 | 56 | |
| 57 | 57 | $errorNames = array( |
| 58 | - E_ERROR => 'Error', |
|
| 59 | - E_WARNING => 'Warning', |
|
| 60 | - E_NOTICE => 'Notice' |
|
| 58 | + E_ERROR => 'Error', |
|
| 59 | + E_WARNING => 'Warning', |
|
| 60 | + E_NOTICE => 'Notice' |
|
| 61 | 61 | ); |
| 62 | 62 | |
| 63 | 63 | // vim: et sw=4 sts=4 |
@@ -10,188 +10,188 @@ |
||
| 10 | 10 | class HTMLPurifier_LanguageFactory |
| 11 | 11 | { |
| 12 | 12 | |
| 13 | - /** |
|
| 14 | - * Cache of language code information used to load HTMLPurifier_Language objects |
|
| 15 | - * Structure is: $factory->cache[$language_code][$key] = $value |
|
| 16 | - * @value array map |
|
| 17 | - */ |
|
| 18 | - public $cache; |
|
| 19 | - |
|
| 20 | - /** |
|
| 21 | - * Valid keys in the HTMLPurifier_Language object. Designates which |
|
| 22 | - * variables to slurp out of a message file. |
|
| 23 | - * @value array list |
|
| 24 | - */ |
|
| 25 | - public $keys = array('fallback', 'messages', 'errorNames'); |
|
| 26 | - |
|
| 27 | - /** |
|
| 28 | - * Instance of HTMLPurifier_AttrDef_Lang to validate language codes |
|
| 29 | - * @value object HTMLPurifier_AttrDef_Lang |
|
| 30 | - */ |
|
| 31 | - protected $validator; |
|
| 32 | - |
|
| 33 | - /** |
|
| 34 | - * Cached copy of dirname(__FILE__), directory of current file without |
|
| 35 | - * trailing slash |
|
| 36 | - * @value string filename |
|
| 37 | - */ |
|
| 38 | - protected $dir; |
|
| 39 | - |
|
| 40 | - /** |
|
| 41 | - * Keys whose contents are a hash map and can be merged |
|
| 42 | - * @value array lookup |
|
| 43 | - */ |
|
| 44 | - protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true); |
|
| 45 | - |
|
| 46 | - /** |
|
| 47 | - * Keys whose contents are a list and can be merged |
|
| 48 | - * @value array lookup |
|
| 49 | - */ |
|
| 50 | - protected $mergeable_keys_list = array(); |
|
| 51 | - |
|
| 52 | - /** |
|
| 53 | - * Retrieve sole instance of the factory. |
|
| 54 | - * @param $prototype Optional prototype to overload sole instance with, |
|
| 55 | - * or bool true to reset to default factory. |
|
| 56 | - */ |
|
| 57 | - public static function instance($prototype = null) { |
|
| 58 | - static $instance = null; |
|
| 59 | - if ($prototype !== null) { |
|
| 60 | - $instance = $prototype; |
|
| 61 | - } elseif ($instance === null || $prototype == true) { |
|
| 62 | - $instance = new HTMLPurifier_LanguageFactory(); |
|
| 63 | - $instance->setup(); |
|
| 64 | - } |
|
| 65 | - return $instance; |
|
| 66 | - } |
|
| 67 | - |
|
| 68 | - /** |
|
| 69 | - * Sets up the singleton, much like a constructor |
|
| 70 | - * @note Prevents people from getting this outside of the singleton |
|
| 71 | - */ |
|
| 72 | - public function setup() { |
|
| 73 | - $this->validator = new HTMLPurifier_AttrDef_Lang(); |
|
| 74 | - $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier'; |
|
| 75 | - } |
|
| 76 | - |
|
| 77 | - /** |
|
| 78 | - * Creates a language object, handles class fallbacks |
|
| 79 | - * @param $config Instance of HTMLPurifier_Config |
|
| 80 | - * @param $context Instance of HTMLPurifier_Context |
|
| 81 | - * @param $code Code to override configuration with. Private parameter. |
|
| 82 | - */ |
|
| 83 | - public function create($config, $context, $code = false) { |
|
| 84 | - |
|
| 85 | - // validate language code |
|
| 86 | - if ($code === false) { |
|
| 87 | - $code = $this->validator->validate( |
|
| 88 | - $config->get('Core.Language'), $config, $context |
|
| 89 | - ); |
|
| 90 | - } else { |
|
| 91 | - $code = $this->validator->validate($code, $config, $context); |
|
| 92 | - } |
|
| 93 | - if ($code === false) $code = 'en'; // malformed code becomes English |
|
| 94 | - |
|
| 95 | - $pcode = str_replace('-', '_', $code); // make valid PHP classname |
|
| 96 | - static $depth = 0; // recursion protection |
|
| 97 | - |
|
| 98 | - if ($code == 'en') { |
|
| 99 | - $lang = new HTMLPurifier_Language($config, $context); |
|
| 100 | - } else { |
|
| 101 | - $class = 'HTMLPurifier_Language_' . $pcode; |
|
| 102 | - $file = $this->dir . '/Language/classes/' . $code . '.php'; |
|
| 103 | - if (file_exists($file) || class_exists($class, false)) { |
|
| 104 | - $lang = new $class($config, $context); |
|
| 105 | - } else { |
|
| 106 | - // Go fallback |
|
| 107 | - $raw_fallback = $this->getFallbackFor($code); |
|
| 108 | - $fallback = $raw_fallback ? $raw_fallback : 'en'; |
|
| 109 | - $depth++; |
|
| 110 | - $lang = $this->create($config, $context, $fallback); |
|
| 111 | - if (!$raw_fallback) { |
|
| 112 | - $lang->error = true; |
|
| 113 | - } |
|
| 114 | - $depth--; |
|
| 115 | - } |
|
| 116 | - } |
|
| 117 | - |
|
| 118 | - $lang->code = $code; |
|
| 119 | - |
|
| 120 | - return $lang; |
|
| 121 | - |
|
| 122 | - } |
|
| 123 | - |
|
| 124 | - /** |
|
| 125 | - * Returns the fallback language for language |
|
| 126 | - * @note Loads the original language into cache |
|
| 127 | - * @param $code string language code |
|
| 128 | - */ |
|
| 129 | - public function getFallbackFor($code) { |
|
| 130 | - $this->loadLanguage($code); |
|
| 131 | - return $this->cache[$code]['fallback']; |
|
| 132 | - } |
|
| 133 | - |
|
| 134 | - /** |
|
| 135 | - * Loads language into the cache, handles message file and fallbacks |
|
| 136 | - * @param $code string language code |
|
| 137 | - */ |
|
| 138 | - public function loadLanguage($code) { |
|
| 139 | - static $languages_seen = array(); // recursion guard |
|
| 140 | - |
|
| 141 | - // abort if we've already loaded it |
|
| 142 | - if (isset($this->cache[$code])) return; |
|
| 143 | - |
|
| 144 | - // generate filename |
|
| 145 | - $filename = $this->dir . '/Language/messages/' . $code . '.php'; |
|
| 146 | - |
|
| 147 | - // default fallback : may be overwritten by the ensuing include |
|
| 148 | - $fallback = ($code != 'en') ? 'en' : false; |
|
| 149 | - |
|
| 150 | - // load primary localisation |
|
| 151 | - if (!file_exists($filename)) { |
|
| 152 | - // skip the include: will rely solely on fallback |
|
| 153 | - $filename = $this->dir . '/Language/messages/en.php'; |
|
| 154 | - $cache = array(); |
|
| 155 | - } else { |
|
| 156 | - include $filename; |
|
| 157 | - $cache = compact($this->keys); |
|
| 158 | - } |
|
| 159 | - |
|
| 160 | - // load fallback localisation |
|
| 161 | - if (!empty($fallback)) { |
|
| 162 | - |
|
| 163 | - // infinite recursion guard |
|
| 164 | - if (isset($languages_seen[$code])) { |
|
| 165 | - trigger_error('Circular fallback reference in language ' . |
|
| 166 | - $code, E_USER_ERROR); |
|
| 167 | - $fallback = 'en'; |
|
| 168 | - } |
|
| 169 | - $language_seen[$code] = true; |
|
| 170 | - |
|
| 171 | - // load the fallback recursively |
|
| 172 | - $this->loadLanguage($fallback); |
|
| 173 | - $fallback_cache = $this->cache[$fallback]; |
|
| 174 | - |
|
| 175 | - // merge fallback with current language |
|
| 176 | - foreach ( $this->keys as $key ) { |
|
| 177 | - if (isset($cache[$key]) && isset($fallback_cache[$key])) { |
|
| 178 | - if (isset($this->mergeable_keys_map[$key])) { |
|
| 179 | - $cache[$key] = $cache[$key] + $fallback_cache[$key]; |
|
| 180 | - } elseif (isset($this->mergeable_keys_list[$key])) { |
|
| 181 | - $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); |
|
| 182 | - } |
|
| 183 | - } else { |
|
| 184 | - $cache[$key] = $fallback_cache[$key]; |
|
| 185 | - } |
|
| 186 | - } |
|
| 187 | - |
|
| 188 | - } |
|
| 189 | - |
|
| 190 | - // save to cache for later retrieval |
|
| 191 | - $this->cache[$code] = $cache; |
|
| 192 | - |
|
| 193 | - return; |
|
| 194 | - } |
|
| 13 | + /** |
|
| 14 | + * Cache of language code information used to load HTMLPurifier_Language objects |
|
| 15 | + * Structure is: $factory->cache[$language_code][$key] = $value |
|
| 16 | + * @value array map |
|
| 17 | + */ |
|
| 18 | + public $cache; |
|
| 19 | + |
|
| 20 | + /** |
|
| 21 | + * Valid keys in the HTMLPurifier_Language object. Designates which |
|
| 22 | + * variables to slurp out of a message file. |
|
| 23 | + * @value array list |
|
| 24 | + */ |
|
| 25 | + public $keys = array('fallback', 'messages', 'errorNames'); |
|
| 26 | + |
|
| 27 | + /** |
|
| 28 | + * Instance of HTMLPurifier_AttrDef_Lang to validate language codes |
|
| 29 | + * @value object HTMLPurifier_AttrDef_Lang |
|
| 30 | + */ |
|
| 31 | + protected $validator; |
|
| 32 | + |
|
| 33 | + /** |
|
| 34 | + * Cached copy of dirname(__FILE__), directory of current file without |
|
| 35 | + * trailing slash |
|
| 36 | + * @value string filename |
|
| 37 | + */ |
|
| 38 | + protected $dir; |
|
| 39 | + |
|
| 40 | + /** |
|
| 41 | + * Keys whose contents are a hash map and can be merged |
|
| 42 | + * @value array lookup |
|
| 43 | + */ |
|
| 44 | + protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true); |
|
| 45 | + |
|
| 46 | + /** |
|
| 47 | + * Keys whose contents are a list and can be merged |
|
| 48 | + * @value array lookup |
|
| 49 | + */ |
|
| 50 | + protected $mergeable_keys_list = array(); |
|
| 51 | + |
|
| 52 | + /** |
|
| 53 | + * Retrieve sole instance of the factory. |
|
| 54 | + * @param $prototype Optional prototype to overload sole instance with, |
|
| 55 | + * or bool true to reset to default factory. |
|
| 56 | + */ |
|
| 57 | + public static function instance($prototype = null) { |
|
| 58 | + static $instance = null; |
|
| 59 | + if ($prototype !== null) { |
|
| 60 | + $instance = $prototype; |
|
| 61 | + } elseif ($instance === null || $prototype == true) { |
|
| 62 | + $instance = new HTMLPurifier_LanguageFactory(); |
|
| 63 | + $instance->setup(); |
|
| 64 | + } |
|
| 65 | + return $instance; |
|
| 66 | + } |
|
| 67 | + |
|
| 68 | + /** |
|
| 69 | + * Sets up the singleton, much like a constructor |
|
| 70 | + * @note Prevents people from getting this outside of the singleton |
|
| 71 | + */ |
|
| 72 | + public function setup() { |
|
| 73 | + $this->validator = new HTMLPurifier_AttrDef_Lang(); |
|
| 74 | + $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier'; |
|
| 75 | + } |
|
| 76 | + |
|
| 77 | + /** |
|
| 78 | + * Creates a language object, handles class fallbacks |
|
| 79 | + * @param $config Instance of HTMLPurifier_Config |
|
| 80 | + * @param $context Instance of HTMLPurifier_Context |
|
| 81 | + * @param $code Code to override configuration with. Private parameter. |
|
| 82 | + */ |
|
| 83 | + public function create($config, $context, $code = false) { |
|
| 84 | + |
|
| 85 | + // validate language code |
|
| 86 | + if ($code === false) { |
|
| 87 | + $code = $this->validator->validate( |
|
| 88 | + $config->get('Core.Language'), $config, $context |
|
| 89 | + ); |
|
| 90 | + } else { |
|
| 91 | + $code = $this->validator->validate($code, $config, $context); |
|
| 92 | + } |
|
| 93 | + if ($code === false) $code = 'en'; // malformed code becomes English |
|
| 94 | + |
|
| 95 | + $pcode = str_replace('-', '_', $code); // make valid PHP classname |
|
| 96 | + static $depth = 0; // recursion protection |
|
| 97 | + |
|
| 98 | + if ($code == 'en') { |
|
| 99 | + $lang = new HTMLPurifier_Language($config, $context); |
|
| 100 | + } else { |
|
| 101 | + $class = 'HTMLPurifier_Language_' . $pcode; |
|
| 102 | + $file = $this->dir . '/Language/classes/' . $code . '.php'; |
|
| 103 | + if (file_exists($file) || class_exists($class, false)) { |
|
| 104 | + $lang = new $class($config, $context); |
|
| 105 | + } else { |
|
| 106 | + // Go fallback |
|
| 107 | + $raw_fallback = $this->getFallbackFor($code); |
|
| 108 | + $fallback = $raw_fallback ? $raw_fallback : 'en'; |
|
| 109 | + $depth++; |
|
| 110 | + $lang = $this->create($config, $context, $fallback); |
|
| 111 | + if (!$raw_fallback) { |
|
| 112 | + $lang->error = true; |
|
| 113 | + } |
|
| 114 | + $depth--; |
|
| 115 | + } |
|
| 116 | + } |
|
| 117 | + |
|
| 118 | + $lang->code = $code; |
|
| 119 | + |
|
| 120 | + return $lang; |
|
| 121 | + |
|
| 122 | + } |
|
| 123 | + |
|
| 124 | + /** |
|
| 125 | + * Returns the fallback language for language |
|
| 126 | + * @note Loads the original language into cache |
|
| 127 | + * @param $code string language code |
|
| 128 | + */ |
|
| 129 | + public function getFallbackFor($code) { |
|
| 130 | + $this->loadLanguage($code); |
|
| 131 | + return $this->cache[$code]['fallback']; |
|
| 132 | + } |
|
| 133 | + |
|
| 134 | + /** |
|
| 135 | + * Loads language into the cache, handles message file and fallbacks |
|
| 136 | + * @param $code string language code |
|
| 137 | + */ |
|
| 138 | + public function loadLanguage($code) { |
|
| 139 | + static $languages_seen = array(); // recursion guard |
|
| 140 | + |
|
| 141 | + // abort if we've already loaded it |
|
| 142 | + if (isset($this->cache[$code])) return; |
|
| 143 | + |
|
| 144 | + // generate filename |
|
| 145 | + $filename = $this->dir . '/Language/messages/' . $code . '.php'; |
|
| 146 | + |
|
| 147 | + // default fallback : may be overwritten by the ensuing include |
|
| 148 | + $fallback = ($code != 'en') ? 'en' : false; |
|
| 149 | + |
|
| 150 | + // load primary localisation |
|
| 151 | + if (!file_exists($filename)) { |
|
| 152 | + // skip the include: will rely solely on fallback |
|
| 153 | + $filename = $this->dir . '/Language/messages/en.php'; |
|
| 154 | + $cache = array(); |
|
| 155 | + } else { |
|
| 156 | + include $filename; |
|
| 157 | + $cache = compact($this->keys); |
|
| 158 | + } |
|
| 159 | + |
|
| 160 | + // load fallback localisation |
|
| 161 | + if (!empty($fallback)) { |
|
| 162 | + |
|
| 163 | + // infinite recursion guard |
|
| 164 | + if (isset($languages_seen[$code])) { |
|
| 165 | + trigger_error('Circular fallback reference in language ' . |
|
| 166 | + $code, E_USER_ERROR); |
|
| 167 | + $fallback = 'en'; |
|
| 168 | + } |
|
| 169 | + $language_seen[$code] = true; |
|
| 170 | + |
|
| 171 | + // load the fallback recursively |
|
| 172 | + $this->loadLanguage($fallback); |
|
| 173 | + $fallback_cache = $this->cache[$fallback]; |
|
| 174 | + |
|
| 175 | + // merge fallback with current language |
|
| 176 | + foreach ( $this->keys as $key ) { |
|
| 177 | + if (isset($cache[$key]) && isset($fallback_cache[$key])) { |
|
| 178 | + if (isset($this->mergeable_keys_map[$key])) { |
|
| 179 | + $cache[$key] = $cache[$key] + $fallback_cache[$key]; |
|
| 180 | + } elseif (isset($this->mergeable_keys_list[$key])) { |
|
| 181 | + $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); |
|
| 182 | + } |
|
| 183 | + } else { |
|
| 184 | + $cache[$key] = $fallback_cache[$key]; |
|
| 185 | + } |
|
| 186 | + } |
|
| 187 | + |
|
| 188 | + } |
|
| 189 | + |
|
| 190 | + // save to cache for later retrieval |
|
| 191 | + $this->cache[$code] = $cache; |
|
| 192 | + |
|
| 193 | + return; |
|
| 194 | + } |
|
| 195 | 195 | |
| 196 | 196 | } |
| 197 | 197 | |
@@ -90,7 +90,10 @@ discard block |
||
| 90 | 90 | } else { |
| 91 | 91 | $code = $this->validator->validate($code, $config, $context); |
| 92 | 92 | } |
| 93 | - if ($code === false) $code = 'en'; // malformed code becomes English |
|
| 93 | + if ($code === false) { |
|
| 94 | + $code = 'en'; |
|
| 95 | + } |
|
| 96 | + // malformed code becomes English |
|
| 94 | 97 | |
| 95 | 98 | $pcode = str_replace('-', '_', $code); // make valid PHP classname |
| 96 | 99 | static $depth = 0; // recursion protection |
@@ -139,7 +142,9 @@ discard block |
||
| 139 | 142 | static $languages_seen = array(); // recursion guard |
| 140 | 143 | |
| 141 | 144 | // abort if we've already loaded it |
| 142 | - if (isset($this->cache[$code])) return; |
|
| 145 | + if (isset($this->cache[$code])) { |
|
| 146 | + return; |
|
| 147 | + } |
|
| 143 | 148 | |
| 144 | 149 | // generate filename |
| 145 | 150 | $filename = $this->dir . '/Language/messages/' . $code . '.php'; |
@@ -71,7 +71,7 @@ discard block |
||
| 71 | 71 | */ |
| 72 | 72 | public function setup() { |
| 73 | 73 | $this->validator = new HTMLPurifier_AttrDef_Lang(); |
| 74 | - $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier'; |
|
| 74 | + $this->dir = HTMLPURIFIER_PREFIX.'/HTMLPurifier'; |
|
| 75 | 75 | } |
| 76 | 76 | |
| 77 | 77 | /** |
@@ -98,8 +98,8 @@ discard block |
||
| 98 | 98 | if ($code == 'en') { |
| 99 | 99 | $lang = new HTMLPurifier_Language($config, $context); |
| 100 | 100 | } else { |
| 101 | - $class = 'HTMLPurifier_Language_' . $pcode; |
|
| 102 | - $file = $this->dir . '/Language/classes/' . $code . '.php'; |
|
| 101 | + $class = 'HTMLPurifier_Language_'.$pcode; |
|
| 102 | + $file = $this->dir.'/Language/classes/'.$code.'.php'; |
|
| 103 | 103 | if (file_exists($file) || class_exists($class, false)) { |
| 104 | 104 | $lang = new $class($config, $context); |
| 105 | 105 | } else { |
@@ -142,7 +142,7 @@ discard block |
||
| 142 | 142 | if (isset($this->cache[$code])) return; |
| 143 | 143 | |
| 144 | 144 | // generate filename |
| 145 | - $filename = $this->dir . '/Language/messages/' . $code . '.php'; |
|
| 145 | + $filename = $this->dir.'/Language/messages/'.$code.'.php'; |
|
| 146 | 146 | |
| 147 | 147 | // default fallback : may be overwritten by the ensuing include |
| 148 | 148 | $fallback = ($code != 'en') ? 'en' : false; |
@@ -150,7 +150,7 @@ discard block |
||
| 150 | 150 | // load primary localisation |
| 151 | 151 | if (!file_exists($filename)) { |
| 152 | 152 | // skip the include: will rely solely on fallback |
| 153 | - $filename = $this->dir . '/Language/messages/en.php'; |
|
| 153 | + $filename = $this->dir.'/Language/messages/en.php'; |
|
| 154 | 154 | $cache = array(); |
| 155 | 155 | } else { |
| 156 | 156 | include $filename; |
@@ -162,7 +162,7 @@ discard block |
||
| 162 | 162 | |
| 163 | 163 | // infinite recursion guard |
| 164 | 164 | if (isset($languages_seen[$code])) { |
| 165 | - trigger_error('Circular fallback reference in language ' . |
|
| 165 | + trigger_error('Circular fallback reference in language '. |
|
| 166 | 166 | $code, E_USER_ERROR); |
| 167 | 167 | $fallback = 'en'; |
| 168 | 168 | } |
@@ -173,12 +173,12 @@ discard block |
||
| 173 | 173 | $fallback_cache = $this->cache[$fallback]; |
| 174 | 174 | |
| 175 | 175 | // merge fallback with current language |
| 176 | - foreach ( $this->keys as $key ) { |
|
| 176 | + foreach ($this->keys as $key) { |
|
| 177 | 177 | if (isset($cache[$key]) && isset($fallback_cache[$key])) { |
| 178 | 178 | if (isset($this->mergeable_keys_map[$key])) { |
| 179 | 179 | $cache[$key] = $cache[$key] + $fallback_cache[$key]; |
| 180 | 180 | } elseif (isset($this->mergeable_keys_list[$key])) { |
| 181 | - $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); |
|
| 181 | + $cache[$key] = array_merge($fallback_cache[$key], $cache[$key]); |
|
| 182 | 182 | } |
| 183 | 183 | } else { |
| 184 | 184 | $cache[$key] = $fallback_cache[$key]; |
@@ -7,108 +7,108 @@ |
||
| 7 | 7 | class HTMLPurifier_Length |
| 8 | 8 | { |
| 9 | 9 | |
| 10 | - /** |
|
| 11 | - * String numeric magnitude. |
|
| 12 | - */ |
|
| 13 | - protected $n; |
|
| 10 | + /** |
|
| 11 | + * String numeric magnitude. |
|
| 12 | + */ |
|
| 13 | + protected $n; |
|
| 14 | 14 | |
| 15 | - /** |
|
| 16 | - * String unit. False is permitted if $n = 0. |
|
| 17 | - */ |
|
| 18 | - protected $unit; |
|
| 15 | + /** |
|
| 16 | + * String unit. False is permitted if $n = 0. |
|
| 17 | + */ |
|
| 18 | + protected $unit; |
|
| 19 | 19 | |
| 20 | - /** |
|
| 21 | - * Whether or not this length is valid. Null if not calculated yet. |
|
| 22 | - */ |
|
| 23 | - protected $isValid; |
|
| 20 | + /** |
|
| 21 | + * Whether or not this length is valid. Null if not calculated yet. |
|
| 22 | + */ |
|
| 23 | + protected $isValid; |
|
| 24 | 24 | |
| 25 | - /** |
|
| 26 | - * Lookup array of units recognized by CSS 2.1 |
|
| 27 | - */ |
|
| 28 | - protected static $allowedUnits = array( |
|
| 29 | - 'em' => true, 'ex' => true, 'px' => true, 'in' => true, |
|
| 30 | - 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true |
|
| 31 | - ); |
|
| 25 | + /** |
|
| 26 | + * Lookup array of units recognized by CSS 2.1 |
|
| 27 | + */ |
|
| 28 | + protected static $allowedUnits = array( |
|
| 29 | + 'em' => true, 'ex' => true, 'px' => true, 'in' => true, |
|
| 30 | + 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true |
|
| 31 | + ); |
|
| 32 | 32 | |
| 33 | - /** |
|
| 34 | - * @param number $n Magnitude |
|
| 35 | - * @param string $u Unit |
|
| 36 | - */ |
|
| 37 | - public function __construct($n = '0', $u = false) { |
|
| 38 | - $this->n = (string) $n; |
|
| 39 | - $this->unit = $u !== false ? (string) $u : false; |
|
| 40 | - } |
|
| 33 | + /** |
|
| 34 | + * @param number $n Magnitude |
|
| 35 | + * @param string $u Unit |
|
| 36 | + */ |
|
| 37 | + public function __construct($n = '0', $u = false) { |
|
| 38 | + $this->n = (string) $n; |
|
| 39 | + $this->unit = $u !== false ? (string) $u : false; |
|
| 40 | + } |
|
| 41 | 41 | |
| 42 | - /** |
|
| 43 | - * @param string $s Unit string, like '2em' or '3.4in' |
|
| 44 | - * @warning Does not perform validation. |
|
| 45 | - */ |
|
| 46 | - static public function make($s) { |
|
| 47 | - if ($s instanceof HTMLPurifier_Length) return $s; |
|
| 48 | - $n_length = strspn($s, '1234567890.+-'); |
|
| 49 | - $n = substr($s, 0, $n_length); |
|
| 50 | - $unit = substr($s, $n_length); |
|
| 51 | - if ($unit === '') $unit = false; |
|
| 52 | - return new HTMLPurifier_Length($n, $unit); |
|
| 53 | - } |
|
| 42 | + /** |
|
| 43 | + * @param string $s Unit string, like '2em' or '3.4in' |
|
| 44 | + * @warning Does not perform validation. |
|
| 45 | + */ |
|
| 46 | + static public function make($s) { |
|
| 47 | + if ($s instanceof HTMLPurifier_Length) return $s; |
|
| 48 | + $n_length = strspn($s, '1234567890.+-'); |
|
| 49 | + $n = substr($s, 0, $n_length); |
|
| 50 | + $unit = substr($s, $n_length); |
|
| 51 | + if ($unit === '') $unit = false; |
|
| 52 | + return new HTMLPurifier_Length($n, $unit); |
|
| 53 | + } |
|
| 54 | 54 | |
| 55 | - /** |
|
| 56 | - * Validates the number and unit. |
|
| 57 | - */ |
|
| 58 | - protected function validate() { |
|
| 59 | - // Special case: |
|
| 60 | - if ($this->n === '+0' || $this->n === '-0') $this->n = '0'; |
|
| 61 | - if ($this->n === '0' && $this->unit === false) return true; |
|
| 62 | - if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); |
|
| 63 | - if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false; |
|
| 64 | - // Hack: |
|
| 65 | - $def = new HTMLPurifier_AttrDef_CSS_Number(); |
|
| 66 | - $result = $def->validate($this->n, false, false); |
|
| 67 | - if ($result === false) return false; |
|
| 68 | - $this->n = $result; |
|
| 69 | - return true; |
|
| 70 | - } |
|
| 55 | + /** |
|
| 56 | + * Validates the number and unit. |
|
| 57 | + */ |
|
| 58 | + protected function validate() { |
|
| 59 | + // Special case: |
|
| 60 | + if ($this->n === '+0' || $this->n === '-0') $this->n = '0'; |
|
| 61 | + if ($this->n === '0' && $this->unit === false) return true; |
|
| 62 | + if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); |
|
| 63 | + if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false; |
|
| 64 | + // Hack: |
|
| 65 | + $def = new HTMLPurifier_AttrDef_CSS_Number(); |
|
| 66 | + $result = $def->validate($this->n, false, false); |
|
| 67 | + if ($result === false) return false; |
|
| 68 | + $this->n = $result; |
|
| 69 | + return true; |
|
| 70 | + } |
|
| 71 | 71 | |
| 72 | - /** |
|
| 73 | - * Returns string representation of number. |
|
| 74 | - */ |
|
| 75 | - public function toString() { |
|
| 76 | - if (!$this->isValid()) return false; |
|
| 77 | - return $this->n . $this->unit; |
|
| 78 | - } |
|
| 72 | + /** |
|
| 73 | + * Returns string representation of number. |
|
| 74 | + */ |
|
| 75 | + public function toString() { |
|
| 76 | + if (!$this->isValid()) return false; |
|
| 77 | + return $this->n . $this->unit; |
|
| 78 | + } |
|
| 79 | 79 | |
| 80 | - /** |
|
| 81 | - * Retrieves string numeric magnitude. |
|
| 82 | - */ |
|
| 83 | - public function getN() {return $this->n;} |
|
| 80 | + /** |
|
| 81 | + * Retrieves string numeric magnitude. |
|
| 82 | + */ |
|
| 83 | + public function getN() {return $this->n;} |
|
| 84 | 84 | |
| 85 | - /** |
|
| 86 | - * Retrieves string unit. |
|
| 87 | - */ |
|
| 88 | - public function getUnit() {return $this->unit;} |
|
| 85 | + /** |
|
| 86 | + * Retrieves string unit. |
|
| 87 | + */ |
|
| 88 | + public function getUnit() {return $this->unit;} |
|
| 89 | 89 | |
| 90 | - /** |
|
| 91 | - * Returns true if this length unit is valid. |
|
| 92 | - */ |
|
| 93 | - public function isValid() { |
|
| 94 | - if ($this->isValid === null) $this->isValid = $this->validate(); |
|
| 95 | - return $this->isValid; |
|
| 96 | - } |
|
| 90 | + /** |
|
| 91 | + * Returns true if this length unit is valid. |
|
| 92 | + */ |
|
| 93 | + public function isValid() { |
|
| 94 | + if ($this->isValid === null) $this->isValid = $this->validate(); |
|
| 95 | + return $this->isValid; |
|
| 96 | + } |
|
| 97 | 97 | |
| 98 | - /** |
|
| 99 | - * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal. |
|
| 100 | - * @warning If both values are too large or small, this calculation will |
|
| 101 | - * not work properly |
|
| 102 | - */ |
|
| 103 | - public function compareTo($l) { |
|
| 104 | - if ($l === false) return false; |
|
| 105 | - if ($l->unit !== $this->unit) { |
|
| 106 | - $converter = new HTMLPurifier_UnitConverter(); |
|
| 107 | - $l = $converter->convert($l, $this->unit); |
|
| 108 | - if ($l === false) return false; |
|
| 109 | - } |
|
| 110 | - return $this->n - $l->n; |
|
| 111 | - } |
|
| 98 | + /** |
|
| 99 | + * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal. |
|
| 100 | + * @warning If both values are too large or small, this calculation will |
|
| 101 | + * not work properly |
|
| 102 | + */ |
|
| 103 | + public function compareTo($l) { |
|
| 104 | + if ($l === false) return false; |
|
| 105 | + if ($l->unit !== $this->unit) { |
|
| 106 | + $converter = new HTMLPurifier_UnitConverter(); |
|
| 107 | + $l = $converter->convert($l, $this->unit); |
|
| 108 | + if ($l === false) return false; |
|
| 109 | + } |
|
| 110 | + return $this->n - $l->n; |
|
| 111 | + } |
|
| 112 | 112 | |
| 113 | 113 | } |
| 114 | 114 | |
@@ -44,11 +44,15 @@ discard block |
||
| 44 | 44 | * @warning Does not perform validation. |
| 45 | 45 | */ |
| 46 | 46 | static public function make($s) { |
| 47 | - if ($s instanceof HTMLPurifier_Length) return $s; |
|
| 47 | + if ($s instanceof HTMLPurifier_Length) { |
|
| 48 | + return $s; |
|
| 49 | + } |
|
| 48 | 50 | $n_length = strspn($s, '1234567890.+-'); |
| 49 | 51 | $n = substr($s, 0, $n_length); |
| 50 | 52 | $unit = substr($s, $n_length); |
| 51 | - if ($unit === '') $unit = false; |
|
| 53 | + if ($unit === '') { |
|
| 54 | + $unit = false; |
|
| 55 | + } |
|
| 52 | 56 | return new HTMLPurifier_Length($n, $unit); |
| 53 | 57 | } |
| 54 | 58 | |
@@ -57,14 +61,24 @@ discard block |
||
| 57 | 61 | */ |
| 58 | 62 | protected function validate() { |
| 59 | 63 | // Special case: |
| 60 | - if ($this->n === '+0' || $this->n === '-0') $this->n = '0'; |
|
| 61 | - if ($this->n === '0' && $this->unit === false) return true; |
|
| 62 | - if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); |
|
| 63 | - if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false; |
|
| 64 | + if ($this->n === '+0' || $this->n === '-0') { |
|
| 65 | + $this->n = '0'; |
|
| 66 | + } |
|
| 67 | + if ($this->n === '0' && $this->unit === false) { |
|
| 68 | + return true; |
|
| 69 | + } |
|
| 70 | + if (!ctype_lower($this->unit)) { |
|
| 71 | + $this->unit = strtolower($this->unit); |
|
| 72 | + } |
|
| 73 | + if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) { |
|
| 74 | + return false; |
|
| 75 | + } |
|
| 64 | 76 | // Hack: |
| 65 | 77 | $def = new HTMLPurifier_AttrDef_CSS_Number(); |
| 66 | 78 | $result = $def->validate($this->n, false, false); |
| 67 | - if ($result === false) return false; |
|
| 79 | + if ($result === false) { |
|
| 80 | + return false; |
|
| 81 | + } |
|
| 68 | 82 | $this->n = $result; |
| 69 | 83 | return true; |
| 70 | 84 | } |
@@ -73,7 +87,9 @@ discard block |
||
| 73 | 87 | * Returns string representation of number. |
| 74 | 88 | */ |
| 75 | 89 | public function toString() { |
| 76 | - if (!$this->isValid()) return false; |
|
| 90 | + if (!$this->isValid()) { |
|
| 91 | + return false; |
|
| 92 | + } |
|
| 77 | 93 | return $this->n . $this->unit; |
| 78 | 94 | } |
| 79 | 95 | |
@@ -91,7 +107,9 @@ discard block |
||
| 91 | 107 | * Returns true if this length unit is valid. |
| 92 | 108 | */ |
| 93 | 109 | public function isValid() { |
| 94 | - if ($this->isValid === null) $this->isValid = $this->validate(); |
|
| 110 | + if ($this->isValid === null) { |
|
| 111 | + $this->isValid = $this->validate(); |
|
| 112 | + } |
|
| 95 | 113 | return $this->isValid; |
| 96 | 114 | } |
| 97 | 115 | |
@@ -101,11 +119,15 @@ discard block |
||
| 101 | 119 | * not work properly |
| 102 | 120 | */ |
| 103 | 121 | public function compareTo($l) { |
| 104 | - if ($l === false) return false; |
|
| 122 | + if ($l === false) { |
|
| 123 | + return false; |
|
| 124 | + } |
|
| 105 | 125 | if ($l->unit !== $this->unit) { |
| 106 | 126 | $converter = new HTMLPurifier_UnitConverter(); |
| 107 | 127 | $l = $converter->convert($l, $this->unit); |
| 108 | - if ($l === false) return false; |
|
| 128 | + if ($l === false) { |
|
| 129 | + return false; |
|
| 130 | + } |
|
| 109 | 131 | } |
| 110 | 132 | return $this->n - $l->n; |
| 111 | 133 | } |
@@ -74,18 +74,18 @@ |
||
| 74 | 74 | */ |
| 75 | 75 | public function toString() { |
| 76 | 76 | if (!$this->isValid()) return false; |
| 77 | - return $this->n . $this->unit; |
|
| 77 | + return $this->n.$this->unit; |
|
| 78 | 78 | } |
| 79 | 79 | |
| 80 | 80 | /** |
| 81 | 81 | * Retrieves string numeric magnitude. |
| 82 | 82 | */ |
| 83 | - public function getN() {return $this->n;} |
|
| 83 | + public function getN() {return $this->n; } |
|
| 84 | 84 | |
| 85 | 85 | /** |
| 86 | 86 | * Retrieves string unit. |
| 87 | 87 | */ |
| 88 | - public function getUnit() {return $this->unit;} |
|
| 88 | + public function getUnit() {return $this->unit; } |
|
| 89 | 89 | |
| 90 | 90 | /** |
| 91 | 91 | * Returns true if this length unit is valid. |
@@ -42,284 +42,284 @@ |
||
| 42 | 42 | class HTMLPurifier_Lexer |
| 43 | 43 | { |
| 44 | 44 | |
| 45 | - /** |
|
| 46 | - * Whether or not this lexer implements line-number/column-number tracking. |
|
| 47 | - * If it does, set to true. |
|
| 48 | - */ |
|
| 49 | - public $tracksLineNumbers = false; |
|
| 50 | - |
|
| 51 | - // -- STATIC ---------------------------------------------------------- |
|
| 52 | - |
|
| 53 | - /** |
|
| 54 | - * Retrieves or sets the default Lexer as a Prototype Factory. |
|
| 55 | - * |
|
| 56 | - * By default HTMLPurifier_Lexer_DOMLex will be returned. There are |
|
| 57 | - * a few exceptions involving special features that only DirectLex |
|
| 58 | - * implements. |
|
| 59 | - * |
|
| 60 | - * @note The behavior of this class has changed, rather than accepting |
|
| 61 | - * a prototype object, it now accepts a configuration object. |
|
| 62 | - * To specify your own prototype, set %Core.LexerImpl to it. |
|
| 63 | - * This change in behavior de-singletonizes the lexer object. |
|
| 64 | - * |
|
| 65 | - * @param $config Instance of HTMLPurifier_Config |
|
| 66 | - * @return Concrete lexer. |
|
| 67 | - */ |
|
| 68 | - public static function create($config) { |
|
| 69 | - |
|
| 70 | - if (!($config instanceof HTMLPurifier_Config)) { |
|
| 71 | - $lexer = $config; |
|
| 72 | - trigger_error("Passing a prototype to |
|
| 45 | + /** |
|
| 46 | + * Whether or not this lexer implements line-number/column-number tracking. |
|
| 47 | + * If it does, set to true. |
|
| 48 | + */ |
|
| 49 | + public $tracksLineNumbers = false; |
|
| 50 | + |
|
| 51 | + // -- STATIC ---------------------------------------------------------- |
|
| 52 | + |
|
| 53 | + /** |
|
| 54 | + * Retrieves or sets the default Lexer as a Prototype Factory. |
|
| 55 | + * |
|
| 56 | + * By default HTMLPurifier_Lexer_DOMLex will be returned. There are |
|
| 57 | + * a few exceptions involving special features that only DirectLex |
|
| 58 | + * implements. |
|
| 59 | + * |
|
| 60 | + * @note The behavior of this class has changed, rather than accepting |
|
| 61 | + * a prototype object, it now accepts a configuration object. |
|
| 62 | + * To specify your own prototype, set %Core.LexerImpl to it. |
|
| 63 | + * This change in behavior de-singletonizes the lexer object. |
|
| 64 | + * |
|
| 65 | + * @param $config Instance of HTMLPurifier_Config |
|
| 66 | + * @return Concrete lexer. |
|
| 67 | + */ |
|
| 68 | + public static function create($config) { |
|
| 69 | + |
|
| 70 | + if (!($config instanceof HTMLPurifier_Config)) { |
|
| 71 | + $lexer = $config; |
|
| 72 | + trigger_error("Passing a prototype to |
|
| 73 | 73 | HTMLPurifier_Lexer::create() is deprecated, please instead |
| 74 | 74 | use %Core.LexerImpl", E_USER_WARNING); |
| 75 | - } else { |
|
| 76 | - $lexer = $config->get('Core.LexerImpl'); |
|
| 77 | - } |
|
| 78 | - |
|
| 79 | - $needs_tracking = |
|
| 80 | - $config->get('Core.MaintainLineNumbers') || |
|
| 81 | - $config->get('Core.CollectErrors'); |
|
| 82 | - |
|
| 83 | - $inst = null; |
|
| 84 | - if (is_object($lexer)) { |
|
| 85 | - $inst = $lexer; |
|
| 86 | - } else { |
|
| 87 | - |
|
| 88 | - if (is_null($lexer)) { do { |
|
| 89 | - // auto-detection algorithm |
|
| 90 | - |
|
| 91 | - if ($needs_tracking) { |
|
| 92 | - $lexer = 'DirectLex'; |
|
| 93 | - break; |
|
| 94 | - } |
|
| 95 | - |
|
| 96 | - if ( |
|
| 97 | - class_exists('DOMDocument') && |
|
| 98 | - method_exists('DOMDocument', 'loadHTML') && |
|
| 99 | - !extension_loaded('domxml') |
|
| 100 | - ) { |
|
| 101 | - // check for DOM support, because while it's part of the |
|
| 102 | - // core, it can be disabled compile time. Also, the PECL |
|
| 103 | - // domxml extension overrides the default DOM, and is evil |
|
| 104 | - // and nasty and we shan't bother to support it |
|
| 105 | - $lexer = 'DOMLex'; |
|
| 106 | - } else { |
|
| 107 | - $lexer = 'DirectLex'; |
|
| 108 | - } |
|
| 109 | - |
|
| 110 | - } while(0); } // do..while so we can break |
|
| 111 | - |
|
| 112 | - // instantiate recognized string names |
|
| 113 | - switch ($lexer) { |
|
| 114 | - case 'DOMLex': |
|
| 115 | - $inst = new HTMLPurifier_Lexer_DOMLex(); |
|
| 116 | - break; |
|
| 117 | - case 'DirectLex': |
|
| 118 | - $inst = new HTMLPurifier_Lexer_DirectLex(); |
|
| 119 | - break; |
|
| 120 | - case 'PH5P': |
|
| 121 | - $inst = new HTMLPurifier_Lexer_PH5P(); |
|
| 122 | - break; |
|
| 123 | - default: |
|
| 124 | - throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer, ENT_COMPAT | ENT_HTML401, 'UTF-8', false)); |
|
| 125 | - } |
|
| 126 | - } |
|
| 127 | - |
|
| 128 | - if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated'); |
|
| 129 | - |
|
| 130 | - // once PHP DOM implements native line numbers, or we |
|
| 131 | - // hack out something using XSLT, remove this stipulation |
|
| 132 | - if ($needs_tracking && !$inst->tracksLineNumbers) { |
|
| 133 | - throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'); |
|
| 134 | - } |
|
| 135 | - |
|
| 136 | - return $inst; |
|
| 137 | - |
|
| 138 | - } |
|
| 139 | - |
|
| 140 | - // -- CONVENIENCE MEMBERS --------------------------------------------- |
|
| 141 | - |
|
| 142 | - public function __construct() { |
|
| 143 | - $this->_entity_parser = new HTMLPurifier_EntityParser(); |
|
| 144 | - } |
|
| 145 | - |
|
| 146 | - /** |
|
| 147 | - * Most common entity to raw value conversion table for special entities. |
|
| 148 | - */ |
|
| 149 | - protected $_special_entity2str = |
|
| 150 | - array( |
|
| 151 | - '"' => '"', |
|
| 152 | - '&' => '&', |
|
| 153 | - '<' => '<', |
|
| 154 | - '>' => '>', |
|
| 155 | - ''' => "'", |
|
| 156 | - ''' => "'", |
|
| 157 | - ''' => "'" |
|
| 158 | - ); |
|
| 159 | - |
|
| 160 | - /** |
|
| 161 | - * Parses special entities into the proper characters. |
|
| 162 | - * |
|
| 163 | - * This string will translate escaped versions of the special characters |
|
| 164 | - * into the correct ones. |
|
| 165 | - * |
|
| 166 | - * @warning |
|
| 167 | - * You should be able to treat the output of this function as |
|
| 168 | - * completely parsed, but that's only because all other entities should |
|
| 169 | - * have been handled previously in substituteNonSpecialEntities() |
|
| 170 | - * |
|
| 171 | - * @param $string String character data to be parsed. |
|
| 172 | - * @returns Parsed character data. |
|
| 173 | - */ |
|
| 174 | - public function parseData($string) { |
|
| 175 | - |
|
| 176 | - // following functions require at least one character |
|
| 177 | - if ($string === '') return ''; |
|
| 178 | - |
|
| 179 | - // subtracts amps that cannot possibly be escaped |
|
| 180 | - $num_amp = substr_count($string, '&') - substr_count($string, '& ') - |
|
| 181 | - ($string[strlen($string)-1] === '&' ? 1 : 0); |
|
| 182 | - |
|
| 183 | - if (!$num_amp) return $string; // abort if no entities |
|
| 184 | - $num_esc_amp = substr_count($string, '&'); |
|
| 185 | - $string = strtr($string, $this->_special_entity2str); |
|
| 186 | - |
|
| 187 | - // code duplication for sake of optimization, see above |
|
| 188 | - $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - |
|
| 189 | - ($string[strlen($string)-1] === '&' ? 1 : 0); |
|
| 190 | - |
|
| 191 | - if ($num_amp_2 <= $num_esc_amp) return $string; |
|
| 192 | - |
|
| 193 | - // hmm... now we have some uncommon entities. Use the callback. |
|
| 194 | - $string = $this->_entity_parser->substituteSpecialEntities($string); |
|
| 195 | - return $string; |
|
| 196 | - } |
|
| 197 | - |
|
| 198 | - /** |
|
| 199 | - * Lexes an HTML string into tokens. |
|
| 200 | - * |
|
| 201 | - * @param $string String HTML. |
|
| 202 | - * @return HTMLPurifier_Token array representation of HTML. |
|
| 203 | - */ |
|
| 204 | - public function tokenizeHTML($string, $config, $context) { |
|
| 205 | - trigger_error('Call to abstract class', E_USER_ERROR); |
|
| 206 | - } |
|
| 207 | - |
|
| 208 | - /** |
|
| 209 | - * Translates CDATA sections into regular sections (through escaping). |
|
| 210 | - * |
|
| 211 | - * @param $string HTML string to process. |
|
| 212 | - * @returns HTML with CDATA sections escaped. |
|
| 213 | - */ |
|
| 214 | - protected static function escapeCDATA($string) { |
|
| 215 | - return preg_replace_callback( |
|
| 216 | - '/<!\[CDATA\[(.+?)\]\]>/s', |
|
| 217 | - array('HTMLPurifier_Lexer', 'CDATACallback'), |
|
| 218 | - $string |
|
| 219 | - ); |
|
| 220 | - } |
|
| 221 | - |
|
| 222 | - /** |
|
| 223 | - * Special CDATA case that is especially convoluted for <script> |
|
| 224 | - */ |
|
| 225 | - protected static function escapeCommentedCDATA($string) { |
|
| 226 | - return preg_replace_callback( |
|
| 227 | - '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s', |
|
| 228 | - array('HTMLPurifier_Lexer', 'CDATACallback'), |
|
| 229 | - $string |
|
| 230 | - ); |
|
| 231 | - } |
|
| 232 | - |
|
| 233 | - /** |
|
| 234 | - * Special Internet Explorer conditional comments should be removed. |
|
| 235 | - */ |
|
| 236 | - protected static function removeIEConditional($string) { |
|
| 237 | - return preg_replace( |
|
| 238 | - '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings |
|
| 239 | - '', |
|
| 240 | - $string |
|
| 241 | - ); |
|
| 242 | - } |
|
| 243 | - |
|
| 244 | - /** |
|
| 245 | - * Callback function for escapeCDATA() that does the work. |
|
| 246 | - * |
|
| 247 | - * @warning Though this is public in order to let the callback happen, |
|
| 248 | - * calling it directly is not recommended. |
|
| 249 | - * @params $matches PCRE matches array, with index 0 the entire match |
|
| 250 | - * and 1 the inside of the CDATA section. |
|
| 251 | - * @returns Escaped internals of the CDATA section. |
|
| 252 | - */ |
|
| 253 | - protected static function CDATACallback($matches) { |
|
| 254 | - // not exactly sure why the character set is needed, but whatever |
|
| 255 | - return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8', false); |
|
| 256 | - } |
|
| 257 | - |
|
| 258 | - /** |
|
| 259 | - * Takes a piece of HTML and normalizes it by converting entities, fixing |
|
| 260 | - * encoding, extracting bits, and other good stuff. |
|
| 261 | - * @todo Consider making protected |
|
| 262 | - */ |
|
| 263 | - public function normalize($html, $config, $context) { |
|
| 264 | - |
|
| 265 | - // normalize newlines to \n |
|
| 266 | - if ($config->get('Core.NormalizeNewlines')) { |
|
| 267 | - $html = str_replace("\r\n", "\n", $html); |
|
| 268 | - $html = str_replace("\r", "\n", $html); |
|
| 269 | - } |
|
| 270 | - |
|
| 271 | - if ($config->get('HTML.Trusted')) { |
|
| 272 | - // escape convoluted CDATA |
|
| 273 | - $html = $this->escapeCommentedCDATA($html); |
|
| 274 | - } |
|
| 275 | - |
|
| 276 | - // escape CDATA |
|
| 277 | - $html = $this->escapeCDATA($html); |
|
| 278 | - |
|
| 279 | - $html = $this->removeIEConditional($html); |
|
| 280 | - |
|
| 281 | - // extract body from document if applicable |
|
| 282 | - if ($config->get('Core.ConvertDocumentToFragment')) { |
|
| 283 | - $e = false; |
|
| 284 | - if ($config->get('Core.CollectErrors')) { |
|
| 285 | - $e =& $context->get('ErrorCollector'); |
|
| 286 | - } |
|
| 287 | - $new_html = $this->extractBody($html); |
|
| 288 | - if ($e && $new_html != $html) { |
|
| 289 | - $e->send(E_WARNING, 'Lexer: Extracted body'); |
|
| 290 | - } |
|
| 291 | - $html = $new_html; |
|
| 292 | - } |
|
| 293 | - |
|
| 294 | - // expand entities that aren't the big five |
|
| 295 | - $html = $this->_entity_parser->substituteNonSpecialEntities($html); |
|
| 296 | - |
|
| 297 | - // clean into wellformed UTF-8 string for an SGML context: this has |
|
| 298 | - // to be done after entity expansion because the entities sometimes |
|
| 299 | - // represent non-SGML characters (horror, horror!) |
|
| 300 | - $html = HTMLPurifier_Encoder::cleanUTF8($html); |
|
| 301 | - |
|
| 302 | - // if processing instructions are to removed, remove them now |
|
| 303 | - if ($config->get('Core.RemoveProcessingInstructions')) { |
|
| 304 | - $html = preg_replace('#<\?.+?\?>#s', '', $html); |
|
| 305 | - } |
|
| 306 | - |
|
| 307 | - return $html; |
|
| 308 | - } |
|
| 309 | - |
|
| 310 | - /** |
|
| 311 | - * Takes a string of HTML (fragment or document) and returns the content |
|
| 312 | - * @todo Consider making protected |
|
| 313 | - */ |
|
| 314 | - public function extractBody($html) { |
|
| 315 | - $matches = array(); |
|
| 316 | - $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches); |
|
| 317 | - if ($result) { |
|
| 318 | - return $matches[1]; |
|
| 319 | - } else { |
|
| 320 | - return $html; |
|
| 321 | - } |
|
| 322 | - } |
|
| 75 | + } else { |
|
| 76 | + $lexer = $config->get('Core.LexerImpl'); |
|
| 77 | + } |
|
| 78 | + |
|
| 79 | + $needs_tracking = |
|
| 80 | + $config->get('Core.MaintainLineNumbers') || |
|
| 81 | + $config->get('Core.CollectErrors'); |
|
| 82 | + |
|
| 83 | + $inst = null; |
|
| 84 | + if (is_object($lexer)) { |
|
| 85 | + $inst = $lexer; |
|
| 86 | + } else { |
|
| 87 | + |
|
| 88 | + if (is_null($lexer)) { do { |
|
| 89 | + // auto-detection algorithm |
|
| 90 | + |
|
| 91 | + if ($needs_tracking) { |
|
| 92 | + $lexer = 'DirectLex'; |
|
| 93 | + break; |
|
| 94 | + } |
|
| 95 | + |
|
| 96 | + if ( |
|
| 97 | + class_exists('DOMDocument') && |
|
| 98 | + method_exists('DOMDocument', 'loadHTML') && |
|
| 99 | + !extension_loaded('domxml') |
|
| 100 | + ) { |
|
| 101 | + // check for DOM support, because while it's part of the |
|
| 102 | + // core, it can be disabled compile time. Also, the PECL |
|
| 103 | + // domxml extension overrides the default DOM, and is evil |
|
| 104 | + // and nasty and we shan't bother to support it |
|
| 105 | + $lexer = 'DOMLex'; |
|
| 106 | + } else { |
|
| 107 | + $lexer = 'DirectLex'; |
|
| 108 | + } |
|
| 109 | + |
|
| 110 | + } while(0); } // do..while so we can break |
|
| 111 | + |
|
| 112 | + // instantiate recognized string names |
|
| 113 | + switch ($lexer) { |
|
| 114 | + case 'DOMLex': |
|
| 115 | + $inst = new HTMLPurifier_Lexer_DOMLex(); |
|
| 116 | + break; |
|
| 117 | + case 'DirectLex': |
|
| 118 | + $inst = new HTMLPurifier_Lexer_DirectLex(); |
|
| 119 | + break; |
|
| 120 | + case 'PH5P': |
|
| 121 | + $inst = new HTMLPurifier_Lexer_PH5P(); |
|
| 122 | + break; |
|
| 123 | + default: |
|
| 124 | + throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer, ENT_COMPAT | ENT_HTML401, 'UTF-8', false)); |
|
| 125 | + } |
|
| 126 | + } |
|
| 127 | + |
|
| 128 | + if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated'); |
|
| 129 | + |
|
| 130 | + // once PHP DOM implements native line numbers, or we |
|
| 131 | + // hack out something using XSLT, remove this stipulation |
|
| 132 | + if ($needs_tracking && !$inst->tracksLineNumbers) { |
|
| 133 | + throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'); |
|
| 134 | + } |
|
| 135 | + |
|
| 136 | + return $inst; |
|
| 137 | + |
|
| 138 | + } |
|
| 139 | + |
|
| 140 | + // -- CONVENIENCE MEMBERS --------------------------------------------- |
|
| 141 | + |
|
| 142 | + public function __construct() { |
|
| 143 | + $this->_entity_parser = new HTMLPurifier_EntityParser(); |
|
| 144 | + } |
|
| 145 | + |
|
| 146 | + /** |
|
| 147 | + * Most common entity to raw value conversion table for special entities. |
|
| 148 | + */ |
|
| 149 | + protected $_special_entity2str = |
|
| 150 | + array( |
|
| 151 | + '"' => '"', |
|
| 152 | + '&' => '&', |
|
| 153 | + '<' => '<', |
|
| 154 | + '>' => '>', |
|
| 155 | + ''' => "'", |
|
| 156 | + ''' => "'", |
|
| 157 | + ''' => "'" |
|
| 158 | + ); |
|
| 159 | + |
|
| 160 | + /** |
|
| 161 | + * Parses special entities into the proper characters. |
|
| 162 | + * |
|
| 163 | + * This string will translate escaped versions of the special characters |
|
| 164 | + * into the correct ones. |
|
| 165 | + * |
|
| 166 | + * @warning |
|
| 167 | + * You should be able to treat the output of this function as |
|
| 168 | + * completely parsed, but that's only because all other entities should |
|
| 169 | + * have been handled previously in substituteNonSpecialEntities() |
|
| 170 | + * |
|
| 171 | + * @param $string String character data to be parsed. |
|
| 172 | + * @returns Parsed character data. |
|
| 173 | + */ |
|
| 174 | + public function parseData($string) { |
|
| 175 | + |
|
| 176 | + // following functions require at least one character |
|
| 177 | + if ($string === '') return ''; |
|
| 178 | + |
|
| 179 | + // subtracts amps that cannot possibly be escaped |
|
| 180 | + $num_amp = substr_count($string, '&') - substr_count($string, '& ') - |
|
| 181 | + ($string[strlen($string)-1] === '&' ? 1 : 0); |
|
| 182 | + |
|
| 183 | + if (!$num_amp) return $string; // abort if no entities |
|
| 184 | + $num_esc_amp = substr_count($string, '&amp;'); |
|
| 185 | + $string = strtr($string, $this->_special_entity2str); |
|
| 186 | + |
|
| 187 | + // code duplication for sake of optimization, see above |
|
| 188 | + $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - |
|
| 189 | + ($string[strlen($string)-1] === '&' ? 1 : 0); |
|
| 190 | + |
|
| 191 | + if ($num_amp_2 <= $num_esc_amp) return $string; |
|
| 192 | + |
|
| 193 | + // hmm... now we have some uncommon entities. Use the callback. |
|
| 194 | + $string = $this->_entity_parser->substituteSpecialEntities($string); |
|
| 195 | + return $string; |
|
| 196 | + } |
|
| 197 | + |
|
| 198 | + /** |
|
| 199 | + * Lexes an HTML string into tokens. |
|
| 200 | + * |
|
| 201 | + * @param $string String HTML. |
|
| 202 | + * @return HTMLPurifier_Token array representation of HTML. |
|
| 203 | + */ |
|
| 204 | + public function tokenizeHTML($string, $config, $context) { |
|
| 205 | + trigger_error('Call to abstract class', E_USER_ERROR); |
|
| 206 | + } |
|
| 207 | + |
|
| 208 | + /** |
|
| 209 | + * Translates CDATA sections into regular sections (through escaping). |
|
| 210 | + * |
|
| 211 | + * @param $string HTML string to process. |
|
| 212 | + * @returns HTML with CDATA sections escaped. |
|
| 213 | + */ |
|
| 214 | + protected static function escapeCDATA($string) { |
|
| 215 | + return preg_replace_callback( |
|
| 216 | + '/<!\[CDATA\[(.+?)\]\]>/s', |
|
| 217 | + array('HTMLPurifier_Lexer', 'CDATACallback'), |
|
| 218 | + $string |
|
| 219 | + ); |
|
| 220 | + } |
|
| 221 | + |
|
| 222 | + /** |
|
| 223 | + * Special CDATA case that is especially convoluted for <script> |
|
| 224 | + */ |
|
| 225 | + protected static function escapeCommentedCDATA($string) { |
|
| 226 | + return preg_replace_callback( |
|
| 227 | + '#<!--//--><!\[CDATA\[//><!--(.+?)//--><!\]\]>#s', |
|
| 228 | + array('HTMLPurifier_Lexer', 'CDATACallback'), |
|
| 229 | + $string |
|
| 230 | + ); |
|
| 231 | + } |
|
| 232 | + |
|
| 233 | + /** |
|
| 234 | + * Special Internet Explorer conditional comments should be removed. |
|
| 235 | + */ |
|
| 236 | + protected static function removeIEConditional($string) { |
|
| 237 | + return preg_replace( |
|
| 238 | + '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings |
|
| 239 | + '', |
|
| 240 | + $string |
|
| 241 | + ); |
|
| 242 | + } |
|
| 243 | + |
|
| 244 | + /** |
|
| 245 | + * Callback function for escapeCDATA() that does the work. |
|
| 246 | + * |
|
| 247 | + * @warning Though this is public in order to let the callback happen, |
|
| 248 | + * calling it directly is not recommended. |
|
| 249 | + * @params $matches PCRE matches array, with index 0 the entire match |
|
| 250 | + * and 1 the inside of the CDATA section. |
|
| 251 | + * @returns Escaped internals of the CDATA section. |
|
| 252 | + */ |
|
| 253 | + protected static function CDATACallback($matches) { |
|
| 254 | + // not exactly sure why the character set is needed, but whatever |
|
| 255 | + return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8', false); |
|
| 256 | + } |
|
| 257 | + |
|
| 258 | + /** |
|
| 259 | + * Takes a piece of HTML and normalizes it by converting entities, fixing |
|
| 260 | + * encoding, extracting bits, and other good stuff. |
|
| 261 | + * @todo Consider making protected |
|
| 262 | + */ |
|
| 263 | + public function normalize($html, $config, $context) { |
|
| 264 | + |
|
| 265 | + // normalize newlines to \n |
|
| 266 | + if ($config->get('Core.NormalizeNewlines')) { |
|
| 267 | + $html = str_replace("\r\n", "\n", $html); |
|
| 268 | + $html = str_replace("\r", "\n", $html); |
|
| 269 | + } |
|
| 270 | + |
|
| 271 | + if ($config->get('HTML.Trusted')) { |
|
| 272 | + // escape convoluted CDATA |
|
| 273 | + $html = $this->escapeCommentedCDATA($html); |
|
| 274 | + } |
|
| 275 | + |
|
| 276 | + // escape CDATA |
|
| 277 | + $html = $this->escapeCDATA($html); |
|
| 278 | + |
|
| 279 | + $html = $this->removeIEConditional($html); |
|
| 280 | + |
|
| 281 | + // extract body from document if applicable |
|
| 282 | + if ($config->get('Core.ConvertDocumentToFragment')) { |
|
| 283 | + $e = false; |
|
| 284 | + if ($config->get('Core.CollectErrors')) { |
|
| 285 | + $e =& $context->get('ErrorCollector'); |
|
| 286 | + } |
|
| 287 | + $new_html = $this->extractBody($html); |
|
| 288 | + if ($e && $new_html != $html) { |
|
| 289 | + $e->send(E_WARNING, 'Lexer: Extracted body'); |
|
| 290 | + } |
|
| 291 | + $html = $new_html; |
|
| 292 | + } |
|
| 293 | + |
|
| 294 | + // expand entities that aren't the big five |
|
| 295 | + $html = $this->_entity_parser->substituteNonSpecialEntities($html); |
|
| 296 | + |
|
| 297 | + // clean into wellformed UTF-8 string for an SGML context: this has |
|
| 298 | + // to be done after entity expansion because the entities sometimes |
|
| 299 | + // represent non-SGML characters (horror, horror!) |
|
| 300 | + $html = HTMLPurifier_Encoder::cleanUTF8($html); |
|
| 301 | + |
|
| 302 | + // if processing instructions are to removed, remove them now |
|
| 303 | + if ($config->get('Core.RemoveProcessingInstructions')) { |
|
| 304 | + $html = preg_replace('#<\?.+?\?>#s', '', $html); |
|
| 305 | + } |
|
| 306 | + |
|
| 307 | + return $html; |
|
| 308 | + } |
|
| 309 | + |
|
| 310 | + /** |
|
| 311 | + * Takes a string of HTML (fragment or document) and returns the content |
|
| 312 | + * @todo Consider making protected |
|
| 313 | + */ |
|
| 314 | + public function extractBody($html) { |
|
| 315 | + $matches = array(); |
|
| 316 | + $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches); |
|
| 317 | + if ($result) { |
|
| 318 | + return $matches[1]; |
|
| 319 | + } else { |
|
| 320 | + return $html; |
|
| 321 | + } |
|
| 322 | + } |
|
| 323 | 323 | |
| 324 | 324 | } |
| 325 | 325 | |
@@ -125,7 +125,9 @@ discard block |
||
| 125 | 125 | } |
| 126 | 126 | } |
| 127 | 127 | |
| 128 | - if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated'); |
|
| 128 | + if (!$inst) { |
|
| 129 | + throw new HTMLPurifier_Exception('No lexer was instantiated'); |
|
| 130 | + } |
|
| 129 | 131 | |
| 130 | 132 | // once PHP DOM implements native line numbers, or we |
| 131 | 133 | // hack out something using XSLT, remove this stipulation |
@@ -174,13 +176,18 @@ discard block |
||
| 174 | 176 | public function parseData($string) { |
| 175 | 177 | |
| 176 | 178 | // following functions require at least one character |
| 177 | - if ($string === '') return ''; |
|
| 179 | + if ($string === '') { |
|
| 180 | + return ''; |
|
| 181 | + } |
|
| 178 | 182 | |
| 179 | 183 | // subtracts amps that cannot possibly be escaped |
| 180 | 184 | $num_amp = substr_count($string, '&') - substr_count($string, '& ') - |
| 181 | 185 | ($string[strlen($string)-1] === '&' ? 1 : 0); |
| 182 | 186 | |
| 183 | - if (!$num_amp) return $string; // abort if no entities |
|
| 187 | + if (!$num_amp) { |
|
| 188 | + return $string; |
|
| 189 | + } |
|
| 190 | + // abort if no entities |
|
| 184 | 191 | $num_esc_amp = substr_count($string, '&amp;'); |
| 185 | 192 | $string = strtr($string, $this->_special_entity2str); |
| 186 | 193 | |
@@ -188,7 +195,9 @@ discard block |
||
| 188 | 195 | $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - |
| 189 | 196 | ($string[strlen($string)-1] === '&' ? 1 : 0); |
| 190 | 197 | |
| 191 | - if ($num_amp_2 <= $num_esc_amp) return $string; |
|
| 198 | + if ($num_amp_2 <= $num_esc_amp) { |
|
| 199 | + return $string; |
|
| 200 | + } |
|
| 192 | 201 | |
| 193 | 202 | // hmm... now we have some uncommon entities. Use the callback. |
| 194 | 203 | $string = $this->_entity_parser->substituteSpecialEntities($string); |
@@ -107,7 +107,7 @@ discard block |
||
| 107 | 107 | $lexer = 'DirectLex'; |
| 108 | 108 | } |
| 109 | 109 | |
| 110 | - } while(0); } // do..while so we can break |
|
| 110 | + } while (0); } // do..while so we can break |
|
| 111 | 111 | |
| 112 | 112 | // instantiate recognized string names |
| 113 | 113 | switch ($lexer) { |
@@ -121,7 +121,7 @@ discard block |
||
| 121 | 121 | $inst = new HTMLPurifier_Lexer_PH5P(); |
| 122 | 122 | break; |
| 123 | 123 | default: |
| 124 | - throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer, ENT_COMPAT | ENT_HTML401, 'UTF-8', false)); |
|
| 124 | + throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type ".htmlspecialchars($lexer, ENT_COMPAT | ENT_HTML401, 'UTF-8', false)); |
|
| 125 | 125 | } |
| 126 | 126 | } |
| 127 | 127 | |
@@ -178,7 +178,7 @@ discard block |
||
| 178 | 178 | |
| 179 | 179 | // subtracts amps that cannot possibly be escaped |
| 180 | 180 | $num_amp = substr_count($string, '&') - substr_count($string, '& ') - |
| 181 | - ($string[strlen($string)-1] === '&' ? 1 : 0); |
|
| 181 | + ($string[strlen($string) - 1] === '&' ? 1 : 0); |
|
| 182 | 182 | |
| 183 | 183 | if (!$num_amp) return $string; // abort if no entities |
| 184 | 184 | $num_esc_amp = substr_count($string, '&amp;'); |
@@ -186,7 +186,7 @@ discard block |
||
| 186 | 186 | |
| 187 | 187 | // code duplication for sake of optimization, see above |
| 188 | 188 | $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - |
| 189 | - ($string[strlen($string)-1] === '&' ? 1 : 0); |
|
| 189 | + ($string[strlen($string) - 1] === '&' ? 1 : 0); |
|
| 190 | 190 | |
| 191 | 191 | if ($num_amp_2 <= $num_esc_amp) return $string; |
| 192 | 192 | |
@@ -282,7 +282,7 @@ discard block |
||
| 282 | 282 | if ($config->get('Core.ConvertDocumentToFragment')) { |
| 283 | 283 | $e = false; |
| 284 | 284 | if ($config->get('Core.CollectErrors')) { |
| 285 | - $e =& $context->get('ErrorCollector'); |
|
| 285 | + $e = & $context->get('ErrorCollector'); |
|
| 286 | 286 | } |
| 287 | 287 | $new_html = $this->extractBody($html); |
| 288 | 288 | if ($e && $new_html != $html) { |