@@ -167,7 +167,7 @@ discard block |
||
| 167 | 167 | /** |
| 168 | 168 | * Method to retrieve an object containing a key/value pairs |
| 169 | 169 | * |
| 170 | - * @return Object Returns an object containing key/value pairs |
|
| 170 | + * @return stdClass Returns an object containing key/value pairs |
|
| 171 | 171 | */ |
| 172 | 172 | function gets() |
| 173 | 173 | { |
@@ -193,7 +193,7 @@ discard block |
||
| 193 | 193 | /** |
| 194 | 194 | * Method to retrieve an object of key/value pairs |
| 195 | 195 | * |
| 196 | - * @return Object |
|
| 196 | + * @return stdClass |
|
| 197 | 197 | */ |
| 198 | 198 | function getObjectVars() |
| 199 | 199 | { |
@@ -96,7 +96,7 @@ discard block |
||
| 96 | 96 | */ |
| 97 | 97 | function setMessage($message = 'success') |
| 98 | 98 | { |
| 99 | - if($str = Context::getLang($message)) |
|
| 99 | + if ($str = Context::getLang($message)) |
|
| 100 | 100 | { |
| 101 | 101 | $this->message = $str; |
| 102 | 102 | } |
@@ -139,14 +139,14 @@ discard block |
||
| 139 | 139 | */ |
| 140 | 140 | function adds($object) |
| 141 | 141 | { |
| 142 | - if(is_object($object)) |
|
| 142 | + if (is_object($object)) |
|
| 143 | 143 | { |
| 144 | 144 | $object = get_object_vars($object); |
| 145 | 145 | } |
| 146 | 146 | |
| 147 | - if(is_array($object)) |
|
| 147 | + if (is_array($object)) |
|
| 148 | 148 | { |
| 149 | - foreach($object as $key => $val) |
|
| 149 | + foreach ($object as $key => $val) |
|
| 150 | 150 | { |
| 151 | 151 | $this->variables[$key] = $val; |
| 152 | 152 | } |
@@ -173,7 +173,7 @@ discard block |
||
| 173 | 173 | { |
| 174 | 174 | $args = func_get_args(); |
| 175 | 175 | $output = new stdClass(); |
| 176 | - foreach($args as $arg) |
|
| 176 | + foreach ($args as $arg) |
|
| 177 | 177 | { |
| 178 | 178 | $output->{$arg} = $this->get($arg); |
| 179 | 179 | } |
@@ -198,7 +198,7 @@ discard block |
||
| 198 | 198 | function getObjectVars() |
| 199 | 199 | { |
| 200 | 200 | $output = new stdClass(); |
| 201 | - foreach($this->variables as $key => $val) |
|
| 201 | + foreach ($this->variables as $key => $val) |
|
| 202 | 202 | { |
| 203 | 203 | $output->{$key} = $val; |
| 204 | 204 | } |
@@ -21,8 +21,7 @@ |
||
| 21 | 21 | if($absolute_url) |
| 22 | 22 | { |
| 23 | 23 | return getFullUrl('','vid',$vid, 'mid',$mid, 'act',$format); |
| 24 | - } |
|
| 25 | - else |
|
| 24 | + } else |
|
| 26 | 25 | { |
| 27 | 26 | return getUrl('','vid',$vid, 'mid',$mid, 'act',$format); |
| 28 | 27 | } |
@@ -295,6 +295,7 @@ discard block |
||
| 295 | 295 | |
| 296 | 296 | /** |
| 297 | 297 | * Check the content. |
| 298 | + * @param string $content |
|
| 298 | 299 | * @return void |
| 299 | 300 | */ |
| 300 | 301 | function check(&$content) |
@@ -487,7 +488,7 @@ discard block |
||
| 487 | 488 | |
| 488 | 489 | /** |
| 489 | 490 | * Check white domain in object data attribute or embed src attribute. |
| 490 | - * @return string |
|
| 491 | + * @return boolean |
|
| 491 | 492 | */ |
| 492 | 493 | function isWhiteDomain($urlAttribute) |
| 493 | 494 | { |
@@ -506,7 +507,7 @@ discard block |
||
| 506 | 507 | |
| 507 | 508 | /** |
| 508 | 509 | * Check white domain in iframe src attribute. |
| 509 | - * @return string |
|
| 510 | + * @return boolean |
|
| 510 | 511 | */ |
| 511 | 512 | function isWhiteIframeDomain($urlAttribute) |
| 512 | 513 | { |
@@ -525,7 +526,7 @@ discard block |
||
| 525 | 526 | |
| 526 | 527 | /** |
| 527 | 528 | * Check white mime type in object type attribute or embed type attribute. |
| 528 | - * @return string |
|
| 529 | + * @return boolean |
|
| 529 | 530 | */ |
| 530 | 531 | function isWhiteMimetype($mimeType) |
| 531 | 532 | { |
@@ -590,7 +591,7 @@ discard block |
||
| 590 | 591 | |
| 591 | 592 | /** |
| 592 | 593 | * Make white domain list cache file from xml config file. |
| 593 | - * @param $whitelist array |
|
| 594 | + * @param stdClass $whitelist array |
|
| 594 | 595 | * @return void |
| 595 | 596 | */ |
| 596 | 597 | function _makeWhiteDomainList($whitelist = NULL) |
@@ -563,14 +563,12 @@ discard block |
||
| 563 | 563 | } |
| 564 | 564 | $this->allowscriptaccessList[count($this->allowscriptaccessList) - 1]--; |
| 565 | 565 | } |
| 566 | - } |
|
| 567 | - else if($m[1] == 'embed') |
|
| 566 | + } else if($m[1] == 'embed') |
|
| 568 | 567 | { |
| 569 | 568 | if(stripos($m[0], 'allowscriptaccess')) |
| 570 | 569 | { |
| 571 | 570 | $m[0] = preg_replace('/always|samedomain/i', 'never', $m[0]); |
| 572 | - } |
|
| 573 | - else |
|
| 571 | + } else |
|
| 574 | 572 | { |
| 575 | 573 | $m[0] = preg_replace('/\<embed/i', '<embed allowscriptaccess="never"', $m[0]); |
| 576 | 574 | } |
@@ -627,8 +625,7 @@ discard block |
||
| 627 | 625 | { |
| 628 | 626 | $whiteUrlList = $whitelist->object; |
| 629 | 627 | $whiteIframeUrlList = $whitelist->iframe; |
| 630 | - } |
|
| 631 | - else |
|
| 628 | + } else |
|
| 632 | 629 | { |
| 633 | 630 | $xmlBuff = FileHandler::readFile($this->whiteUrlXmlFile); |
| 634 | 631 | |
@@ -636,8 +633,12 @@ discard block |
||
| 636 | 633 | $domainListObj = $xmlParser->parse($xmlBuff); |
| 637 | 634 | $embedDomainList = $domainListObj->whiteurl->embed->domain; |
| 638 | 635 | $iframeDomainList = $domainListObj->whiteurl->iframe->domain; |
| 639 | - if(!is_array($embedDomainList)) $embedDomainList = array(); |
|
| 640 | - if(!is_array($iframeDomainList)) $iframeDomainList = array(); |
|
| 636 | + if(!is_array($embedDomainList)) { |
|
| 637 | + $embedDomainList = array(); |
|
| 638 | + } |
|
| 639 | + if(!is_array($iframeDomainList)) { |
|
| 640 | + $iframeDomainList = array(); |
|
| 641 | + } |
|
| 641 | 642 | |
| 642 | 643 | foreach($embedDomainList AS $key => $value) |
| 643 | 644 | { |
@@ -648,8 +649,7 @@ discard block |
||
| 648 | 649 | { |
| 649 | 650 | $whiteUrlList[] = $value->body; |
| 650 | 651 | } |
| 651 | - } |
|
| 652 | - else |
|
| 652 | + } else |
|
| 653 | 653 | { |
| 654 | 654 | $whiteUrlList[] = $patternList->body; |
| 655 | 655 | } |
@@ -664,8 +664,7 @@ discard block |
||
| 664 | 664 | { |
| 665 | 665 | $whiteIframeUrlList[] = $value->body; |
| 666 | 666 | } |
| 667 | - } |
|
| 668 | - else |
|
| 667 | + } else |
|
| 669 | 668 | { |
| 670 | 669 | $whiteIframeUrlList[] = $patternList->body; |
| 671 | 670 | } |
@@ -1,7 +1,7 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | /* Copyright (C) NAVER <http://www.navercorp.com> */ |
| 3 | 3 | |
| 4 | -include _XE_PATH_ . 'classes/security/phphtmlparser/src/htmlparser.inc'; |
|
| 4 | +include _XE_PATH_.'classes/security/phphtmlparser/src/htmlparser.inc'; |
|
| 5 | 5 | |
| 6 | 6 | class EmbedFilter |
| 7 | 7 | { |
@@ -276,7 +276,7 @@ discard block |
||
| 276 | 276 | */ |
| 277 | 277 | function getInstance() |
| 278 | 278 | { |
| 279 | - if(!isset($GLOBALS['__EMBEDFILTER_INSTANCE__'])) |
|
| 279 | + if (!isset($GLOBALS['__EMBEDFILTER_INSTANCE__'])) |
|
| 280 | 280 | { |
| 281 | 281 | $GLOBALS['__EMBEDFILTER_INSTANCE__'] = new EmbedFilter(); |
| 282 | 282 | } |
@@ -316,9 +316,9 @@ discard block |
||
| 316 | 316 | { |
| 317 | 317 | preg_match_all('/<\s*object\s*[^>]+(?:\/?>?)/is', $content, $m); |
| 318 | 318 | $objectTagList = $m[0]; |
| 319 | - if($objectTagList) |
|
| 319 | + if ($objectTagList) |
|
| 320 | 320 | { |
| 321 | - foreach($objectTagList AS $key => $objectTag) |
|
| 321 | + foreach ($objectTagList AS $key => $objectTag) |
|
| 322 | 322 | { |
| 323 | 323 | $isWhiteDomain = true; |
| 324 | 324 | $isWhiteMimetype = true; |
@@ -326,21 +326,21 @@ discard block |
||
| 326 | 326 | $ext = ''; |
| 327 | 327 | |
| 328 | 328 | $parser = new HtmlParser($objectTag); |
| 329 | - while($parser->parse()) |
|
| 329 | + while ($parser->parse()) |
|
| 330 | 330 | { |
| 331 | - if(is_array($parser->iNodeAttributes)) |
|
| 331 | + if (is_array($parser->iNodeAttributes)) |
|
| 332 | 332 | { |
| 333 | - foreach($parser->iNodeAttributes AS $attrName => $attrValue) |
|
| 333 | + foreach ($parser->iNodeAttributes AS $attrName => $attrValue) |
|
| 334 | 334 | { |
| 335 | 335 | // data url check |
| 336 | - if($attrValue && strtolower($attrName) == 'data') |
|
| 336 | + if ($attrValue && strtolower($attrName) == 'data') |
|
| 337 | 337 | { |
| 338 | 338 | $ext = strtolower(substr(strrchr($attrValue, "."), 1)); |
| 339 | 339 | $isWhiteDomain = $this->isWhiteDomain($attrValue); |
| 340 | 340 | } |
| 341 | 341 | |
| 342 | 342 | // mime type check |
| 343 | - if(strtolower($attrName) == 'type' && $attrValue) |
|
| 343 | + if (strtolower($attrName) == 'type' && $attrValue) |
|
| 344 | 344 | { |
| 345 | 345 | $isWhiteMimetype = $this->isWhiteMimetype($attrValue); |
| 346 | 346 | } |
@@ -348,7 +348,7 @@ discard block |
||
| 348 | 348 | } |
| 349 | 349 | } |
| 350 | 350 | |
| 351 | - if(!$isWhiteDomain || !$isWhiteMimetype) |
|
| 351 | + if (!$isWhiteDomain || !$isWhiteMimetype) |
|
| 352 | 352 | { |
| 353 | 353 | $content = str_replace($objectTag, htmlspecialchars($objectTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content); |
| 354 | 354 | } |
@@ -364,9 +364,9 @@ discard block |
||
| 364 | 364 | { |
| 365 | 365 | preg_match_all('/<\s*embed\s*[^>]+(?:\/?>?)/is', $content, $m); |
| 366 | 366 | $embedTagList = $m[0]; |
| 367 | - if($embedTagList) |
|
| 367 | + if ($embedTagList) |
|
| 368 | 368 | { |
| 369 | - foreach($embedTagList AS $key => $embedTag) |
|
| 369 | + foreach ($embedTagList AS $key => $embedTag) |
|
| 370 | 370 | { |
| 371 | 371 | $isWhiteDomain = TRUE; |
| 372 | 372 | $isWhiteMimetype = TRUE; |
@@ -374,21 +374,21 @@ discard block |
||
| 374 | 374 | $ext = ''; |
| 375 | 375 | |
| 376 | 376 | $parser = new HtmlParser($embedTag); |
| 377 | - while($parser->parse()) |
|
| 377 | + while ($parser->parse()) |
|
| 378 | 378 | { |
| 379 | - if(is_array($parser->iNodeAttributes)) |
|
| 379 | + if (is_array($parser->iNodeAttributes)) |
|
| 380 | 380 | { |
| 381 | - foreach($parser->iNodeAttributes AS $attrName => $attrValue) |
|
| 381 | + foreach ($parser->iNodeAttributes AS $attrName => $attrValue) |
|
| 382 | 382 | { |
| 383 | 383 | // src url check |
| 384 | - if($attrValue && strtolower($attrName) == 'src') |
|
| 384 | + if ($attrValue && strtolower($attrName) == 'src') |
|
| 385 | 385 | { |
| 386 | 386 | $ext = strtolower(substr(strrchr($attrValue, "."), 1)); |
| 387 | 387 | $isWhiteDomain = $this->isWhiteDomain($attrValue); |
| 388 | 388 | } |
| 389 | 389 | |
| 390 | 390 | // mime type check |
| 391 | - if(strtolower($attrName) == 'type' && $attrValue) |
|
| 391 | + if (strtolower($attrName) == 'type' && $attrValue) |
|
| 392 | 392 | { |
| 393 | 393 | $isWhiteMimetype = $this->isWhiteMimetype($attrValue); |
| 394 | 394 | } |
@@ -396,7 +396,7 @@ discard block |
||
| 396 | 396 | } |
| 397 | 397 | } |
| 398 | 398 | |
| 399 | - if(!$isWhiteDomain || !$isWhiteMimetype) |
|
| 399 | + if (!$isWhiteDomain || !$isWhiteMimetype) |
|
| 400 | 400 | { |
| 401 | 401 | $content = str_replace($embedTag, htmlspecialchars($embedTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content); |
| 402 | 402 | } |
@@ -415,22 +415,22 @@ discard block |
||
| 415 | 415 | |
| 416 | 416 | preg_match_all('/<\s*iframe\s*[^>]+(?:\/?>?)/is', $content, $m); |
| 417 | 417 | $iframeTagList = $m[0]; |
| 418 | - if($iframeTagList) |
|
| 418 | + if ($iframeTagList) |
|
| 419 | 419 | { |
| 420 | - foreach($iframeTagList AS $key => $iframeTag) |
|
| 420 | + foreach ($iframeTagList AS $key => $iframeTag) |
|
| 421 | 421 | { |
| 422 | 422 | $isWhiteDomain = TRUE; |
| 423 | 423 | $ext = ''; |
| 424 | 424 | |
| 425 | 425 | $parser = new HtmlParser($iframeTag); |
| 426 | - while($parser->parse()) |
|
| 426 | + while ($parser->parse()) |
|
| 427 | 427 | { |
| 428 | - if(is_array($parser->iNodeAttributes)) |
|
| 428 | + if (is_array($parser->iNodeAttributes)) |
|
| 429 | 429 | { |
| 430 | - foreach($parser->iNodeAttributes AS $attrName => $attrValue) |
|
| 430 | + foreach ($parser->iNodeAttributes AS $attrName => $attrValue) |
|
| 431 | 431 | { |
| 432 | 432 | // src url check |
| 433 | - if(strtolower($attrName) == 'src' && $attrValue) |
|
| 433 | + if (strtolower($attrName) == 'src' && $attrValue) |
|
| 434 | 434 | { |
| 435 | 435 | $ext = strtolower(substr(strrchr($attrValue, "."), 1)); |
| 436 | 436 | $isWhiteDomain = $this->isWhiteIframeDomain($attrValue); |
@@ -439,7 +439,7 @@ discard block |
||
| 439 | 439 | } |
| 440 | 440 | } |
| 441 | 441 | |
| 442 | - if(!$isWhiteDomain) |
|
| 442 | + if (!$isWhiteDomain) |
|
| 443 | 443 | { |
| 444 | 444 | $content = str_replace($iframeTag, htmlspecialchars($iframeTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content); |
| 445 | 445 | } |
@@ -455,26 +455,26 @@ discard block |
||
| 455 | 455 | { |
| 456 | 456 | preg_match_all('/<\s*param\s*[^>]+(?:\/?>?)/is', $content, $m); |
| 457 | 457 | $paramTagList = $m[0]; |
| 458 | - if($paramTagList) |
|
| 458 | + if ($paramTagList) |
|
| 459 | 459 | { |
| 460 | - foreach($paramTagList AS $key => $paramTag) |
|
| 460 | + foreach ($paramTagList AS $key => $paramTag) |
|
| 461 | 461 | { |
| 462 | 462 | $isWhiteDomain = TRUE; |
| 463 | 463 | $isWhiteExt = TRUE; |
| 464 | 464 | $ext = ''; |
| 465 | 465 | |
| 466 | 466 | $parser = new HtmlParser($paramTag); |
| 467 | - while($parser->parse()) |
|
| 467 | + while ($parser->parse()) |
|
| 468 | 468 | { |
| 469 | - if($parser->iNodeAttributes['name'] && $parser->iNodeAttributes['value']) |
|
| 469 | + if ($parser->iNodeAttributes['name'] && $parser->iNodeAttributes['value']) |
|
| 470 | 470 | { |
| 471 | 471 | $name = strtolower($parser->iNodeAttributes['name']); |
| 472 | - if($name == 'movie' || $name == 'src' || $name == 'href' || $name == 'url' || $name == 'source') |
|
| 472 | + if ($name == 'movie' || $name == 'src' || $name == 'href' || $name == 'url' || $name == 'source') |
|
| 473 | 473 | { |
| 474 | 474 | $ext = strtolower(substr(strrchr($parser->iNodeAttributes['value'], "."), 1)); |
| 475 | 475 | $isWhiteDomain = $this->isWhiteDomain($parser->iNodeAttributes['value']); |
| 476 | 476 | |
| 477 | - if(!$isWhiteDomain) |
|
| 477 | + if (!$isWhiteDomain) |
|
| 478 | 478 | { |
| 479 | 479 | $content = str_replace($paramTag, htmlspecialchars($paramTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content); |
| 480 | 480 | } |
@@ -491,11 +491,11 @@ discard block |
||
| 491 | 491 | */ |
| 492 | 492 | function isWhiteDomain($urlAttribute) |
| 493 | 493 | { |
| 494 | - if(is_array($this->whiteUrlList)) |
|
| 494 | + if (is_array($this->whiteUrlList)) |
|
| 495 | 495 | { |
| 496 | - foreach($this->whiteUrlList AS $key => $value) |
|
| 496 | + foreach ($this->whiteUrlList AS $key => $value) |
|
| 497 | 497 | { |
| 498 | - if(preg_match('@^' . preg_quote($value) . '@i', $urlAttribute)) |
|
| 498 | + if (preg_match('@^'.preg_quote($value).'@i', $urlAttribute)) |
|
| 499 | 499 | { |
| 500 | 500 | return TRUE; |
| 501 | 501 | } |
@@ -510,11 +510,11 @@ discard block |
||
| 510 | 510 | */ |
| 511 | 511 | function isWhiteIframeDomain($urlAttribute) |
| 512 | 512 | { |
| 513 | - if(is_array($this->whiteIframeUrlList)) |
|
| 513 | + if (is_array($this->whiteIframeUrlList)) |
|
| 514 | 514 | { |
| 515 | - foreach($this->whiteIframeUrlList AS $key => $value) |
|
| 515 | + foreach ($this->whiteIframeUrlList AS $key => $value) |
|
| 516 | 516 | { |
| 517 | - if(preg_match('@^' . preg_quote($value) . '@i', $urlAttribute)) |
|
| 517 | + if (preg_match('@^'.preg_quote($value).'@i', $urlAttribute)) |
|
| 518 | 518 | { |
| 519 | 519 | return TRUE; |
| 520 | 520 | } |
@@ -529,7 +529,7 @@ discard block |
||
| 529 | 529 | */ |
| 530 | 530 | function isWhiteMimetype($mimeType) |
| 531 | 531 | { |
| 532 | - if(isset($this->mimeTypeList[$mimeType])) |
|
| 532 | + if (isset($this->mimeTypeList[$mimeType])) |
|
| 533 | 533 | { |
| 534 | 534 | return TRUE; |
| 535 | 535 | } |
@@ -538,7 +538,7 @@ discard block |
||
| 538 | 538 | |
| 539 | 539 | function isWhiteExt($ext) |
| 540 | 540 | { |
| 541 | - if(isset($this->extList[$ext])) |
|
| 541 | + if (isset($this->extList[$ext])) |
|
| 542 | 542 | { |
| 543 | 543 | return TRUE; |
| 544 | 544 | } |
@@ -547,26 +547,26 @@ discard block |
||
| 547 | 547 | |
| 548 | 548 | function _checkAllowScriptAccess($m) |
| 549 | 549 | { |
| 550 | - if($m[1] == 'object') |
|
| 550 | + if ($m[1] == 'object') |
|
| 551 | 551 | { |
| 552 | 552 | $this->allowscriptaccessList[] = 1; |
| 553 | 553 | } |
| 554 | 554 | |
| 555 | - if($m[1] == 'param') |
|
| 555 | + if ($m[1] == 'param') |
|
| 556 | 556 | { |
| 557 | - if(stripos($m[0], 'allowscriptaccess')) |
|
| 557 | + if (stripos($m[0], 'allowscriptaccess')) |
|
| 558 | 558 | { |
| 559 | 559 | $m[0] = '<param name="allowscriptaccess" value="never"'; |
| 560 | - if(substr($m[0], -1) == '/') |
|
| 560 | + if (substr($m[0], -1) == '/') |
|
| 561 | 561 | { |
| 562 | 562 | $m[0] .= '/'; |
| 563 | 563 | } |
| 564 | 564 | $this->allowscriptaccessList[count($this->allowscriptaccessList) - 1]--; |
| 565 | 565 | } |
| 566 | 566 | } |
| 567 | - else if($m[1] == 'embed') |
|
| 567 | + else if ($m[1] == 'embed') |
|
| 568 | 568 | { |
| 569 | - if(stripos($m[0], 'allowscriptaccess')) |
|
| 569 | + if (stripos($m[0], 'allowscriptaccess')) |
|
| 570 | 570 | { |
| 571 | 571 | $m[0] = preg_replace('/always|samedomain/i', 'never', $m[0]); |
| 572 | 572 | } |
@@ -580,9 +580,9 @@ discard block |
||
| 580 | 580 | |
| 581 | 581 | function _addAllowScriptAccess($m) |
| 582 | 582 | { |
| 583 | - if($this->allowscriptaccessList[$this->allowscriptaccessKey] == 1) |
|
| 583 | + if ($this->allowscriptaccessList[$this->allowscriptaccessKey] == 1) |
|
| 584 | 584 | { |
| 585 | - $m[0] = $m[0] . '<param name="allowscriptaccess" value="never"></param>'; |
|
| 585 | + $m[0] = $m[0].'<param name="allowscriptaccess" value="never"></param>'; |
|
| 586 | 586 | } |
| 587 | 587 | $this->allowscriptaccessKey++; |
| 588 | 588 | return $m[0]; |
@@ -599,31 +599,31 @@ discard block |
||
| 599 | 599 | $whiteUrlCacheFile = FileHandler::getRealPath($this->whiteUrlCacheFile); |
| 600 | 600 | |
| 601 | 601 | $isMake = FALSE; |
| 602 | - if(!file_exists($whiteUrlCacheFile)) |
|
| 602 | + if (!file_exists($whiteUrlCacheFile)) |
|
| 603 | 603 | { |
| 604 | 604 | $isMake = TRUE; |
| 605 | 605 | } |
| 606 | - if(file_exists($whiteUrlCacheFile) && filemtime($whiteUrlCacheFile) < filemtime($whiteUrlXmlFile)) |
|
| 606 | + if (file_exists($whiteUrlCacheFile) && filemtime($whiteUrlCacheFile) < filemtime($whiteUrlXmlFile)) |
|
| 607 | 607 | { |
| 608 | 608 | $isMake = TRUE; |
| 609 | 609 | } |
| 610 | 610 | |
| 611 | - if(gettype($whitelist) == 'array' && gettype($whitelist['object']) == 'array' && gettype($whitelist['iframe']) == 'array') |
|
| 611 | + if (gettype($whitelist) == 'array' && gettype($whitelist['object']) == 'array' && gettype($whitelist['iframe']) == 'array') |
|
| 612 | 612 | { |
| 613 | 613 | $isMake = FALSE; |
| 614 | 614 | } |
| 615 | 615 | |
| 616 | - if(isset($whitelist) && gettype($whitelist) == 'object') |
|
| 616 | + if (isset($whitelist) && gettype($whitelist) == 'object') |
|
| 617 | 617 | { |
| 618 | 618 | $isMake = TRUE; |
| 619 | 619 | } |
| 620 | 620 | |
| 621 | - if($isMake) |
|
| 621 | + if ($isMake) |
|
| 622 | 622 | { |
| 623 | 623 | $whiteUrlList = array(); |
| 624 | 624 | $whiteIframeUrlList = array(); |
| 625 | 625 | |
| 626 | - if(gettype($whitelist->object) == 'array' && gettype($whitelist->iframe) == 'array') |
|
| 626 | + if (gettype($whitelist->object) == 'array' && gettype($whitelist->iframe) == 'array') |
|
| 627 | 627 | { |
| 628 | 628 | $whiteUrlList = $whitelist->object; |
| 629 | 629 | $whiteIframeUrlList = $whitelist->iframe; |
@@ -636,15 +636,15 @@ discard block |
||
| 636 | 636 | $domainListObj = $xmlParser->parse($xmlBuff); |
| 637 | 637 | $embedDomainList = $domainListObj->whiteurl->embed->domain; |
| 638 | 638 | $iframeDomainList = $domainListObj->whiteurl->iframe->domain; |
| 639 | - if(!is_array($embedDomainList)) $embedDomainList = array(); |
|
| 640 | - if(!is_array($iframeDomainList)) $iframeDomainList = array(); |
|
| 639 | + if (!is_array($embedDomainList)) $embedDomainList = array(); |
|
| 640 | + if (!is_array($iframeDomainList)) $iframeDomainList = array(); |
|
| 641 | 641 | |
| 642 | - foreach($embedDomainList AS $key => $value) |
|
| 642 | + foreach ($embedDomainList AS $key => $value) |
|
| 643 | 643 | { |
| 644 | 644 | $patternList = $value->pattern; |
| 645 | - if(is_array($patternList)) |
|
| 645 | + if (is_array($patternList)) |
|
| 646 | 646 | { |
| 647 | - foreach($patternList AS $key => $value) |
|
| 647 | + foreach ($patternList AS $key => $value) |
|
| 648 | 648 | { |
| 649 | 649 | $whiteUrlList[] = $value->body; |
| 650 | 650 | } |
@@ -655,12 +655,12 @@ discard block |
||
| 655 | 655 | } |
| 656 | 656 | } |
| 657 | 657 | |
| 658 | - foreach($iframeDomainList AS $key => $value) |
|
| 658 | + foreach ($iframeDomainList AS $key => $value) |
|
| 659 | 659 | { |
| 660 | 660 | $patternList = $value->pattern; |
| 661 | - if(is_array($patternList)) |
|
| 661 | + if (is_array($patternList)) |
|
| 662 | 662 | { |
| 663 | - foreach($patternList AS $key => $value) |
|
| 663 | + foreach ($patternList AS $key => $value) |
|
| 664 | 664 | { |
| 665 | 665 | $whiteIframeUrlList[] = $value->body; |
| 666 | 666 | } |
@@ -674,12 +674,12 @@ discard block |
||
| 674 | 674 | |
| 675 | 675 | $db_info = Context::getDBInfo(); |
| 676 | 676 | |
| 677 | - if($db_info->embed_white_object) |
|
| 677 | + if ($db_info->embed_white_object) |
|
| 678 | 678 | { |
| 679 | 679 | $whiteUrlList = array_merge($whiteUrlList, $db_info->embed_white_object); |
| 680 | 680 | } |
| 681 | 681 | |
| 682 | - if($db_info->embed_white_iframe) |
|
| 682 | + if ($db_info->embed_white_iframe) |
|
| 683 | 683 | { |
| 684 | 684 | $whiteIframeUrlList = array_merge($whiteIframeUrlList, $db_info->embed_white_iframe); |
| 685 | 685 | } |
@@ -691,8 +691,8 @@ discard block |
||
| 691 | 691 | |
| 692 | 692 | $buff = array(); |
| 693 | 693 | $buff[] = '<?php if(!defined("__XE__")) exit();'; |
| 694 | - $buff[] = '$whiteUrlList = ' . var_export($whiteUrlList, TRUE) . ';'; |
|
| 695 | - $buff[] = '$whiteIframeUrlList = ' . var_export($whiteIframeUrlList, TRUE) . ';'; |
|
| 694 | + $buff[] = '$whiteUrlList = '.var_export($whiteUrlList, TRUE).';'; |
|
| 695 | + $buff[] = '$whiteIframeUrlList = '.var_export($whiteIframeUrlList, TRUE).';'; |
|
| 696 | 696 | |
| 697 | 697 | FileHandler::writeFile($this->whiteUrlCacheFile, implode(PHP_EOL, $buff)); |
| 698 | 698 | } |
@@ -11,6 +11,7 @@ |
||
| 11 | 11 | /** |
| 12 | 12 | * @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable. |
| 13 | 13 | * @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable. |
| 14 | + * @param string $min |
|
| 14 | 15 | */ |
| 15 | 16 | public function __construct($min = null, $max = null) { |
| 16 | 17 | $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; |
@@ -6,41 +6,41 @@ |
||
| 6 | 6 | class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef |
| 7 | 7 | { |
| 8 | 8 | |
| 9 | - protected $min, $max; |
|
| 10 | - |
|
| 11 | - /** |
|
| 12 | - * @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable. |
|
| 13 | - * @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable. |
|
| 14 | - */ |
|
| 15 | - public function __construct($min = null, $max = null) { |
|
| 16 | - $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; |
|
| 17 | - $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; |
|
| 18 | - } |
|
| 19 | - |
|
| 20 | - public function validate($string, $config, $context) { |
|
| 21 | - $string = $this->parseCDATA($string); |
|
| 22 | - |
|
| 23 | - // Optimizations |
|
| 24 | - if ($string === '') return false; |
|
| 25 | - if ($string === '0') return '0'; |
|
| 26 | - if (strlen($string) === 1) return false; |
|
| 27 | - |
|
| 28 | - $length = HTMLPurifier_Length::make($string); |
|
| 29 | - if (!$length->isValid()) return false; |
|
| 30 | - |
|
| 31 | - if ($this->min) { |
|
| 32 | - $c = $length->compareTo($this->min); |
|
| 33 | - if ($c === false) return false; |
|
| 34 | - if ($c < 0) return false; |
|
| 35 | - } |
|
| 36 | - if ($this->max) { |
|
| 37 | - $c = $length->compareTo($this->max); |
|
| 38 | - if ($c === false) return false; |
|
| 39 | - if ($c > 0) return false; |
|
| 40 | - } |
|
| 41 | - |
|
| 42 | - return $length->toString(); |
|
| 43 | - } |
|
| 9 | + protected $min, $max; |
|
| 10 | + |
|
| 11 | + /** |
|
| 12 | + * @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable. |
|
| 13 | + * @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable. |
|
| 14 | + */ |
|
| 15 | + public function __construct($min = null, $max = null) { |
|
| 16 | + $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; |
|
| 17 | + $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; |
|
| 18 | + } |
|
| 19 | + |
|
| 20 | + public function validate($string, $config, $context) { |
|
| 21 | + $string = $this->parseCDATA($string); |
|
| 22 | + |
|
| 23 | + // Optimizations |
|
| 24 | + if ($string === '') return false; |
|
| 25 | + if ($string === '0') return '0'; |
|
| 26 | + if (strlen($string) === 1) return false; |
|
| 27 | + |
|
| 28 | + $length = HTMLPurifier_Length::make($string); |
|
| 29 | + if (!$length->isValid()) return false; |
|
| 30 | + |
|
| 31 | + if ($this->min) { |
|
| 32 | + $c = $length->compareTo($this->min); |
|
| 33 | + if ($c === false) return false; |
|
| 34 | + if ($c < 0) return false; |
|
| 35 | + } |
|
| 36 | + if ($this->max) { |
|
| 37 | + $c = $length->compareTo($this->max); |
|
| 38 | + if ($c === false) return false; |
|
| 39 | + if ($c > 0) return false; |
|
| 40 | + } |
|
| 41 | + |
|
| 42 | + return $length->toString(); |
|
| 43 | + } |
|
| 44 | 44 | |
| 45 | 45 | } |
| 46 | 46 | |
@@ -21,22 +21,38 @@ |
||
| 21 | 21 | $string = $this->parseCDATA($string); |
| 22 | 22 | |
| 23 | 23 | // Optimizations |
| 24 | - if ($string === '') return false; |
|
| 25 | - if ($string === '0') return '0'; |
|
| 26 | - if (strlen($string) === 1) return false; |
|
| 24 | + if ($string === '') { |
|
| 25 | + return false; |
|
| 26 | + } |
|
| 27 | + if ($string === '0') { |
|
| 28 | + return '0'; |
|
| 29 | + } |
|
| 30 | + if (strlen($string) === 1) { |
|
| 31 | + return false; |
|
| 32 | + } |
|
| 27 | 33 | |
| 28 | 34 | $length = HTMLPurifier_Length::make($string); |
| 29 | - if (!$length->isValid()) return false; |
|
| 35 | + if (!$length->isValid()) { |
|
| 36 | + return false; |
|
| 37 | + } |
|
| 30 | 38 | |
| 31 | 39 | if ($this->min) { |
| 32 | 40 | $c = $length->compareTo($this->min); |
| 33 | - if ($c === false) return false; |
|
| 34 | - if ($c < 0) return false; |
|
| 41 | + if ($c === false) { |
|
| 42 | + return false; |
|
| 43 | + } |
|
| 44 | + if ($c < 0) { |
|
| 45 | + return false; |
|
| 46 | + } |
|
| 35 | 47 | } |
| 36 | 48 | if ($this->max) { |
| 37 | 49 | $c = $length->compareTo($this->max); |
| 38 | - if ($c === false) return false; |
|
| 39 | - if ($c > 0) return false; |
|
| 50 | + if ($c === false) { |
|
| 51 | + return false; |
|
| 52 | + } |
|
| 53 | + if ($c > 0) { |
|
| 54 | + return false; |
|
| 55 | + } |
|
| 40 | 56 | } |
| 41 | 57 | |
| 42 | 58 | return $length->toString(); |
@@ -9,6 +9,9 @@ |
||
| 9 | 9 | class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 |
| 10 | 10 | { |
| 11 | 11 | |
| 12 | + /** |
|
| 13 | + * @param string $aIP |
|
| 14 | + */ |
|
| 12 | 15 | public function validate($aIP, $config, $context) { |
| 13 | 16 | |
| 14 | 17 | if (!$this->ip4) $this->_loadRegex(); |
@@ -9,90 +9,90 @@ |
||
| 9 | 9 | class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4 |
| 10 | 10 | { |
| 11 | 11 | |
| 12 | - public function validate($aIP, $config, $context) { |
|
| 13 | - |
|
| 14 | - if (!$this->ip4) $this->_loadRegex(); |
|
| 15 | - |
|
| 16 | - $original = $aIP; |
|
| 17 | - |
|
| 18 | - $hex = '[0-9a-fA-F]'; |
|
| 19 | - $blk = '(?:' . $hex . '{1,4})'; |
|
| 20 | - $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
| 21 | - |
|
| 22 | - // prefix check |
|
| 23 | - if (strpos($aIP, '/') !== false) |
|
| 24 | - { |
|
| 25 | - if (preg_match('#' . $pre . '$#s', $aIP, $find)) |
|
| 26 | - { |
|
| 27 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
| 28 | - unset($find); |
|
| 29 | - } |
|
| 30 | - else |
|
| 31 | - { |
|
| 32 | - return false; |
|
| 33 | - } |
|
| 34 | - } |
|
| 35 | - |
|
| 36 | - // IPv4-compatiblity check |
|
| 37 | - if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) |
|
| 38 | - { |
|
| 39 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
| 40 | - $ip = explode('.', $find[0]); |
|
| 41 | - $ip = array_map('dechex', $ip); |
|
| 42 | - $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; |
|
| 43 | - unset($find, $ip); |
|
| 44 | - } |
|
| 45 | - |
|
| 46 | - // compression check |
|
| 47 | - $aIP = explode('::', $aIP); |
|
| 48 | - $c = count($aIP); |
|
| 49 | - if ($c > 2) |
|
| 50 | - { |
|
| 51 | - return false; |
|
| 52 | - } |
|
| 53 | - elseif ($c == 2) |
|
| 54 | - { |
|
| 55 | - list($first, $second) = $aIP; |
|
| 56 | - $first = explode(':', $first); |
|
| 57 | - $second = explode(':', $second); |
|
| 58 | - |
|
| 59 | - if (count($first) + count($second) > 8) |
|
| 60 | - { |
|
| 61 | - return false; |
|
| 62 | - } |
|
| 63 | - |
|
| 64 | - while(count($first) < 8) |
|
| 65 | - { |
|
| 66 | - array_push($first, '0'); |
|
| 67 | - } |
|
| 68 | - |
|
| 69 | - array_splice($first, 8 - count($second), 8, $second); |
|
| 70 | - $aIP = $first; |
|
| 71 | - unset($first,$second); |
|
| 72 | - } |
|
| 73 | - else |
|
| 74 | - { |
|
| 75 | - $aIP = explode(':', $aIP[0]); |
|
| 76 | - } |
|
| 77 | - $c = count($aIP); |
|
| 78 | - |
|
| 79 | - if ($c != 8) |
|
| 80 | - { |
|
| 81 | - return false; |
|
| 82 | - } |
|
| 83 | - |
|
| 84 | - // All the pieces should be 16-bit hex strings. Are they? |
|
| 85 | - foreach ($aIP as $piece) |
|
| 86 | - { |
|
| 87 | - if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) |
|
| 88 | - { |
|
| 89 | - return false; |
|
| 90 | - } |
|
| 91 | - } |
|
| 92 | - |
|
| 93 | - return $original; |
|
| 94 | - |
|
| 95 | - } |
|
| 12 | + public function validate($aIP, $config, $context) { |
|
| 13 | + |
|
| 14 | + if (!$this->ip4) $this->_loadRegex(); |
|
| 15 | + |
|
| 16 | + $original = $aIP; |
|
| 17 | + |
|
| 18 | + $hex = '[0-9a-fA-F]'; |
|
| 19 | + $blk = '(?:' . $hex . '{1,4})'; |
|
| 20 | + $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
| 21 | + |
|
| 22 | + // prefix check |
|
| 23 | + if (strpos($aIP, '/') !== false) |
|
| 24 | + { |
|
| 25 | + if (preg_match('#' . $pre . '$#s', $aIP, $find)) |
|
| 26 | + { |
|
| 27 | + $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
| 28 | + unset($find); |
|
| 29 | + } |
|
| 30 | + else |
|
| 31 | + { |
|
| 32 | + return false; |
|
| 33 | + } |
|
| 34 | + } |
|
| 35 | + |
|
| 36 | + // IPv4-compatiblity check |
|
| 37 | + if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) |
|
| 38 | + { |
|
| 39 | + $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
| 40 | + $ip = explode('.', $find[0]); |
|
| 41 | + $ip = array_map('dechex', $ip); |
|
| 42 | + $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; |
|
| 43 | + unset($find, $ip); |
|
| 44 | + } |
|
| 45 | + |
|
| 46 | + // compression check |
|
| 47 | + $aIP = explode('::', $aIP); |
|
| 48 | + $c = count($aIP); |
|
| 49 | + if ($c > 2) |
|
| 50 | + { |
|
| 51 | + return false; |
|
| 52 | + } |
|
| 53 | + elseif ($c == 2) |
|
| 54 | + { |
|
| 55 | + list($first, $second) = $aIP; |
|
| 56 | + $first = explode(':', $first); |
|
| 57 | + $second = explode(':', $second); |
|
| 58 | + |
|
| 59 | + if (count($first) + count($second) > 8) |
|
| 60 | + { |
|
| 61 | + return false; |
|
| 62 | + } |
|
| 63 | + |
|
| 64 | + while(count($first) < 8) |
|
| 65 | + { |
|
| 66 | + array_push($first, '0'); |
|
| 67 | + } |
|
| 68 | + |
|
| 69 | + array_splice($first, 8 - count($second), 8, $second); |
|
| 70 | + $aIP = $first; |
|
| 71 | + unset($first,$second); |
|
| 72 | + } |
|
| 73 | + else |
|
| 74 | + { |
|
| 75 | + $aIP = explode(':', $aIP[0]); |
|
| 76 | + } |
|
| 77 | + $c = count($aIP); |
|
| 78 | + |
|
| 79 | + if ($c != 8) |
|
| 80 | + { |
|
| 81 | + return false; |
|
| 82 | + } |
|
| 83 | + |
|
| 84 | + // All the pieces should be 16-bit hex strings. Are they? |
|
| 85 | + foreach ($aIP as $piece) |
|
| 86 | + { |
|
| 87 | + if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) |
|
| 88 | + { |
|
| 89 | + return false; |
|
| 90 | + } |
|
| 91 | + } |
|
| 92 | + |
|
| 93 | + return $original; |
|
| 94 | + |
|
| 95 | + } |
|
| 96 | 96 | |
| 97 | 97 | } |
| 98 | 98 | |
@@ -11,7 +11,9 @@ discard block |
||
| 11 | 11 | |
| 12 | 12 | public function validate($aIP, $config, $context) { |
| 13 | 13 | |
| 14 | - if (!$this->ip4) $this->_loadRegex(); |
|
| 14 | + if (!$this->ip4) { |
|
| 15 | + $this->_loadRegex(); |
|
| 16 | + } |
|
| 15 | 17 | |
| 16 | 18 | $original = $aIP; |
| 17 | 19 | |
@@ -26,8 +28,7 @@ discard block |
||
| 26 | 28 | { |
| 27 | 29 | $aIP = substr($aIP, 0, 0-strlen($find[0])); |
| 28 | 30 | unset($find); |
| 29 | - } |
|
| 30 | - else |
|
| 31 | + } else |
|
| 31 | 32 | { |
| 32 | 33 | return false; |
| 33 | 34 | } |
@@ -49,8 +50,7 @@ discard block |
||
| 49 | 50 | if ($c > 2) |
| 50 | 51 | { |
| 51 | 52 | return false; |
| 52 | - } |
|
| 53 | - elseif ($c == 2) |
|
| 53 | + } elseif ($c == 2) |
|
| 54 | 54 | { |
| 55 | 55 | list($first, $second) = $aIP; |
| 56 | 56 | $first = explode(':', $first); |
@@ -69,8 +69,7 @@ discard block |
||
| 69 | 69 | array_splice($first, 8 - count($second), 8, $second); |
| 70 | 70 | $aIP = $first; |
| 71 | 71 | unset($first,$second); |
| 72 | - } |
|
| 73 | - else |
|
| 72 | + } else |
|
| 74 | 73 | { |
| 75 | 74 | $aIP = explode(':', $aIP[0]); |
| 76 | 75 | } |
@@ -16,15 +16,15 @@ discard block |
||
| 16 | 16 | $original = $aIP; |
| 17 | 17 | |
| 18 | 18 | $hex = '[0-9a-fA-F]'; |
| 19 | - $blk = '(?:' . $hex . '{1,4})'; |
|
| 20 | - $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
| 19 | + $blk = '(?:'.$hex.'{1,4})'; |
|
| 20 | + $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 |
|
| 21 | 21 | |
| 22 | 22 | // prefix check |
| 23 | 23 | if (strpos($aIP, '/') !== false) |
| 24 | 24 | { |
| 25 | - if (preg_match('#' . $pre . '$#s', $aIP, $find)) |
|
| 25 | + if (preg_match('#'.$pre.'$#s', $aIP, $find)) |
|
| 26 | 26 | { |
| 27 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
| 27 | + $aIP = substr($aIP, 0, 0 - strlen($find[0])); |
|
| 28 | 28 | unset($find); |
| 29 | 29 | } |
| 30 | 30 | else |
@@ -34,12 +34,12 @@ discard block |
||
| 34 | 34 | } |
| 35 | 35 | |
| 36 | 36 | // IPv4-compatibility check |
| 37 | - if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) |
|
| 37 | + if (preg_match('#(?<=:'.')'.$this->ip4.'$#s', $aIP, $find)) |
|
| 38 | 38 | { |
| 39 | - $aIP = substr($aIP, 0, 0-strlen($find[0])); |
|
| 39 | + $aIP = substr($aIP, 0, 0 - strlen($find[0])); |
|
| 40 | 40 | $ip = explode('.', $find[0]); |
| 41 | 41 | $ip = array_map('dechex', $ip); |
| 42 | - $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; |
|
| 42 | + $aIP .= $ip[0].$ip[1].':'.$ip[2].$ip[3]; |
|
| 43 | 43 | unset($find, $ip); |
| 44 | 44 | } |
| 45 | 45 | |
@@ -61,14 +61,14 @@ discard block |
||
| 61 | 61 | return false; |
| 62 | 62 | } |
| 63 | 63 | |
| 64 | - while(count($first) < 8) |
|
| 64 | + while (count($first) < 8) |
|
| 65 | 65 | { |
| 66 | 66 | array_push($first, '0'); |
| 67 | 67 | } |
| 68 | 68 | |
| 69 | 69 | array_splice($first, 8 - count($second), 8, $second); |
| 70 | 70 | $aIP = $first; |
| 71 | - unset($first,$second); |
|
| 71 | + unset($first, $second); |
|
| 72 | 72 | } |
| 73 | 73 | else |
| 74 | 74 | { |
@@ -89,6 +89,7 @@ discard block |
||
| 89 | 89 | /** |
| 90 | 90 | * @param $definition HTMLPurifier_ConfigSchema that defines what directives |
| 91 | 91 | * are allowed. |
| 92 | + * @param HTMLPurifier_PropertyList $parent |
|
| 92 | 93 | */ |
| 93 | 94 | public function __construct($definition, $parent = null) { |
| 94 | 95 | $parent = $parent ? $parent : $definition->defaultPlist; |
@@ -104,7 +105,7 @@ discard block |
||
| 104 | 105 | * an array of directives based on loadArray(), |
| 105 | 106 | * or a string filename of an ini file. |
| 106 | 107 | * @param HTMLPurifier_ConfigSchema Schema object |
| 107 | - * @return Configured HTMLPurifier_Config object |
|
| 108 | + * @return HTMLPurifier_Config Configured HTMLPurifier_Config object |
|
| 108 | 109 | */ |
| 109 | 110 | public static function create($config, $schema = null) { |
| 110 | 111 | if ($config instanceof HTMLPurifier_Config) { |
@@ -133,7 +134,7 @@ discard block |
||
| 133 | 134 | |
| 134 | 135 | /** |
| 135 | 136 | * Convenience constructor that creates a default configuration object. |
| 136 | - * @return Default HTMLPurifier_Config object. |
|
| 137 | + * @return HTMLPurifier_Config Default HTMLPurifier_Config object. |
|
| 137 | 138 | */ |
| 138 | 139 | public static function createDefault() { |
| 139 | 140 | $definition = HTMLPurifier_ConfigSchema::instance(); |
@@ -143,7 +144,7 @@ discard block |
||
| 143 | 144 | |
| 144 | 145 | /** |
| 145 | 146 | * Retrieves a value from the configuration. |
| 146 | - * @param $key String key |
|
| 147 | + * @param string $key String key |
|
| 147 | 148 | */ |
| 148 | 149 | public function get($key, $a = null) { |
| 149 | 150 | if ($a !== null) { |
@@ -231,7 +232,7 @@ discard block |
||
| 231 | 232 | |
| 232 | 233 | /** |
| 233 | 234 | * Sets a value to configuration. |
| 234 | - * @param $key String key |
|
| 235 | + * @param string $key String key |
|
| 235 | 236 | * @param $value Mixed value |
| 236 | 237 | */ |
| 237 | 238 | public function set($key, $value, $a = null) { |
@@ -543,7 +544,7 @@ discard block |
||
| 543 | 544 | * Returns a list of array(namespace, directive) for all directives |
| 544 | 545 | * that are allowed in a web-form context as per an allowed |
| 545 | 546 | * namespaces/directives list. |
| 546 | - * @param $allowed List of allowed namespaces/directives |
|
| 547 | + * @param bool|string|array $allowed True to allow all directives, or a string/array list of allowed namespaces/directives |
|
| 547 | 548 | */ |
| 548 | 549 | public static function getAllowedDirectivesForForm($allowed, $schema = null) { |
| 549 | 550 | if (!$schema) { |
@@ -585,7 +586,6 @@ discard block |
||
| 585 | 586 | /** |
| 586 | 587 | * Loads configuration values from $_GET/$_POST that were posted |
| 587 | 588 | * via ConfigForm |
| 588 | - * @param $array $_GET or $_POST array to import |
|
| 589 | 589 | * @param $index Index/name that the config variables are in |
| 590 | 590 | * @param $allowed List of allowed namespaces/directives |
| 591 | 591 | * @param $mq_fix Boolean whether or not to enable magic quotes fix |
@@ -632,7 +632,7 @@ discard block |
||
| 632 | 632 | |
| 633 | 633 | /** |
| 634 | 634 | * Loads configuration values from an ini file |
| 635 | - * @param $filename Name of ini file |
|
| 635 | + * @param string $filename Name of ini file |
|
| 636 | 636 | */ |
| 637 | 637 | public function loadIni($filename) { |
| 638 | 638 | if ($this->isFinalized('Cannot load directives after finalization')) return; |
@@ -674,6 +674,7 @@ discard block |
||
| 674 | 674 | /** |
| 675 | 675 | * Produces a nicely formatted error message by supplying the |
| 676 | 676 | * stack frame information OUTSIDE of HTMLPurifier_Config. |
| 677 | + * @param integer $no Error level, e.g. E_USER_NOTICE, E_USER_WARNING or E_USER_ERROR |
|
| 677 | 678 | */ |
| 678 | 679 | protected function triggerError($msg, $no) { |
| 679 | 680 | // determine previous stack frame |
@@ -17,692 +17,692 @@ |
||
| 17 | 17 | class HTMLPurifier_Config |
| 18 | 18 | { |
| 19 | 19 | |
| 20 | - /** |
|
| 21 | - * HTML Purifier's version |
|
| 22 | - */ |
|
| 23 | - public $version = '4.4.0'; |
|
| 24 | - |
|
| 25 | - /** |
|
| 26 | - * Bool indicator whether or not to automatically finalize |
|
| 27 | - * the object if a read operation is done |
|
| 28 | - */ |
|
| 29 | - public $autoFinalize = true; |
|
| 30 | - |
|
| 31 | - // protected member variables |
|
| 32 | - |
|
| 33 | - /** |
|
| 34 | - * Namespace indexed array of serials for specific namespaces (see |
|
| 35 | - * getSerial() for more info). |
|
| 36 | - */ |
|
| 37 | - protected $serials = array(); |
|
| 38 | - |
|
| 39 | - /** |
|
| 40 | - * Serial for entire configuration object |
|
| 41 | - */ |
|
| 42 | - protected $serial; |
|
| 43 | - |
|
| 44 | - /** |
|
| 45 | - * Parser for variables |
|
| 46 | - */ |
|
| 47 | - protected $parser = null; |
|
| 48 | - |
|
| 49 | - /** |
|
| 50 | - * Reference HTMLPurifier_ConfigSchema for value checking |
|
| 51 | - * @note This is public for introspective purposes. Please don't |
|
| 52 | - * abuse! |
|
| 53 | - */ |
|
| 54 | - public $def; |
|
| 55 | - |
|
| 56 | - /** |
|
| 57 | - * Indexed array of definitions |
|
| 58 | - */ |
|
| 59 | - protected $definitions; |
|
| 60 | - |
|
| 61 | - /** |
|
| 62 | - * Bool indicator whether or not config is finalized |
|
| 63 | - */ |
|
| 64 | - protected $finalized = false; |
|
| 65 | - |
|
| 66 | - /** |
|
| 67 | - * Property list containing configuration directives. |
|
| 68 | - */ |
|
| 69 | - protected $plist; |
|
| 70 | - |
|
| 71 | - /** |
|
| 72 | - * Whether or not a set is taking place due to an |
|
| 73 | - * alias lookup. |
|
| 74 | - */ |
|
| 75 | - private $aliasMode; |
|
| 76 | - |
|
| 77 | - /** |
|
| 78 | - * Set to false if you do not want line and file numbers in errors |
|
| 79 | - * (useful when unit testing). This will also compress some errors |
|
| 80 | - * and exceptions. |
|
| 81 | - */ |
|
| 82 | - public $chatty = true; |
|
| 83 | - |
|
| 84 | - /** |
|
| 85 | - * Current lock; only gets to this namespace are allowed. |
|
| 86 | - */ |
|
| 87 | - private $lock; |
|
| 88 | - |
|
| 89 | - /** |
|
| 90 | - * @param $definition HTMLPurifier_ConfigSchema that defines what directives |
|
| 91 | - * are allowed. |
|
| 92 | - */ |
|
| 93 | - public function __construct($definition, $parent = null) { |
|
| 94 | - $parent = $parent ? $parent : $definition->defaultPlist; |
|
| 95 | - $this->plist = new HTMLPurifier_PropertyList($parent); |
|
| 96 | - $this->def = $definition; // keep a copy around for checking |
|
| 97 | - $this->parser = new HTMLPurifier_VarParser_Flexible(); |
|
| 98 | - } |
|
| 99 | - |
|
| 100 | - /** |
|
| 101 | - * Convenience constructor that creates a config object based on a mixed var |
|
| 102 | - * @param mixed $config Variable that defines the state of the config |
|
| 103 | - * object. Can be: a HTMLPurifier_Config() object, |
|
| 104 | - * an array of directives based on loadArray(), |
|
| 105 | - * or a string filename of an ini file. |
|
| 106 | - * @param HTMLPurifier_ConfigSchema Schema object |
|
| 107 | - * @return Configured HTMLPurifier_Config object |
|
| 108 | - */ |
|
| 109 | - public static function create($config, $schema = null) { |
|
| 110 | - if ($config instanceof HTMLPurifier_Config) { |
|
| 111 | - // pass-through |
|
| 112 | - return $config; |
|
| 113 | - } |
|
| 114 | - if (!$schema) { |
|
| 115 | - $ret = HTMLPurifier_Config::createDefault(); |
|
| 116 | - } else { |
|
| 117 | - $ret = new HTMLPurifier_Config($schema); |
|
| 118 | - } |
|
| 119 | - if (is_string($config)) $ret->loadIni($config); |
|
| 120 | - elseif (is_array($config)) $ret->loadArray($config); |
|
| 121 | - return $ret; |
|
| 122 | - } |
|
| 123 | - |
|
| 124 | - /** |
|
| 125 | - * Creates a new config object that inherits from a previous one. |
|
| 126 | - * @param HTMLPurifier_Config $config Configuration object to inherit |
|
| 127 | - * from. |
|
| 128 | - * @return HTMLPurifier_Config object with $config as its parent. |
|
| 129 | - */ |
|
| 130 | - public static function inherit(HTMLPurifier_Config $config) { |
|
| 131 | - return new HTMLPurifier_Config($config->def, $config->plist); |
|
| 132 | - } |
|
| 133 | - |
|
| 134 | - /** |
|
| 135 | - * Convenience constructor that creates a default configuration object. |
|
| 136 | - * @return Default HTMLPurifier_Config object. |
|
| 137 | - */ |
|
| 138 | - public static function createDefault() { |
|
| 139 | - $definition = HTMLPurifier_ConfigSchema::instance(); |
|
| 140 | - $config = new HTMLPurifier_Config($definition); |
|
| 141 | - return $config; |
|
| 142 | - } |
|
| 143 | - |
|
| 144 | - /** |
|
| 145 | - * Retreives a value from the configuration. |
|
| 146 | - * @param $key String key |
|
| 147 | - */ |
|
| 148 | - public function get($key, $a = null) { |
|
| 149 | - if ($a !== null) { |
|
| 150 | - $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); |
|
| 151 | - $key = "$key.$a"; |
|
| 152 | - } |
|
| 153 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 154 | - if (!isset($this->def->info[$key])) { |
|
| 155 | - // can't add % due to SimpleTest bug |
|
| 156 | - $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 157 | - E_USER_WARNING); |
|
| 158 | - return; |
|
| 159 | - } |
|
| 160 | - if (isset($this->def->info[$key]->isAlias)) { |
|
| 161 | - $d = $this->def->info[$key]; |
|
| 162 | - $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, |
|
| 163 | - E_USER_ERROR); |
|
| 164 | - return; |
|
| 165 | - } |
|
| 166 | - if ($this->lock) { |
|
| 167 | - list($ns) = explode('.', $key); |
|
| 168 | - if ($ns !== $this->lock) { |
|
| 169 | - $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); |
|
| 170 | - return; |
|
| 171 | - } |
|
| 172 | - } |
|
| 173 | - return $this->plist->get($key); |
|
| 174 | - } |
|
| 175 | - |
|
| 176 | - /** |
|
| 177 | - * Retreives an array of directives to values from a given namespace |
|
| 178 | - * @param $namespace String namespace |
|
| 179 | - */ |
|
| 180 | - public function getBatch($namespace) { |
|
| 181 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 182 | - $full = $this->getAll(); |
|
| 183 | - if (!isset($full[$namespace])) { |
|
| 184 | - $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 185 | - E_USER_WARNING); |
|
| 186 | - return; |
|
| 187 | - } |
|
| 188 | - return $full[$namespace]; |
|
| 189 | - } |
|
| 190 | - |
|
| 191 | - /** |
|
| 192 | - * Returns a md5 signature of a segment of the configuration object |
|
| 193 | - * that uniquely identifies that particular configuration |
|
| 194 | - * @note Revision is handled specially and is removed from the batch |
|
| 195 | - * before processing! |
|
| 196 | - * @param $namespace Namespace to get serial for |
|
| 197 | - */ |
|
| 198 | - public function getBatchSerial($namespace) { |
|
| 199 | - if (empty($this->serials[$namespace])) { |
|
| 200 | - $batch = $this->getBatch($namespace); |
|
| 201 | - unset($batch['DefinitionRev']); |
|
| 202 | - $this->serials[$namespace] = md5(serialize($batch)); |
|
| 203 | - } |
|
| 204 | - return $this->serials[$namespace]; |
|
| 205 | - } |
|
| 206 | - |
|
| 207 | - /** |
|
| 208 | - * Returns a md5 signature for the entire configuration object |
|
| 209 | - * that uniquely identifies that particular configuration |
|
| 210 | - */ |
|
| 211 | - public function getSerial() { |
|
| 212 | - if (empty($this->serial)) { |
|
| 213 | - $this->serial = md5(serialize($this->getAll())); |
|
| 214 | - } |
|
| 215 | - return $this->serial; |
|
| 216 | - } |
|
| 217 | - |
|
| 218 | - /** |
|
| 219 | - * Retrieves all directives, organized by namespace |
|
| 220 | - * @warning This is a pretty inefficient function, avoid if you can |
|
| 221 | - */ |
|
| 222 | - public function getAll() { |
|
| 223 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 224 | - $ret = array(); |
|
| 225 | - foreach ($this->plist->squash() as $name => $value) { |
|
| 226 | - list($ns, $key) = explode('.', $name, 2); |
|
| 227 | - $ret[$ns][$key] = $value; |
|
| 228 | - } |
|
| 229 | - return $ret; |
|
| 230 | - } |
|
| 231 | - |
|
| 232 | - /** |
|
| 233 | - * Sets a value to configuration. |
|
| 234 | - * @param $key String key |
|
| 235 | - * @param $value Mixed value |
|
| 236 | - */ |
|
| 237 | - public function set($key, $value, $a = null) { |
|
| 238 | - if (strpos($key, '.') === false) { |
|
| 239 | - $namespace = $key; |
|
| 240 | - $directive = $value; |
|
| 241 | - $value = $a; |
|
| 242 | - $key = "$key.$directive"; |
|
| 243 | - $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); |
|
| 244 | - } else { |
|
| 245 | - list($namespace) = explode('.', $key); |
|
| 246 | - } |
|
| 247 | - if ($this->isFinalized('Cannot set directive after finalization')) return; |
|
| 248 | - if (!isset($this->def->info[$key])) { |
|
| 249 | - $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' to value', |
|
| 250 | - E_USER_WARNING); |
|
| 251 | - return; |
|
| 252 | - } |
|
| 253 | - $def = $this->def->info[$key]; |
|
| 254 | - |
|
| 255 | - if (isset($def->isAlias)) { |
|
| 256 | - if ($this->aliasMode) { |
|
| 257 | - $this->triggerError('Double-aliases not allowed, please fix '. |
|
| 258 | - 'ConfigSchema bug with' . $key, E_USER_ERROR); |
|
| 259 | - return; |
|
| 260 | - } |
|
| 261 | - $this->aliasMode = true; |
|
| 262 | - $this->set($def->key, $value); |
|
| 263 | - $this->aliasMode = false; |
|
| 264 | - $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); |
|
| 265 | - return; |
|
| 266 | - } |
|
| 267 | - |
|
| 268 | - // Raw type might be negative when using the fully optimized form |
|
| 269 | - // of stdclass, which indicates allow_null == true |
|
| 270 | - $rtype = is_int($def) ? $def : $def->type; |
|
| 271 | - if ($rtype < 0) { |
|
| 272 | - $type = -$rtype; |
|
| 273 | - $allow_null = true; |
|
| 274 | - } else { |
|
| 275 | - $type = $rtype; |
|
| 276 | - $allow_null = isset($def->allow_null); |
|
| 277 | - } |
|
| 278 | - |
|
| 279 | - try { |
|
| 280 | - $value = $this->parser->parse($value, $type, $allow_null); |
|
| 281 | - } catch (HTMLPurifier_VarParserException $e) { |
|
| 282 | - $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); |
|
| 283 | - return; |
|
| 284 | - } |
|
| 285 | - if (is_string($value) && is_object($def)) { |
|
| 286 | - // resolve value alias if defined |
|
| 287 | - if (isset($def->aliases[$value])) { |
|
| 288 | - $value = $def->aliases[$value]; |
|
| 289 | - } |
|
| 290 | - // check to see if the value is allowed |
|
| 291 | - if (isset($def->allowed) && !isset($def->allowed[$value])) { |
|
| 292 | - $this->triggerError('Value not supported, valid values are: ' . |
|
| 293 | - $this->_listify($def->allowed), E_USER_WARNING); |
|
| 294 | - return; |
|
| 295 | - } |
|
| 296 | - } |
|
| 297 | - $this->plist->set($key, $value); |
|
| 298 | - |
|
| 299 | - // reset definitions if the directives they depend on changed |
|
| 300 | - // this is a very costly process, so it's discouraged |
|
| 301 | - // with finalization |
|
| 302 | - if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { |
|
| 303 | - $this->definitions[$namespace] = null; |
|
| 304 | - } |
|
| 305 | - |
|
| 306 | - $this->serials[$namespace] = false; |
|
| 307 | - } |
|
| 308 | - |
|
| 309 | - /** |
|
| 310 | - * Convenience function for error reporting |
|
| 311 | - */ |
|
| 312 | - private function _listify($lookup) { |
|
| 313 | - $list = array(); |
|
| 314 | - foreach ($lookup as $name => $b) $list[] = $name; |
|
| 315 | - return implode(', ', $list); |
|
| 316 | - } |
|
| 317 | - |
|
| 318 | - /** |
|
| 319 | - * Retrieves object reference to the HTML definition. |
|
| 320 | - * @param $raw Return a copy that has not been setup yet. Must be |
|
| 321 | - * called before it's been setup, otherwise won't work. |
|
| 322 | - * @param $optimized If true, this method may return null, to |
|
| 323 | - * indicate that a cached version of the modified |
|
| 324 | - * definition object is available and no further edits |
|
| 325 | - * are necessary. Consider using |
|
| 326 | - * maybeGetRawHTMLDefinition, which is more explicitly |
|
| 327 | - * named, instead. |
|
| 328 | - */ |
|
| 329 | - public function getHTMLDefinition($raw = false, $optimized = false) { |
|
| 330 | - return $this->getDefinition('HTML', $raw, $optimized); |
|
| 331 | - } |
|
| 332 | - |
|
| 333 | - /** |
|
| 334 | - * Retrieves object reference to the CSS definition |
|
| 335 | - * @param $raw Return a copy that has not been setup yet. Must be |
|
| 336 | - * called before it's been setup, otherwise won't work. |
|
| 337 | - * @param $optimized If true, this method may return null, to |
|
| 338 | - * indicate that a cached version of the modified |
|
| 339 | - * definition object is available and no further edits |
|
| 340 | - * are necessary. Consider using |
|
| 341 | - * maybeGetRawCSSDefinition, which is more explicitly |
|
| 342 | - * named, instead. |
|
| 343 | - */ |
|
| 344 | - public function getCSSDefinition($raw = false, $optimized = false) { |
|
| 345 | - return $this->getDefinition('CSS', $raw, $optimized); |
|
| 346 | - } |
|
| 347 | - |
|
| 348 | - /** |
|
| 349 | - * Retrieves object reference to the URI definition |
|
| 350 | - * @param $raw Return a copy that has not been setup yet. Must be |
|
| 351 | - * called before it's been setup, otherwise won't work. |
|
| 352 | - * @param $optimized If true, this method may return null, to |
|
| 353 | - * indicate that a cached version of the modified |
|
| 354 | - * definition object is available and no further edits |
|
| 355 | - * are necessary. Consider using |
|
| 356 | - * maybeGetRawURIDefinition, which is more explicitly |
|
| 357 | - * named, instead. |
|
| 358 | - */ |
|
| 359 | - public function getURIDefinition($raw = false, $optimized = false) { |
|
| 360 | - return $this->getDefinition('URI', $raw, $optimized); |
|
| 361 | - } |
|
| 362 | - |
|
| 363 | - /** |
|
| 364 | - * Retrieves a definition |
|
| 365 | - * @param $type Type of definition: HTML, CSS, etc |
|
| 366 | - * @param $raw Whether or not definition should be returned raw |
|
| 367 | - * @param $optimized Only has an effect when $raw is true. Whether |
|
| 368 | - * or not to return null if the result is already present in |
|
| 369 | - * the cache. This is off by default for backwards |
|
| 370 | - * compatibility reasons, but you need to do things this |
|
| 371 | - * way in order to ensure that caching is done properly. |
|
| 372 | - * Check out enduser-customize.html for more details. |
|
| 373 | - * We probably won't ever change this default, as much as the |
|
| 374 | - * maybe semantics is the "right thing to do." |
|
| 375 | - */ |
|
| 376 | - public function getDefinition($type, $raw = false, $optimized = false) { |
|
| 377 | - if ($optimized && !$raw) { |
|
| 378 | - throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false"); |
|
| 379 | - } |
|
| 380 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 381 | - // temporarily suspend locks, so we can handle recursive definition calls |
|
| 382 | - $lock = $this->lock; |
|
| 383 | - $this->lock = null; |
|
| 384 | - $factory = HTMLPurifier_DefinitionCacheFactory::instance(); |
|
| 385 | - $cache = $factory->create($type, $this); |
|
| 386 | - $this->lock = $lock; |
|
| 387 | - if (!$raw) { |
|
| 388 | - // full definition |
|
| 389 | - // --------------- |
|
| 390 | - // check if definition is in memory |
|
| 391 | - if (!empty($this->definitions[$type])) { |
|
| 392 | - $def = $this->definitions[$type]; |
|
| 393 | - // check if the definition is setup |
|
| 394 | - if ($def->setup) { |
|
| 395 | - return $def; |
|
| 396 | - } else { |
|
| 397 | - $def->setup($this); |
|
| 398 | - if ($def->optimized) $cache->add($def, $this); |
|
| 399 | - return $def; |
|
| 400 | - } |
|
| 401 | - } |
|
| 402 | - // check if definition is in cache |
|
| 403 | - $def = $cache->get($this); |
|
| 404 | - if ($def) { |
|
| 405 | - // definition in cache, save to memory and return it |
|
| 406 | - $this->definitions[$type] = $def; |
|
| 407 | - return $def; |
|
| 408 | - } |
|
| 409 | - // initialize it |
|
| 410 | - $def = $this->initDefinition($type); |
|
| 411 | - // set it up |
|
| 412 | - $this->lock = $type; |
|
| 413 | - $def->setup($this); |
|
| 414 | - $this->lock = null; |
|
| 415 | - // save in cache |
|
| 416 | - $cache->add($def, $this); |
|
| 417 | - // return it |
|
| 418 | - return $def; |
|
| 419 | - } else { |
|
| 420 | - // raw definition |
|
| 421 | - // -------------- |
|
| 422 | - // check preconditions |
|
| 423 | - $def = null; |
|
| 424 | - if ($optimized) { |
|
| 425 | - if (is_null($this->get($type . '.DefinitionID'))) { |
|
| 426 | - // fatally error out if definition ID not set |
|
| 427 | - throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); |
|
| 428 | - } |
|
| 429 | - } |
|
| 430 | - if (!empty($this->definitions[$type])) { |
|
| 431 | - $def = $this->definitions[$type]; |
|
| 432 | - if ($def->setup && !$optimized) { |
|
| 433 | - $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : ""; |
|
| 434 | - throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra); |
|
| 435 | - } |
|
| 436 | - if ($def->optimized === null) { |
|
| 437 | - $extra = $this->chatty ? " (try flushing your cache)" : ""; |
|
| 438 | - throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra); |
|
| 439 | - } |
|
| 440 | - if ($def->optimized !== $optimized) { |
|
| 441 | - $msg = $optimized ? "optimized" : "unoptimized"; |
|
| 442 | - $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : ""; |
|
| 443 | - throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra); |
|
| 444 | - } |
|
| 445 | - } |
|
| 446 | - // check if definition was in memory |
|
| 447 | - if ($def) { |
|
| 448 | - if ($def->setup) { |
|
| 449 | - // invariant: $optimized === true (checked above) |
|
| 450 | - return null; |
|
| 451 | - } else { |
|
| 452 | - return $def; |
|
| 453 | - } |
|
| 454 | - } |
|
| 455 | - // if optimized, check if definition was in cache |
|
| 456 | - // (because we do the memory check first, this formulation |
|
| 457 | - // is prone to cache slamming, but I think |
|
| 458 | - // guaranteeing that either /all/ of the raw |
|
| 459 | - // setup code or /none/ of it is run is more important.) |
|
| 460 | - if ($optimized) { |
|
| 461 | - // This code path only gets run once; once we put |
|
| 462 | - // something in $definitions (which is guaranteed by the |
|
| 463 | - // trailing code), we always short-circuit above. |
|
| 464 | - $def = $cache->get($this); |
|
| 465 | - if ($def) { |
|
| 466 | - // save the full definition for later, but don't |
|
| 467 | - // return it yet |
|
| 468 | - $this->definitions[$type] = $def; |
|
| 469 | - return null; |
|
| 470 | - } |
|
| 471 | - } |
|
| 472 | - // check invariants for creation |
|
| 473 | - if (!$optimized) { |
|
| 474 | - if (!is_null($this->get($type . '.DefinitionID'))) { |
|
| 475 | - if ($this->chatty) { |
|
| 476 | - $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING); |
|
| 477 | - } else { |
|
| 478 | - $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING); |
|
| 479 | - } |
|
| 480 | - } |
|
| 481 | - } |
|
| 482 | - // initialize it |
|
| 483 | - $def = $this->initDefinition($type); |
|
| 484 | - $def->optimized = $optimized; |
|
| 485 | - return $def; |
|
| 486 | - } |
|
| 487 | - throw new HTMLPurifier_Exception("The impossible happened!"); |
|
| 488 | - } |
|
| 489 | - |
|
| 490 | - private function initDefinition($type) { |
|
| 491 | - // quick checks failed, let's create the object |
|
| 492 | - if ($type == 'HTML') { |
|
| 493 | - $def = new HTMLPurifier_HTMLDefinition(); |
|
| 494 | - } elseif ($type == 'CSS') { |
|
| 495 | - $def = new HTMLPurifier_CSSDefinition(); |
|
| 496 | - } elseif ($type == 'URI') { |
|
| 497 | - $def = new HTMLPurifier_URIDefinition(); |
|
| 498 | - } else { |
|
| 499 | - throw new HTMLPurifier_Exception("Definition of $type type not supported"); |
|
| 500 | - } |
|
| 501 | - $this->definitions[$type] = $def; |
|
| 502 | - return $def; |
|
| 503 | - } |
|
| 504 | - |
|
| 505 | - public function maybeGetRawDefinition($name) { |
|
| 506 | - return $this->getDefinition($name, true, true); |
|
| 507 | - } |
|
| 508 | - |
|
| 509 | - public function maybeGetRawHTMLDefinition() { |
|
| 510 | - return $this->getDefinition('HTML', true, true); |
|
| 511 | - } |
|
| 512 | - |
|
| 513 | - public function maybeGetRawCSSDefinition() { |
|
| 514 | - return $this->getDefinition('CSS', true, true); |
|
| 515 | - } |
|
| 516 | - |
|
| 517 | - public function maybeGetRawURIDefinition() { |
|
| 518 | - return $this->getDefinition('URI', true, true); |
|
| 519 | - } |
|
| 520 | - |
|
| 521 | - /** |
|
| 522 | - * Loads configuration values from an array with the following structure: |
|
| 523 | - * Namespace.Directive => Value |
|
| 524 | - * @param $config_array Configuration associative array |
|
| 525 | - */ |
|
| 526 | - public function loadArray($config_array) { |
|
| 527 | - if ($this->isFinalized('Cannot load directives after finalization')) return; |
|
| 528 | - foreach ($config_array as $key => $value) { |
|
| 529 | - $key = str_replace('_', '.', $key); |
|
| 530 | - if (strpos($key, '.') !== false) { |
|
| 531 | - $this->set($key, $value); |
|
| 532 | - } else { |
|
| 533 | - $namespace = $key; |
|
| 534 | - $namespace_values = $value; |
|
| 535 | - foreach ($namespace_values as $directive => $value) { |
|
| 536 | - $this->set($namespace .'.'. $directive, $value); |
|
| 537 | - } |
|
| 538 | - } |
|
| 539 | - } |
|
| 540 | - } |
|
| 541 | - |
|
| 542 | - /** |
|
| 543 | - * Returns a list of array(namespace, directive) for all directives |
|
| 544 | - * that are allowed in a web-form context as per an allowed |
|
| 545 | - * namespaces/directives list. |
|
| 546 | - * @param $allowed List of allowed namespaces/directives |
|
| 547 | - */ |
|
| 548 | - public static function getAllowedDirectivesForForm($allowed, $schema = null) { |
|
| 549 | - if (!$schema) { |
|
| 550 | - $schema = HTMLPurifier_ConfigSchema::instance(); |
|
| 551 | - } |
|
| 552 | - if ($allowed !== true) { |
|
| 553 | - if (is_string($allowed)) $allowed = array($allowed); |
|
| 554 | - $allowed_ns = array(); |
|
| 555 | - $allowed_directives = array(); |
|
| 556 | - $blacklisted_directives = array(); |
|
| 557 | - foreach ($allowed as $ns_or_directive) { |
|
| 558 | - if (strpos($ns_or_directive, '.') !== false) { |
|
| 559 | - // directive |
|
| 560 | - if ($ns_or_directive[0] == '-') { |
|
| 561 | - $blacklisted_directives[substr($ns_or_directive, 1)] = true; |
|
| 562 | - } else { |
|
| 563 | - $allowed_directives[$ns_or_directive] = true; |
|
| 564 | - } |
|
| 565 | - } else { |
|
| 566 | - // namespace |
|
| 567 | - $allowed_ns[$ns_or_directive] = true; |
|
| 568 | - } |
|
| 569 | - } |
|
| 570 | - } |
|
| 571 | - $ret = array(); |
|
| 572 | - foreach ($schema->info as $key => $def) { |
|
| 573 | - list($ns, $directive) = explode('.', $key, 2); |
|
| 574 | - if ($allowed !== true) { |
|
| 575 | - if (isset($blacklisted_directives["$ns.$directive"])) continue; |
|
| 576 | - if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; |
|
| 577 | - } |
|
| 578 | - if (isset($def->isAlias)) continue; |
|
| 579 | - if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; |
|
| 580 | - $ret[] = array($ns, $directive); |
|
| 581 | - } |
|
| 582 | - return $ret; |
|
| 583 | - } |
|
| 584 | - |
|
| 585 | - /** |
|
| 586 | - * Loads configuration values from $_GET/$_POST that were posted |
|
| 587 | - * via ConfigForm |
|
| 588 | - * @param $array $_GET or $_POST array to import |
|
| 589 | - * @param $index Index/name that the config variables are in |
|
| 590 | - * @param $allowed List of allowed namespaces/directives |
|
| 591 | - * @param $mq_fix Boolean whether or not to enable magic quotes fix |
|
| 592 | - * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy |
|
| 593 | - */ |
|
| 594 | - public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { |
|
| 595 | - $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); |
|
| 596 | - $config = HTMLPurifier_Config::create($ret, $schema); |
|
| 597 | - return $config; |
|
| 598 | - } |
|
| 599 | - |
|
| 600 | - /** |
|
| 601 | - * Merges in configuration values from $_GET/$_POST to object. NOT STATIC. |
|
| 602 | - * @note Same parameters as loadArrayFromForm |
|
| 603 | - */ |
|
| 604 | - public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { |
|
| 605 | - $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); |
|
| 606 | - $this->loadArray($ret); |
|
| 607 | - } |
|
| 608 | - |
|
| 609 | - /** |
|
| 610 | - * Prepares an array from a form into something usable for the more |
|
| 611 | - * strict parts of HTMLPurifier_Config |
|
| 612 | - */ |
|
| 613 | - public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { |
|
| 614 | - if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); |
|
| 615 | - $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); |
|
| 616 | - |
|
| 617 | - $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); |
|
| 618 | - $ret = array(); |
|
| 619 | - foreach ($allowed as $key) { |
|
| 620 | - list($ns, $directive) = $key; |
|
| 621 | - $skey = "$ns.$directive"; |
|
| 622 | - if (!empty($array["Null_$skey"])) { |
|
| 623 | - $ret[$ns][$directive] = null; |
|
| 624 | - continue; |
|
| 625 | - } |
|
| 626 | - if (!isset($array[$skey])) continue; |
|
| 627 | - $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; |
|
| 628 | - $ret[$ns][$directive] = $value; |
|
| 629 | - } |
|
| 630 | - return $ret; |
|
| 631 | - } |
|
| 632 | - |
|
| 633 | - /** |
|
| 634 | - * Loads configuration values from an ini file |
|
| 635 | - * @param $filename Name of ini file |
|
| 636 | - */ |
|
| 637 | - public function loadIni($filename) { |
|
| 638 | - if ($this->isFinalized('Cannot load directives after finalization')) return; |
|
| 639 | - $array = parse_ini_file($filename, true); |
|
| 640 | - $this->loadArray($array); |
|
| 641 | - } |
|
| 642 | - |
|
| 643 | - /** |
|
| 644 | - * Checks whether or not the configuration object is finalized. |
|
| 645 | - * @param $error String error message, or false for no error |
|
| 646 | - */ |
|
| 647 | - public function isFinalized($error = false) { |
|
| 648 | - if ($this->finalized && $error) { |
|
| 649 | - $this->triggerError($error, E_USER_ERROR); |
|
| 650 | - } |
|
| 651 | - return $this->finalized; |
|
| 652 | - } |
|
| 653 | - |
|
| 654 | - /** |
|
| 655 | - * Finalizes configuration only if auto finalize is on and not |
|
| 656 | - * already finalized |
|
| 657 | - */ |
|
| 658 | - public function autoFinalize() { |
|
| 659 | - if ($this->autoFinalize) { |
|
| 660 | - $this->finalize(); |
|
| 661 | - } else { |
|
| 662 | - $this->plist->squash(true); |
|
| 663 | - } |
|
| 664 | - } |
|
| 665 | - |
|
| 666 | - /** |
|
| 667 | - * Finalizes a configuration object, prohibiting further change |
|
| 668 | - */ |
|
| 669 | - public function finalize() { |
|
| 670 | - $this->finalized = true; |
|
| 671 | - $this->parser = null; |
|
| 672 | - } |
|
| 673 | - |
|
| 674 | - /** |
|
| 675 | - * Produces a nicely formatted error message by supplying the |
|
| 676 | - * stack frame information OUTSIDE of HTMLPurifier_Config. |
|
| 677 | - */ |
|
| 678 | - protected function triggerError($msg, $no) { |
|
| 679 | - // determine previous stack frame |
|
| 680 | - $extra = ''; |
|
| 681 | - if ($this->chatty) { |
|
| 682 | - $trace = debug_backtrace(); |
|
| 683 | - // zip(tail(trace), trace) -- but PHP is not Haskell har har |
|
| 684 | - for ($i = 0, $c = count($trace); $i < $c - 1; $i++) { |
|
| 685 | - if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') { |
|
| 686 | - continue; |
|
| 687 | - } |
|
| 688 | - $frame = $trace[$i]; |
|
| 689 | - $extra = " invoked on line {$frame['line']} in file {$frame['file']}"; |
|
| 690 | - break; |
|
| 691 | - } |
|
| 692 | - } |
|
| 693 | - trigger_error($msg . $extra, $no); |
|
| 694 | - } |
|
| 695 | - |
|
| 696 | - /** |
|
| 697 | - * Returns a serialized form of the configuration object that can |
|
| 698 | - * be reconstituted. |
|
| 699 | - */ |
|
| 700 | - public function serialize() { |
|
| 701 | - $this->getDefinition('HTML'); |
|
| 702 | - $this->getDefinition('CSS'); |
|
| 703 | - $this->getDefinition('URI'); |
|
| 704 | - return serialize($this); |
|
| 705 | - } |
|
| 20 | + /** |
|
| 21 | + * HTML Purifier's version |
|
| 22 | + */ |
|
| 23 | + public $version = '4.4.0'; |
|
| 24 | + |
|
| 25 | + /** |
|
| 26 | + * Bool indicator whether or not to automatically finalize |
|
| 27 | + * the object if a read operation is done |
|
| 28 | + */ |
|
| 29 | + public $autoFinalize = true; |
|
| 30 | + |
|
| 31 | + // protected member variables |
|
| 32 | + |
|
| 33 | + /** |
|
| 34 | + * Namespace indexed array of serials for specific namespaces (see |
|
| 35 | + * getSerial() for more info). |
|
| 36 | + */ |
|
| 37 | + protected $serials = array(); |
|
| 38 | + |
|
| 39 | + /** |
|
| 40 | + * Serial for entire configuration object |
|
| 41 | + */ |
|
| 42 | + protected $serial; |
|
| 43 | + |
|
| 44 | + /** |
|
| 45 | + * Parser for variables |
|
| 46 | + */ |
|
| 47 | + protected $parser = null; |
|
| 48 | + |
|
| 49 | + /** |
|
| 50 | + * Reference HTMLPurifier_ConfigSchema for value checking |
|
| 51 | + * @note This is public for introspective purposes. Please don't |
|
| 52 | + * abuse! |
|
| 53 | + */ |
|
| 54 | + public $def; |
|
| 55 | + |
|
| 56 | + /** |
|
| 57 | + * Indexed array of definitions |
|
| 58 | + */ |
|
| 59 | + protected $definitions; |
|
| 60 | + |
|
| 61 | + /** |
|
| 62 | + * Bool indicator whether or not config is finalized |
|
| 63 | + */ |
|
| 64 | + protected $finalized = false; |
|
| 65 | + |
|
| 66 | + /** |
|
| 67 | + * Property list containing configuration directives. |
|
| 68 | + */ |
|
| 69 | + protected $plist; |
|
| 70 | + |
|
| 71 | + /** |
|
| 72 | + * Whether or not a set is taking place due to an |
|
| 73 | + * alias lookup. |
|
| 74 | + */ |
|
| 75 | + private $aliasMode; |
|
| 76 | + |
|
| 77 | + /** |
|
| 78 | + * Set to false if you do not want line and file numbers in errors |
|
| 79 | + * (useful when unit testing). This will also compress some errors |
|
| 80 | + * and exceptions. |
|
| 81 | + */ |
|
| 82 | + public $chatty = true; |
|
| 83 | + |
|
| 84 | + /** |
|
| 85 | + * Current lock; only gets to this namespace are allowed. |
|
| 86 | + */ |
|
| 87 | + private $lock; |
|
| 88 | + |
|
| 89 | + /** |
|
| 90 | + * @param $definition HTMLPurifier_ConfigSchema that defines what directives |
|
| 91 | + * are allowed. |
|
| 92 | + */ |
|
| 93 | + public function __construct($definition, $parent = null) { |
|
| 94 | + $parent = $parent ? $parent : $definition->defaultPlist; |
|
| 95 | + $this->plist = new HTMLPurifier_PropertyList($parent); |
|
| 96 | + $this->def = $definition; // keep a copy around for checking |
|
| 97 | + $this->parser = new HTMLPurifier_VarParser_Flexible(); |
|
| 98 | + } |
|
| 99 | + |
|
| 100 | + /** |
|
| 101 | + * Convenience constructor that creates a config object based on a mixed var |
|
| 102 | + * @param mixed $config Variable that defines the state of the config |
|
| 103 | + * object. Can be: a HTMLPurifier_Config() object, |
|
| 104 | + * an array of directives based on loadArray(), |
|
| 105 | + * or a string filename of an ini file. |
|
| 106 | + * @param HTMLPurifier_ConfigSchema Schema object |
|
| 107 | + * @return Configured HTMLPurifier_Config object |
|
| 108 | + */ |
|
| 109 | + public static function create($config, $schema = null) { |
|
| 110 | + if ($config instanceof HTMLPurifier_Config) { |
|
| 111 | + // pass-through |
|
| 112 | + return $config; |
|
| 113 | + } |
|
| 114 | + if (!$schema) { |
|
| 115 | + $ret = HTMLPurifier_Config::createDefault(); |
|
| 116 | + } else { |
|
| 117 | + $ret = new HTMLPurifier_Config($schema); |
|
| 118 | + } |
|
| 119 | + if (is_string($config)) $ret->loadIni($config); |
|
| 120 | + elseif (is_array($config)) $ret->loadArray($config); |
|
| 121 | + return $ret; |
|
| 122 | + } |
|
| 123 | + |
|
| 124 | + /** |
|
| 125 | + * Creates a new config object that inherits from a previous one. |
|
| 126 | + * @param HTMLPurifier_Config $config Configuration object to inherit |
|
| 127 | + * from. |
|
| 128 | + * @return HTMLPurifier_Config object with $config as its parent. |
|
| 129 | + */ |
|
| 130 | + public static function inherit(HTMLPurifier_Config $config) { |
|
| 131 | + return new HTMLPurifier_Config($config->def, $config->plist); |
|
| 132 | + } |
|
| 133 | + |
|
| 134 | + /** |
|
| 135 | + * Convenience constructor that creates a default configuration object. |
|
| 136 | + * @return Default HTMLPurifier_Config object. |
|
| 137 | + */ |
|
| 138 | + public static function createDefault() { |
|
| 139 | + $definition = HTMLPurifier_ConfigSchema::instance(); |
|
| 140 | + $config = new HTMLPurifier_Config($definition); |
|
| 141 | + return $config; |
|
| 142 | + } |
|
| 143 | + |
|
| 144 | + /** |
|
| 145 | + * Retreives a value from the configuration. |
|
| 146 | + * @param $key String key |
|
| 147 | + */ |
|
| 148 | + public function get($key, $a = null) { |
|
| 149 | + if ($a !== null) { |
|
| 150 | + $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); |
|
| 151 | + $key = "$key.$a"; |
|
| 152 | + } |
|
| 153 | + if (!$this->finalized) $this->autoFinalize(); |
|
| 154 | + if (!isset($this->def->info[$key])) { |
|
| 155 | + // can't add % due to SimpleTest bug |
|
| 156 | + $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 157 | + E_USER_WARNING); |
|
| 158 | + return; |
|
| 159 | + } |
|
| 160 | + if (isset($this->def->info[$key]->isAlias)) { |
|
| 161 | + $d = $this->def->info[$key]; |
|
| 162 | + $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, |
|
| 163 | + E_USER_ERROR); |
|
| 164 | + return; |
|
| 165 | + } |
|
| 166 | + if ($this->lock) { |
|
| 167 | + list($ns) = explode('.', $key); |
|
| 168 | + if ($ns !== $this->lock) { |
|
| 169 | + $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); |
|
| 170 | + return; |
|
| 171 | + } |
|
| 172 | + } |
|
| 173 | + return $this->plist->get($key); |
|
| 174 | + } |
|
| 175 | + |
|
| 176 | + /** |
|
| 177 | + * Retreives an array of directives to values from a given namespace |
|
| 178 | + * @param $namespace String namespace |
|
| 179 | + */ |
|
| 180 | + public function getBatch($namespace) { |
|
| 181 | + if (!$this->finalized) $this->autoFinalize(); |
|
| 182 | + $full = $this->getAll(); |
|
| 183 | + if (!isset($full[$namespace])) { |
|
| 184 | + $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 185 | + E_USER_WARNING); |
|
| 186 | + return; |
|
| 187 | + } |
|
| 188 | + return $full[$namespace]; |
|
| 189 | + } |
|
| 190 | + |
|
| 191 | + /** |
|
| 192 | + * Returns a md5 signature of a segment of the configuration object |
|
| 193 | + * that uniquely identifies that particular configuration |
|
| 194 | + * @note Revision is handled specially and is removed from the batch |
|
| 195 | + * before processing! |
|
| 196 | + * @param $namespace Namespace to get serial for |
|
| 197 | + */ |
|
| 198 | + public function getBatchSerial($namespace) { |
|
| 199 | + if (empty($this->serials[$namespace])) { |
|
| 200 | + $batch = $this->getBatch($namespace); |
|
| 201 | + unset($batch['DefinitionRev']); |
|
| 202 | + $this->serials[$namespace] = md5(serialize($batch)); |
|
| 203 | + } |
|
| 204 | + return $this->serials[$namespace]; |
|
| 205 | + } |
|
| 206 | + |
|
| 207 | + /** |
|
| 208 | + * Returns a md5 signature for the entire configuration object |
|
| 209 | + * that uniquely identifies that particular configuration |
|
| 210 | + */ |
|
| 211 | + public function getSerial() { |
|
| 212 | + if (empty($this->serial)) { |
|
| 213 | + $this->serial = md5(serialize($this->getAll())); |
|
| 214 | + } |
|
| 215 | + return $this->serial; |
|
| 216 | + } |
|
| 217 | + |
|
| 218 | + /** |
|
| 219 | + * Retrieves all directives, organized by namespace |
|
| 220 | + * @warning This is a pretty inefficient function, avoid if you can |
|
| 221 | + */ |
|
| 222 | + public function getAll() { |
|
| 223 | + if (!$this->finalized) $this->autoFinalize(); |
|
| 224 | + $ret = array(); |
|
| 225 | + foreach ($this->plist->squash() as $name => $value) { |
|
| 226 | + list($ns, $key) = explode('.', $name, 2); |
|
| 227 | + $ret[$ns][$key] = $value; |
|
| 228 | + } |
|
| 229 | + return $ret; |
|
| 230 | + } |
|
| 231 | + |
|
| 232 | + /** |
|
| 233 | + * Sets a value to configuration. |
|
| 234 | + * @param $key String key |
|
| 235 | + * @param $value Mixed value |
|
| 236 | + */ |
|
| 237 | + public function set($key, $value, $a = null) { |
|
| 238 | + if (strpos($key, '.') === false) { |
|
| 239 | + $namespace = $key; |
|
| 240 | + $directive = $value; |
|
| 241 | + $value = $a; |
|
| 242 | + $key = "$key.$directive"; |
|
| 243 | + $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); |
|
| 244 | + } else { |
|
| 245 | + list($namespace) = explode('.', $key); |
|
| 246 | + } |
|
| 247 | + if ($this->isFinalized('Cannot set directive after finalization')) return; |
|
| 248 | + if (!isset($this->def->info[$key])) { |
|
| 249 | + $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' to value', |
|
| 250 | + E_USER_WARNING); |
|
| 251 | + return; |
|
| 252 | + } |
|
| 253 | + $def = $this->def->info[$key]; |
|
| 254 | + |
|
| 255 | + if (isset($def->isAlias)) { |
|
| 256 | + if ($this->aliasMode) { |
|
| 257 | + $this->triggerError('Double-aliases not allowed, please fix '. |
|
| 258 | + 'ConfigSchema bug with' . $key, E_USER_ERROR); |
|
| 259 | + return; |
|
| 260 | + } |
|
| 261 | + $this->aliasMode = true; |
|
| 262 | + $this->set($def->key, $value); |
|
| 263 | + $this->aliasMode = false; |
|
| 264 | + $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); |
|
| 265 | + return; |
|
| 266 | + } |
|
| 267 | + |
|
| 268 | + // Raw type might be negative when using the fully optimized form |
|
| 269 | + // of stdclass, which indicates allow_null == true |
|
| 270 | + $rtype = is_int($def) ? $def : $def->type; |
|
| 271 | + if ($rtype < 0) { |
|
| 272 | + $type = -$rtype; |
|
| 273 | + $allow_null = true; |
|
| 274 | + } else { |
|
| 275 | + $type = $rtype; |
|
| 276 | + $allow_null = isset($def->allow_null); |
|
| 277 | + } |
|
| 278 | + |
|
| 279 | + try { |
|
| 280 | + $value = $this->parser->parse($value, $type, $allow_null); |
|
| 281 | + } catch (HTMLPurifier_VarParserException $e) { |
|
| 282 | + $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); |
|
| 283 | + return; |
|
| 284 | + } |
|
| 285 | + if (is_string($value) && is_object($def)) { |
|
| 286 | + // resolve value alias if defined |
|
| 287 | + if (isset($def->aliases[$value])) { |
|
| 288 | + $value = $def->aliases[$value]; |
|
| 289 | + } |
|
| 290 | + // check to see if the value is allowed |
|
| 291 | + if (isset($def->allowed) && !isset($def->allowed[$value])) { |
|
| 292 | + $this->triggerError('Value not supported, valid values are: ' . |
|
| 293 | + $this->_listify($def->allowed), E_USER_WARNING); |
|
| 294 | + return; |
|
| 295 | + } |
|
| 296 | + } |
|
| 297 | + $this->plist->set($key, $value); |
|
| 298 | + |
|
| 299 | + // reset definitions if the directives they depend on changed |
|
| 300 | + // this is a very costly process, so it's discouraged |
|
| 301 | + // with finalization |
|
| 302 | + if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { |
|
| 303 | + $this->definitions[$namespace] = null; |
|
| 304 | + } |
|
| 305 | + |
|
| 306 | + $this->serials[$namespace] = false; |
|
| 307 | + } |
|
| 308 | + |
|
| 309 | + /** |
|
| 310 | + * Convenience function for error reporting |
|
| 311 | + */ |
|
| 312 | + private function _listify($lookup) { |
|
| 313 | + $list = array(); |
|
| 314 | + foreach ($lookup as $name => $b) $list[] = $name; |
|
| 315 | + return implode(', ', $list); |
|
| 316 | + } |
|
| 317 | + |
|
| 318 | + /** |
|
| 319 | + * Retrieves object reference to the HTML definition. |
|
| 320 | + * @param $raw Return a copy that has not been setup yet. Must be |
|
| 321 | + * called before it's been setup, otherwise won't work. |
|
| 322 | + * @param $optimized If true, this method may return null, to |
|
| 323 | + * indicate that a cached version of the modified |
|
| 324 | + * definition object is available and no further edits |
|
| 325 | + * are necessary. Consider using |
|
| 326 | + * maybeGetRawHTMLDefinition, which is more explicitly |
|
| 327 | + * named, instead. |
|
| 328 | + */ |
|
| 329 | + public function getHTMLDefinition($raw = false, $optimized = false) { |
|
| 330 | + return $this->getDefinition('HTML', $raw, $optimized); |
|
| 331 | + } |
|
| 332 | + |
|
| 333 | + /** |
|
| 334 | + * Retrieves object reference to the CSS definition |
|
| 335 | + * @param $raw Return a copy that has not been setup yet. Must be |
|
| 336 | + * called before it's been setup, otherwise won't work. |
|
| 337 | + * @param $optimized If true, this method may return null, to |
|
| 338 | + * indicate that a cached version of the modified |
|
| 339 | + * definition object is available and no further edits |
|
| 340 | + * are necessary. Consider using |
|
| 341 | + * maybeGetRawCSSDefinition, which is more explicitly |
|
| 342 | + * named, instead. |
|
| 343 | + */ |
|
| 344 | + public function getCSSDefinition($raw = false, $optimized = false) { |
|
| 345 | + return $this->getDefinition('CSS', $raw, $optimized); |
|
| 346 | + } |
|
| 347 | + |
|
| 348 | + /** |
|
| 349 | + * Retrieves object reference to the URI definition |
|
| 350 | + * @param $raw Return a copy that has not been setup yet. Must be |
|
| 351 | + * called before it's been setup, otherwise won't work. |
|
| 352 | + * @param $optimized If true, this method may return null, to |
|
| 353 | + * indicate that a cached version of the modified |
|
| 354 | + * definition object is available and no further edits |
|
| 355 | + * are necessary. Consider using |
|
| 356 | + * maybeGetRawURIDefinition, which is more explicitly |
|
| 357 | + * named, instead. |
|
| 358 | + */ |
|
| 359 | + public function getURIDefinition($raw = false, $optimized = false) { |
|
| 360 | + return $this->getDefinition('URI', $raw, $optimized); |
|
| 361 | + } |
|
| 362 | + |
|
| 363 | + /** |
|
| 364 | + * Retrieves a definition |
|
| 365 | + * @param $type Type of definition: HTML, CSS, etc |
|
| 366 | + * @param $raw Whether or not definition should be returned raw |
|
| 367 | + * @param $optimized Only has an effect when $raw is true. Whether |
|
| 368 | + * or not to return null if the result is already present in |
|
| 369 | + * the cache. This is off by default for backwards |
|
| 370 | + * compatibility reasons, but you need to do things this |
|
| 371 | + * way in order to ensure that caching is done properly. |
|
| 372 | + * Check out enduser-customize.html for more details. |
|
| 373 | + * We probably won't ever change this default, as much as the |
|
| 374 | + * maybe semantics is the "right thing to do." |
|
| 375 | + */ |
|
| 376 | + public function getDefinition($type, $raw = false, $optimized = false) { |
|
| 377 | + if ($optimized && !$raw) { |
|
| 378 | + throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false"); |
|
| 379 | + } |
|
| 380 | + if (!$this->finalized) $this->autoFinalize(); |
|
| 381 | + // temporarily suspend locks, so we can handle recursive definition calls |
|
| 382 | + $lock = $this->lock; |
|
| 383 | + $this->lock = null; |
|
| 384 | + $factory = HTMLPurifier_DefinitionCacheFactory::instance(); |
|
| 385 | + $cache = $factory->create($type, $this); |
|
| 386 | + $this->lock = $lock; |
|
| 387 | + if (!$raw) { |
|
| 388 | + // full definition |
|
| 389 | + // --------------- |
|
| 390 | + // check if definition is in memory |
|
| 391 | + if (!empty($this->definitions[$type])) { |
|
| 392 | + $def = $this->definitions[$type]; |
|
| 393 | + // check if the definition is setup |
|
| 394 | + if ($def->setup) { |
|
| 395 | + return $def; |
|
| 396 | + } else { |
|
| 397 | + $def->setup($this); |
|
| 398 | + if ($def->optimized) $cache->add($def, $this); |
|
| 399 | + return $def; |
|
| 400 | + } |
|
| 401 | + } |
|
| 402 | + // check if definition is in cache |
|
| 403 | + $def = $cache->get($this); |
|
| 404 | + if ($def) { |
|
| 405 | + // definition in cache, save to memory and return it |
|
| 406 | + $this->definitions[$type] = $def; |
|
| 407 | + return $def; |
|
| 408 | + } |
|
| 409 | + // initialize it |
|
| 410 | + $def = $this->initDefinition($type); |
|
| 411 | + // set it up |
|
| 412 | + $this->lock = $type; |
|
| 413 | + $def->setup($this); |
|
| 414 | + $this->lock = null; |
|
| 415 | + // save in cache |
|
| 416 | + $cache->add($def, $this); |
|
| 417 | + // return it |
|
| 418 | + return $def; |
|
| 419 | + } else { |
|
| 420 | + // raw definition |
|
| 421 | + // -------------- |
|
| 422 | + // check preconditions |
|
| 423 | + $def = null; |
|
| 424 | + if ($optimized) { |
|
| 425 | + if (is_null($this->get($type . '.DefinitionID'))) { |
|
| 426 | + // fatally error out if definition ID not set |
|
| 427 | + throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); |
|
| 428 | + } |
|
| 429 | + } |
|
| 430 | + if (!empty($this->definitions[$type])) { |
|
| 431 | + $def = $this->definitions[$type]; |
|
| 432 | + if ($def->setup && !$optimized) { |
|
| 433 | + $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : ""; |
|
| 434 | + throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra); |
|
| 435 | + } |
|
| 436 | + if ($def->optimized === null) { |
|
| 437 | + $extra = $this->chatty ? " (try flushing your cache)" : ""; |
|
| 438 | + throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra); |
|
| 439 | + } |
|
| 440 | + if ($def->optimized !== $optimized) { |
|
| 441 | + $msg = $optimized ? "optimized" : "unoptimized"; |
|
| 442 | + $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : ""; |
|
| 443 | + throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra); |
|
| 444 | + } |
|
| 445 | + } |
|
| 446 | + // check if definition was in memory |
|
| 447 | + if ($def) { |
|
| 448 | + if ($def->setup) { |
|
| 449 | + // invariant: $optimized === true (checked above) |
|
| 450 | + return null; |
|
| 451 | + } else { |
|
| 452 | + return $def; |
|
| 453 | + } |
|
| 454 | + } |
|
| 455 | + // if optimized, check if definition was in cache |
|
| 456 | + // (because we do the memory check first, this formulation |
|
| 457 | + // is prone to cache slamming, but I think |
|
| 458 | + // guaranteeing that either /all/ of the raw |
|
| 459 | + // setup code or /none/ of it is run is more important.) |
|
| 460 | + if ($optimized) { |
|
| 461 | + // This code path only gets run once; once we put |
|
| 462 | + // something in $definitions (which is guaranteed by the |
|
| 463 | + // trailing code), we always short-circuit above. |
|
| 464 | + $def = $cache->get($this); |
|
| 465 | + if ($def) { |
|
| 466 | + // save the full definition for later, but don't |
|
| 467 | + // return it yet |
|
| 468 | + $this->definitions[$type] = $def; |
|
| 469 | + return null; |
|
| 470 | + } |
|
| 471 | + } |
|
| 472 | + // check invariants for creation |
|
| 473 | + if (!$optimized) { |
|
| 474 | + if (!is_null($this->get($type . '.DefinitionID'))) { |
|
| 475 | + if ($this->chatty) { |
|
| 476 | + $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING); |
|
| 477 | + } else { |
|
| 478 | + $this->triggerError("Useless DefinitionID declaration", E_USER_WARNING); |
|
| 479 | + } |
|
| 480 | + } |
|
| 481 | + } |
|
| 482 | + // initialize it |
|
| 483 | + $def = $this->initDefinition($type); |
|
| 484 | + $def->optimized = $optimized; |
|
| 485 | + return $def; |
|
| 486 | + } |
|
| 487 | + throw new HTMLPurifier_Exception("The impossible happened!"); |
|
| 488 | + } |
|
| 489 | + |
|
| 490 | + private function initDefinition($type) { |
|
| 491 | + // quick checks failed, let's create the object |
|
| 492 | + if ($type == 'HTML') { |
|
| 493 | + $def = new HTMLPurifier_HTMLDefinition(); |
|
| 494 | + } elseif ($type == 'CSS') { |
|
| 495 | + $def = new HTMLPurifier_CSSDefinition(); |
|
| 496 | + } elseif ($type == 'URI') { |
|
| 497 | + $def = new HTMLPurifier_URIDefinition(); |
|
| 498 | + } else { |
|
| 499 | + throw new HTMLPurifier_Exception("Definition of $type type not supported"); |
|
| 500 | + } |
|
| 501 | + $this->definitions[$type] = $def; |
|
| 502 | + return $def; |
|
| 503 | + } |
|
| 504 | + |
|
| 505 | + public function maybeGetRawDefinition($name) { |
|
| 506 | + return $this->getDefinition($name, true, true); |
|
| 507 | + } |
|
| 508 | + |
|
| 509 | + public function maybeGetRawHTMLDefinition() { |
|
| 510 | + return $this->getDefinition('HTML', true, true); |
|
| 511 | + } |
|
| 512 | + |
|
| 513 | + public function maybeGetRawCSSDefinition() { |
|
| 514 | + return $this->getDefinition('CSS', true, true); |
|
| 515 | + } |
|
| 516 | + |
|
| 517 | + public function maybeGetRawURIDefinition() { |
|
| 518 | + return $this->getDefinition('URI', true, true); |
|
| 519 | + } |
|
| 520 | + |
|
| 521 | + /** |
|
| 522 | + * Loads configuration values from an array with the following structure: |
|
| 523 | + * Namespace.Directive => Value |
|
| 524 | + * @param $config_array Configuration associative array |
|
| 525 | + */ |
|
| 526 | + public function loadArray($config_array) { |
|
| 527 | + if ($this->isFinalized('Cannot load directives after finalization')) return; |
|
| 528 | + foreach ($config_array as $key => $value) { |
|
| 529 | + $key = str_replace('_', '.', $key); |
|
| 530 | + if (strpos($key, '.') !== false) { |
|
| 531 | + $this->set($key, $value); |
|
| 532 | + } else { |
|
| 533 | + $namespace = $key; |
|
| 534 | + $namespace_values = $value; |
|
| 535 | + foreach ($namespace_values as $directive => $value) { |
|
| 536 | + $this->set($namespace .'.'. $directive, $value); |
|
| 537 | + } |
|
| 538 | + } |
|
| 539 | + } |
|
| 540 | + } |
|
| 541 | + |
|
| 542 | + /** |
|
| 543 | + * Returns a list of array(namespace, directive) for all directives |
|
| 544 | + * that are allowed in a web-form context as per an allowed |
|
| 545 | + * namespaces/directives list. |
|
| 546 | + * @param $allowed List of allowed namespaces/directives |
|
| 547 | + */ |
|
| 548 | + public static function getAllowedDirectivesForForm($allowed, $schema = null) { |
|
| 549 | + if (!$schema) { |
|
| 550 | + $schema = HTMLPurifier_ConfigSchema::instance(); |
|
| 551 | + } |
|
| 552 | + if ($allowed !== true) { |
|
| 553 | + if (is_string($allowed)) $allowed = array($allowed); |
|
| 554 | + $allowed_ns = array(); |
|
| 555 | + $allowed_directives = array(); |
|
| 556 | + $blacklisted_directives = array(); |
|
| 557 | + foreach ($allowed as $ns_or_directive) { |
|
| 558 | + if (strpos($ns_or_directive, '.') !== false) { |
|
| 559 | + // directive |
|
| 560 | + if ($ns_or_directive[0] == '-') { |
|
| 561 | + $blacklisted_directives[substr($ns_or_directive, 1)] = true; |
|
| 562 | + } else { |
|
| 563 | + $allowed_directives[$ns_or_directive] = true; |
|
| 564 | + } |
|
| 565 | + } else { |
|
| 566 | + // namespace |
|
| 567 | + $allowed_ns[$ns_or_directive] = true; |
|
| 568 | + } |
|
| 569 | + } |
|
| 570 | + } |
|
| 571 | + $ret = array(); |
|
| 572 | + foreach ($schema->info as $key => $def) { |
|
| 573 | + list($ns, $directive) = explode('.', $key, 2); |
|
| 574 | + if ($allowed !== true) { |
|
| 575 | + if (isset($blacklisted_directives["$ns.$directive"])) continue; |
|
| 576 | + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; |
|
| 577 | + } |
|
| 578 | + if (isset($def->isAlias)) continue; |
|
| 579 | + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; |
|
| 580 | + $ret[] = array($ns, $directive); |
|
| 581 | + } |
|
| 582 | + return $ret; |
|
| 583 | + } |
|
| 584 | + |
|
| 585 | + /** |
|
| 586 | + * Loads configuration values from $_GET/$_POST that were posted |
|
| 587 | + * via ConfigForm |
|
| 588 | + * @param $array $_GET or $_POST array to import |
|
| 589 | + * @param $index Index/name that the config variables are in |
|
| 590 | + * @param $allowed List of allowed namespaces/directives |
|
| 591 | + * @param $mq_fix Boolean whether or not to enable magic quotes fix |
|
| 592 | + * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy |
|
| 593 | + */ |
|
| 594 | + public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { |
|
| 595 | + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); |
|
| 596 | + $config = HTMLPurifier_Config::create($ret, $schema); |
|
| 597 | + return $config; |
|
| 598 | + } |
|
| 599 | + |
|
| 600 | + /** |
|
| 601 | + * Merges in configuration values from $_GET/$_POST to object. NOT STATIC. |
|
| 602 | + * @note Same parameters as loadArrayFromForm |
|
| 603 | + */ |
|
| 604 | + public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { |
|
| 605 | + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); |
|
| 606 | + $this->loadArray($ret); |
|
| 607 | + } |
|
| 608 | + |
|
| 609 | + /** |
|
| 610 | + * Prepares an array from a form into something usable for the more |
|
| 611 | + * strict parts of HTMLPurifier_Config |
|
| 612 | + */ |
|
| 613 | + public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { |
|
| 614 | + if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); |
|
| 615 | + $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); |
|
| 616 | + |
|
| 617 | + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); |
|
| 618 | + $ret = array(); |
|
| 619 | + foreach ($allowed as $key) { |
|
| 620 | + list($ns, $directive) = $key; |
|
| 621 | + $skey = "$ns.$directive"; |
|
| 622 | + if (!empty($array["Null_$skey"])) { |
|
| 623 | + $ret[$ns][$directive] = null; |
|
| 624 | + continue; |
|
| 625 | + } |
|
| 626 | + if (!isset($array[$skey])) continue; |
|
| 627 | + $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; |
|
| 628 | + $ret[$ns][$directive] = $value; |
|
| 629 | + } |
|
| 630 | + return $ret; |
|
| 631 | + } |
|
| 632 | + |
|
| 633 | + /** |
|
| 634 | + * Loads configuration values from an ini file |
|
| 635 | + * @param $filename Name of ini file |
|
| 636 | + */ |
|
| 637 | + public function loadIni($filename) { |
|
| 638 | + if ($this->isFinalized('Cannot load directives after finalization')) return; |
|
| 639 | + $array = parse_ini_file($filename, true); |
|
| 640 | + $this->loadArray($array); |
|
| 641 | + } |
|
| 642 | + |
|
| 643 | + /** |
|
| 644 | + * Checks whether or not the configuration object is finalized. |
|
| 645 | + * @param $error String error message, or false for no error |
|
| 646 | + */ |
|
| 647 | + public function isFinalized($error = false) { |
|
| 648 | + if ($this->finalized && $error) { |
|
| 649 | + $this->triggerError($error, E_USER_ERROR); |
|
| 650 | + } |
|
| 651 | + return $this->finalized; |
|
| 652 | + } |
|
| 653 | + |
|
| 654 | + /** |
|
| 655 | + * Finalizes configuration only if auto finalize is on and not |
|
| 656 | + * already finalized |
|
| 657 | + */ |
|
| 658 | + public function autoFinalize() { |
|
| 659 | + if ($this->autoFinalize) { |
|
| 660 | + $this->finalize(); |
|
| 661 | + } else { |
|
| 662 | + $this->plist->squash(true); |
|
| 663 | + } |
|
| 664 | + } |
|
| 665 | + |
|
| 666 | + /** |
|
| 667 | + * Finalizes a configuration object, prohibiting further change |
|
| 668 | + */ |
|
| 669 | + public function finalize() { |
|
| 670 | + $this->finalized = true; |
|
| 671 | + $this->parser = null; |
|
| 672 | + } |
|
| 673 | + |
|
| 674 | + /** |
|
| 675 | + * Produces a nicely formatted error message by supplying the |
|
| 676 | + * stack frame information OUTSIDE of HTMLPurifier_Config. |
|
| 677 | + */ |
|
| 678 | + protected function triggerError($msg, $no) { |
|
| 679 | + // determine previous stack frame |
|
| 680 | + $extra = ''; |
|
| 681 | + if ($this->chatty) { |
|
| 682 | + $trace = debug_backtrace(); |
|
| 683 | + // zip(tail(trace), trace) -- but PHP is not Haskell har har |
|
| 684 | + for ($i = 0, $c = count($trace); $i < $c - 1; $i++) { |
|
| 685 | + if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') { |
|
| 686 | + continue; |
|
| 687 | + } |
|
| 688 | + $frame = $trace[$i]; |
|
| 689 | + $extra = " invoked on line {$frame['line']} in file {$frame['file']}"; |
|
| 690 | + break; |
|
| 691 | + } |
|
| 692 | + } |
|
| 693 | + trigger_error($msg . $extra, $no); |
|
| 694 | + } |
|
| 695 | + |
|
| 696 | + /** |
|
| 697 | + * Returns a serialized form of the configuration object that can |
|
| 698 | + * be reconstituted. |
|
| 699 | + */ |
|
| 700 | + public function serialize() { |
|
| 701 | + $this->getDefinition('HTML'); |
|
| 702 | + $this->getDefinition('CSS'); |
|
| 703 | + $this->getDefinition('URI'); |
|
| 704 | + return serialize($this); |
|
| 705 | + } |
|
| 706 | 706 | |
| 707 | 707 | } |
| 708 | 708 | |
@@ -116,8 +116,11 @@ discard block |
||
| 116 | 116 | } else { |
| 117 | 117 | $ret = new HTMLPurifier_Config($schema); |
| 118 | 118 | } |
| 119 | - if (is_string($config)) $ret->loadIni($config); |
|
| 120 | - elseif (is_array($config)) $ret->loadArray($config); |
|
| 119 | + if (is_string($config)) { |
|
| 120 | + $ret->loadIni($config); |
|
| 121 | + } elseif (is_array($config)) { |
|
| 122 | + $ret->loadArray($config); |
|
| 123 | + } |
|
| 121 | 124 | return $ret; |
| 122 | 125 | } |
| 123 | 126 | |
@@ -150,7 +153,9 @@ discard block |
||
| 150 | 153 | $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); |
| 151 | 154 | $key = "$key.$a"; |
| 152 | 155 | } |
| 153 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 156 | + if (!$this->finalized) { |
|
| 157 | + $this->autoFinalize(); |
|
| 158 | + } |
|
| 154 | 159 | if (!isset($this->def->info[$key])) { |
| 155 | 160 | // can't add % due to SimpleTest bug |
| 156 | 161 | $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
@@ -178,7 +183,9 @@ discard block |
||
| 178 | 183 | * @param $namespace String namespace |
| 179 | 184 | */ |
| 180 | 185 | public function getBatch($namespace) { |
| 181 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 186 | + if (!$this->finalized) { |
|
| 187 | + $this->autoFinalize(); |
|
| 188 | + } |
|
| 182 | 189 | $full = $this->getAll(); |
| 183 | 190 | if (!isset($full[$namespace])) { |
| 184 | 191 | $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
@@ -220,7 +227,9 @@ discard block |
||
| 220 | 227 | * @warning This is a pretty inefficient function, avoid if you can |
| 221 | 228 | */ |
| 222 | 229 | public function getAll() { |
| 223 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 230 | + if (!$this->finalized) { |
|
| 231 | + $this->autoFinalize(); |
|
| 232 | + } |
|
| 224 | 233 | $ret = array(); |
| 225 | 234 | foreach ($this->plist->squash() as $name => $value) { |
| 226 | 235 | list($ns, $key) = explode('.', $name, 2); |
@@ -244,7 +253,9 @@ discard block |
||
| 244 | 253 | } else { |
| 245 | 254 | list($namespace) = explode('.', $key); |
| 246 | 255 | } |
| 247 | - if ($this->isFinalized('Cannot set directive after finalization')) return; |
|
| 256 | + if ($this->isFinalized('Cannot set directive after finalization')) { |
|
| 257 | + return; |
|
| 258 | + } |
|
| 248 | 259 | if (!isset($this->def->info[$key])) { |
| 249 | 260 | $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' to value', |
| 250 | 261 | E_USER_WARNING); |
@@ -311,7 +322,9 @@ discard block |
||
| 311 | 322 | */ |
| 312 | 323 | private function _listify($lookup) { |
| 313 | 324 | $list = array(); |
| 314 | - foreach ($lookup as $name => $b) $list[] = $name; |
|
| 325 | + foreach ($lookup as $name => $b) { |
|
| 326 | + $list[] = $name; |
|
| 327 | + } |
|
| 315 | 328 | return implode(', ', $list); |
| 316 | 329 | } |
| 317 | 330 | |
@@ -377,7 +390,9 @@ discard block |
||
| 377 | 390 | if ($optimized && !$raw) { |
| 378 | 391 | throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false"); |
| 379 | 392 | } |
| 380 | - if (!$this->finalized) $this->autoFinalize(); |
|
| 393 | + if (!$this->finalized) { |
|
| 394 | + $this->autoFinalize(); |
|
| 395 | + } |
|
| 381 | 396 | // temporarily suspend locks, so we can handle recursive definition calls |
| 382 | 397 | $lock = $this->lock; |
| 383 | 398 | $this->lock = null; |
@@ -395,7 +410,9 @@ discard block |
||
| 395 | 410 | return $def; |
| 396 | 411 | } else { |
| 397 | 412 | $def->setup($this); |
| 398 | - if ($def->optimized) $cache->add($def, $this); |
|
| 413 | + if ($def->optimized) { |
|
| 414 | + $cache->add($def, $this); |
|
| 415 | + } |
|
| 399 | 416 | return $def; |
| 400 | 417 | } |
| 401 | 418 | } |
@@ -524,7 +541,9 @@ discard block |
||
| 524 | 541 | * @param $config_array Configuration associative array |
| 525 | 542 | */ |
| 526 | 543 | public function loadArray($config_array) { |
| 527 | - if ($this->isFinalized('Cannot load directives after finalization')) return; |
|
| 544 | + if ($this->isFinalized('Cannot load directives after finalization')) { |
|
| 545 | + return; |
|
| 546 | + } |
|
| 528 | 547 | foreach ($config_array as $key => $value) { |
| 529 | 548 | $key = str_replace('_', '.', $key); |
| 530 | 549 | if (strpos($key, '.') !== false) { |
@@ -550,7 +569,9 @@ discard block |
||
| 550 | 569 | $schema = HTMLPurifier_ConfigSchema::instance(); |
| 551 | 570 | } |
| 552 | 571 | if ($allowed !== true) { |
| 553 | - if (is_string($allowed)) $allowed = array($allowed); |
|
| 572 | + if (is_string($allowed)) { |
|
| 573 | + $allowed = array($allowed); |
|
| 574 | + } |
|
| 554 | 575 | $allowed_ns = array(); |
| 555 | 576 | $allowed_directives = array(); |
| 556 | 577 | $blacklisted_directives = array(); |
@@ -572,11 +593,19 @@ discard block |
||
| 572 | 593 | foreach ($schema->info as $key => $def) { |
| 573 | 594 | list($ns, $directive) = explode('.', $key, 2); |
| 574 | 595 | if ($allowed !== true) { |
| 575 | - if (isset($blacklisted_directives["$ns.$directive"])) continue; |
|
| 576 | - if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; |
|
| 596 | + if (isset($blacklisted_directives["$ns.$directive"])) { |
|
| 597 | + continue; |
|
| 598 | + } |
|
| 599 | + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) { |
|
| 600 | + continue; |
|
| 601 | + } |
|
| 602 | + } |
|
| 603 | + if (isset($def->isAlias)) { |
|
| 604 | + continue; |
|
| 605 | + } |
|
| 606 | + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') { |
|
| 607 | + continue; |
|
| 577 | 608 | } |
| 578 | - if (isset($def->isAlias)) continue; |
|
| 579 | - if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; |
|
| 580 | 609 | $ret[] = array($ns, $directive); |
| 581 | 610 | } |
| 582 | 611 | return $ret; |
@@ -611,7 +640,9 @@ discard block |
||
| 611 | 640 | * strict parts of HTMLPurifier_Config |
| 612 | 641 | */ |
| 613 | 642 | public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { |
| 614 | - if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); |
|
| 643 | + if ($index !== false) { |
|
| 644 | + $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); |
|
| 645 | + } |
|
| 615 | 646 | $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); |
| 616 | 647 | |
| 617 | 648 | $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); |
@@ -623,7 +654,9 @@ discard block |
||
| 623 | 654 | $ret[$ns][$directive] = null; |
| 624 | 655 | continue; |
| 625 | 656 | } |
| 626 | - if (!isset($array[$skey])) continue; |
|
| 657 | + if (!isset($array[$skey])) { |
|
| 658 | + continue; |
|
| 659 | + } |
|
| 627 | 660 | $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; |
| 628 | 661 | $ret[$ns][$directive] = $value; |
| 629 | 662 | } |
@@ -635,7 +668,9 @@ discard block |
||
| 635 | 668 | * @param $filename Name of ini file |
| 636 | 669 | */ |
| 637 | 670 | public function loadIni($filename) { |
| 638 | - if ($this->isFinalized('Cannot load directives after finalization')) return; |
|
| 671 | + if ($this->isFinalized('Cannot load directives after finalization')) { |
|
| 672 | + return; |
|
| 673 | + } |
|
| 639 | 674 | $array = parse_ini_file($filename, true); |
| 640 | 675 | $this->loadArray($array); |
| 641 | 676 | } |
@@ -153,20 +153,20 @@ discard block |
||
| 153 | 153 | if (!$this->finalized) $this->autoFinalize(); |
| 154 | 154 | if (!isset($this->def->info[$key])) { |
| 155 | 155 | // can't add % due to SimpleTest bug |
| 156 | - $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 156 | + $this->triggerError('Cannot retrieve value of undefined directive '.htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 157 | 157 | E_USER_WARNING); |
| 158 | 158 | return; |
| 159 | 159 | } |
| 160 | 160 | if (isset($this->def->info[$key]->isAlias)) { |
| 161 | 161 | $d = $this->def->info[$key]; |
| 162 | - $this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key, |
|
| 162 | + $this->triggerError('Cannot get value from aliased directive, use real name '.$d->key, |
|
| 163 | 163 | E_USER_ERROR); |
| 164 | 164 | return; |
| 165 | 165 | } |
| 166 | 166 | if ($this->lock) { |
| 167 | 167 | list($ns) = explode('.', $key); |
| 168 | 168 | if ($ns !== $this->lock) { |
| 169 | - $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); |
|
| 169 | + $this->triggerError('Cannot get value of namespace '.$ns.' when lock for '.$this->lock.' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); |
|
| 170 | 170 | return; |
| 171 | 171 | } |
| 172 | 172 | } |
@@ -181,7 +181,7 @@ discard block |
||
| 181 | 181 | if (!$this->finalized) $this->autoFinalize(); |
| 182 | 182 | $full = $this->getAll(); |
| 183 | 183 | if (!isset($full[$namespace])) { |
| 184 | - $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 184 | + $this->triggerError('Cannot retrieve undefined namespace '.htmlspecialchars($namespace, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), |
|
| 185 | 185 | E_USER_WARNING); |
| 186 | 186 | return; |
| 187 | 187 | } |
@@ -246,7 +246,7 @@ discard block |
||
| 246 | 246 | } |
| 247 | 247 | if ($this->isFinalized('Cannot set directive after finalization')) return; |
| 248 | 248 | if (!isset($this->def->info[$key])) { |
| 249 | - $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' to value', |
|
| 249 | + $this->triggerError('Cannot set undefined directive '.htmlspecialchars($key, ENT_COMPAT | ENT_HTML401, 'UTF-8', false).' to value', |
|
| 250 | 250 | E_USER_WARNING); |
| 251 | 251 | return; |
| 252 | 252 | } |
@@ -255,7 +255,7 @@ discard block |
||
| 255 | 255 | if (isset($def->isAlias)) { |
| 256 | 256 | if ($this->aliasMode) { |
| 257 | 257 | $this->triggerError('Double-aliases not allowed, please fix '. |
| 258 | - 'ConfigSchema bug with' . $key, E_USER_ERROR); |
|
| 258 | + 'ConfigSchema bug with'.$key, E_USER_ERROR); |
|
| 259 | 259 | return; |
| 260 | 260 | } |
| 261 | 261 | $this->aliasMode = true; |
@@ -279,7 +279,7 @@ discard block |
||
| 279 | 279 | try { |
| 280 | 280 | $value = $this->parser->parse($value, $type, $allow_null); |
| 281 | 281 | } catch (HTMLPurifier_VarParserException $e) { |
| 282 | - $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); |
|
| 282 | + $this->triggerError('Value for '.$key.' is of invalid type, should be '.HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); |
|
| 283 | 283 | return; |
| 284 | 284 | } |
| 285 | 285 | if (is_string($value) && is_object($def)) { |
@@ -289,7 +289,7 @@ discard block |
||
| 289 | 289 | } |
| 290 | 290 | // check to see if the value is allowed |
| 291 | 291 | if (isset($def->allowed) && !isset($def->allowed[$value])) { |
| 292 | - $this->triggerError('Value not supported, valid values are: ' . |
|
| 292 | + $this->triggerError('Value not supported, valid values are: '. |
|
| 293 | 293 | $this->_listify($def->allowed), E_USER_WARNING); |
| 294 | 294 | return; |
| 295 | 295 | } |
@@ -422,7 +422,7 @@ discard block |
||
| 422 | 422 | // check preconditions |
| 423 | 423 | $def = null; |
| 424 | 424 | if ($optimized) { |
| 425 | - if (is_null($this->get($type . '.DefinitionID'))) { |
|
| 425 | + if (is_null($this->get($type.'.DefinitionID'))) { |
|
| 426 | 426 | // fatally error out if definition ID not set |
| 427 | 427 | throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); |
| 428 | 428 | } |
@@ -431,16 +431,16 @@ discard block |
||
| 431 | 431 | $def = $this->definitions[$type]; |
| 432 | 432 | if ($def->setup && !$optimized) { |
| 433 | 433 | $extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : ""; |
| 434 | - throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra); |
|
| 434 | + throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup".$extra); |
|
| 435 | 435 | } |
| 436 | 436 | if ($def->optimized === null) { |
| 437 | 437 | $extra = $this->chatty ? " (try flushing your cache)" : ""; |
| 438 | - throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra); |
|
| 438 | + throw new HTMLPurifier_Exception("Optimization status of definition is unknown".$extra); |
|
| 439 | 439 | } |
| 440 | 440 | if ($def->optimized !== $optimized) { |
| 441 | 441 | $msg = $optimized ? "optimized" : "unoptimized"; |
| 442 | 442 | $extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : ""; |
| 443 | - throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra); |
|
| 443 | + throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals".$extra); |
|
| 444 | 444 | } |
| 445 | 445 | } |
| 446 | 446 | // check if definition was in memory |
@@ -471,7 +471,7 @@ discard block |
||
| 471 | 471 | } |
| 472 | 472 | // check invariants for creation |
| 473 | 473 | if (!$optimized) { |
| 474 | - if (!is_null($this->get($type . '.DefinitionID'))) { |
|
| 474 | + if (!is_null($this->get($type.'.DefinitionID'))) { |
|
| 475 | 475 | if ($this->chatty) { |
| 476 | 476 | $this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING); |
| 477 | 477 | } else { |
@@ -533,7 +533,7 @@ discard block |
||
| 533 | 533 | $namespace = $key; |
| 534 | 534 | $namespace_values = $value; |
| 535 | 535 | foreach ($namespace_values as $directive => $value) { |
| 536 | - $this->set($namespace .'.'. $directive, $value); |
|
| 536 | + $this->set($namespace.'.'.$directive, $value); |
|
| 537 | 537 | } |
| 538 | 538 | } |
| 539 | 539 | } |
@@ -690,7 +690,7 @@ discard block |
||
| 690 | 690 | break; |
| 691 | 691 | } |
| 692 | 692 | } |
| 693 | - trigger_error($msg . $extra, $no); |
|
| 693 | + trigger_error($msg.$extra, $no); |
|
| 694 | 694 | } |
| 695 | 695 | |
| 696 | 696 | /** |
@@ -17,10 +17,12 @@ |
||
| 17 | 17 | * Registers a doctype to the registry |
| 18 | 18 | * @note Accepts a fully-formed doctype object, or the |
| 19 | 19 | * parameters for constructing a doctype object |
| 20 | - * @param $doctype Name of doctype or literal doctype object |
|
| 20 | + * @param string $doctype Name of doctype or literal doctype object |
|
| 21 | 21 | * @param $modules Modules doctype will load |
| 22 | 22 | * @param $modules_for_modes Modules doctype will load for certain modes |
| 23 | 23 | * @param $aliases Alias names for doctype |
| 24 | + * @param string $dtd_public |
|
| 25 | + * @param string $dtd_system |
|
| 24 | 26 | * @return Editable registered doctype |
| 25 | 27 | */ |
| 26 | 28 | public function register($doctype, $xml = true, $modules = array(), |
@@ -3,100 +3,100 @@ |
||
| 3 | 3 | class HTMLPurifier_DoctypeRegistry |
| 4 | 4 | { |
| 5 | 5 | |
| 6 | - /** |
|
| 7 | - * Hash of doctype names to doctype objects |
|
| 8 | - */ |
|
| 9 | - protected $doctypes; |
|
| 6 | + /** |
|
| 7 | + * Hash of doctype names to doctype objects |
|
| 8 | + */ |
|
| 9 | + protected $doctypes; |
|
| 10 | 10 | |
| 11 | - /** |
|
| 12 | - * Lookup table of aliases to real doctype names |
|
| 13 | - */ |
|
| 14 | - protected $aliases; |
|
| 11 | + /** |
|
| 12 | + * Lookup table of aliases to real doctype names |
|
| 13 | + */ |
|
| 14 | + protected $aliases; |
|
| 15 | 15 | |
| 16 | - /** |
|
| 17 | - * Registers a doctype to the registry |
|
| 18 | - * @note Accepts a fully-formed doctype object, or the |
|
| 19 | - * parameters for constructing a doctype object |
|
| 20 | - * @param $doctype Name of doctype or literal doctype object |
|
| 21 | - * @param $modules Modules doctype will load |
|
| 22 | - * @param $modules_for_modes Modules doctype will load for certain modes |
|
| 23 | - * @param $aliases Alias names for doctype |
|
| 24 | - * @return Editable registered doctype |
|
| 25 | - */ |
|
| 26 | - public function register($doctype, $xml = true, $modules = array(), |
|
| 27 | - $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
|
| 28 | - ) { |
|
| 29 | - if (!is_array($modules)) $modules = array($modules); |
|
| 30 | - if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); |
|
| 31 | - if (!is_array($aliases)) $aliases = array($aliases); |
|
| 32 | - if (!is_object($doctype)) { |
|
| 33 | - $doctype = new HTMLPurifier_Doctype( |
|
| 34 | - $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system |
|
| 35 | - ); |
|
| 36 | - } |
|
| 37 | - $this->doctypes[$doctype->name] = $doctype; |
|
| 38 | - $name = $doctype->name; |
|
| 39 | - // hookup aliases |
|
| 40 | - foreach ($doctype->aliases as $alias) { |
|
| 41 | - if (isset($this->doctypes[$alias])) continue; |
|
| 42 | - $this->aliases[$alias] = $name; |
|
| 43 | - } |
|
| 44 | - // remove old aliases |
|
| 45 | - if (isset($this->aliases[$name])) unset($this->aliases[$name]); |
|
| 46 | - return $doctype; |
|
| 47 | - } |
|
| 16 | + /** |
|
| 17 | + * Registers a doctype to the registry |
|
| 18 | + * @note Accepts a fully-formed doctype object, or the |
|
| 19 | + * parameters for constructing a doctype object |
|
| 20 | + * @param $doctype Name of doctype or literal doctype object |
|
| 21 | + * @param $modules Modules doctype will load |
|
| 22 | + * @param $modules_for_modes Modules doctype will load for certain modes |
|
| 23 | + * @param $aliases Alias names for doctype |
|
| 24 | + * @return Editable registered doctype |
|
| 25 | + */ |
|
| 26 | + public function register($doctype, $xml = true, $modules = array(), |
|
| 27 | + $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
|
| 28 | + ) { |
|
| 29 | + if (!is_array($modules)) $modules = array($modules); |
|
| 30 | + if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); |
|
| 31 | + if (!is_array($aliases)) $aliases = array($aliases); |
|
| 32 | + if (!is_object($doctype)) { |
|
| 33 | + $doctype = new HTMLPurifier_Doctype( |
|
| 34 | + $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system |
|
| 35 | + ); |
|
| 36 | + } |
|
| 37 | + $this->doctypes[$doctype->name] = $doctype; |
|
| 38 | + $name = $doctype->name; |
|
| 39 | + // hookup aliases |
|
| 40 | + foreach ($doctype->aliases as $alias) { |
|
| 41 | + if (isset($this->doctypes[$alias])) continue; |
|
| 42 | + $this->aliases[$alias] = $name; |
|
| 43 | + } |
|
| 44 | + // remove old aliases |
|
| 45 | + if (isset($this->aliases[$name])) unset($this->aliases[$name]); |
|
| 46 | + return $doctype; |
|
| 47 | + } |
|
| 48 | 48 | |
| 49 | - /** |
|
| 50 | - * Retrieves reference to a doctype of a certain name |
|
| 51 | - * @note This function resolves aliases |
|
| 52 | - * @note When possible, use the more fully-featured make() |
|
| 53 | - * @param $doctype Name of doctype |
|
| 54 | - * @return Editable doctype object |
|
| 55 | - */ |
|
| 56 | - public function get($doctype) { |
|
| 57 | - if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
|
| 58 | - if (!isset($this->doctypes[$doctype])) { |
|
| 59 | - trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
|
| 60 | - $anon = new HTMLPurifier_Doctype($doctype); |
|
| 61 | - return $anon; |
|
| 62 | - } |
|
| 63 | - return $this->doctypes[$doctype]; |
|
| 64 | - } |
|
| 49 | + /** |
|
| 50 | + * Retrieves reference to a doctype of a certain name |
|
| 51 | + * @note This function resolves aliases |
|
| 52 | + * @note When possible, use the more fully-featured make() |
|
| 53 | + * @param $doctype Name of doctype |
|
| 54 | + * @return Editable doctype object |
|
| 55 | + */ |
|
| 56 | + public function get($doctype) { |
|
| 57 | + if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
|
| 58 | + if (!isset($this->doctypes[$doctype])) { |
|
| 59 | + trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
|
| 60 | + $anon = new HTMLPurifier_Doctype($doctype); |
|
| 61 | + return $anon; |
|
| 62 | + } |
|
| 63 | + return $this->doctypes[$doctype]; |
|
| 64 | + } |
|
| 65 | 65 | |
| 66 | - /** |
|
| 67 | - * Creates a doctype based on a configuration object, |
|
| 68 | - * will perform initialization on the doctype |
|
| 69 | - * @note Use this function to get a copy of doctype that config |
|
| 70 | - * can hold on to (this is necessary in order to tell |
|
| 71 | - * Generator whether or not the current document is XML |
|
| 72 | - * based or not). |
|
| 73 | - */ |
|
| 74 | - public function make($config) { |
|
| 75 | - return clone $this->get($this->getDoctypeFromConfig($config)); |
|
| 76 | - } |
|
| 66 | + /** |
|
| 67 | + * Creates a doctype based on a configuration object, |
|
| 68 | + * will perform initialization on the doctype |
|
| 69 | + * @note Use this function to get a copy of doctype that config |
|
| 70 | + * can hold on to (this is necessary in order to tell |
|
| 71 | + * Generator whether or not the current document is XML |
|
| 72 | + * based or not). |
|
| 73 | + */ |
|
| 74 | + public function make($config) { |
|
| 75 | + return clone $this->get($this->getDoctypeFromConfig($config)); |
|
| 76 | + } |
|
| 77 | 77 | |
| 78 | - /** |
|
| 79 | - * Retrieves the doctype from the configuration object |
|
| 80 | - */ |
|
| 81 | - public function getDoctypeFromConfig($config) { |
|
| 82 | - // recommended test |
|
| 83 | - $doctype = $config->get('HTML.Doctype'); |
|
| 84 | - if (!empty($doctype)) return $doctype; |
|
| 85 | - $doctype = $config->get('HTML.CustomDoctype'); |
|
| 86 | - if (!empty($doctype)) return $doctype; |
|
| 87 | - // backwards-compatibility |
|
| 88 | - if ($config->get('HTML.XHTML')) { |
|
| 89 | - $doctype = 'XHTML 1.0'; |
|
| 90 | - } else { |
|
| 91 | - $doctype = 'HTML 4.01'; |
|
| 92 | - } |
|
| 93 | - if ($config->get('HTML.Strict')) { |
|
| 94 | - $doctype .= ' Strict'; |
|
| 95 | - } else { |
|
| 96 | - $doctype .= ' Transitional'; |
|
| 97 | - } |
|
| 98 | - return $doctype; |
|
| 99 | - } |
|
| 78 | + /** |
|
| 79 | + * Retrieves the doctype from the configuration object |
|
| 80 | + */ |
|
| 81 | + public function getDoctypeFromConfig($config) { |
|
| 82 | + // recommended test |
|
| 83 | + $doctype = $config->get('HTML.Doctype'); |
|
| 84 | + if (!empty($doctype)) return $doctype; |
|
| 85 | + $doctype = $config->get('HTML.CustomDoctype'); |
|
| 86 | + if (!empty($doctype)) return $doctype; |
|
| 87 | + // backwards-compatibility |
|
| 88 | + if ($config->get('HTML.XHTML')) { |
|
| 89 | + $doctype = 'XHTML 1.0'; |
|
| 90 | + } else { |
|
| 91 | + $doctype = 'HTML 4.01'; |
|
| 92 | + } |
|
| 93 | + if ($config->get('HTML.Strict')) { |
|
| 94 | + $doctype .= ' Strict'; |
|
| 95 | + } else { |
|
| 96 | + $doctype .= ' Transitional'; |
|
| 97 | + } |
|
| 98 | + return $doctype; |
|
| 99 | + } |
|
| 100 | 100 | |
| 101 | 101 | } |
| 102 | 102 | |
@@ -26,9 +26,15 @@ discard block |
||
| 26 | 26 | public function register($doctype, $xml = true, $modules = array(), |
| 27 | 27 | $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null |
| 28 | 28 | ) { |
| 29 | - if (!is_array($modules)) $modules = array($modules); |
|
| 30 | - if (!is_array($tidy_modules)) $tidy_modules = array($tidy_modules); |
|
| 31 | - if (!is_array($aliases)) $aliases = array($aliases); |
|
| 29 | + if (!is_array($modules)) { |
|
| 30 | + $modules = array($modules); |
|
| 31 | + } |
|
| 32 | + if (!is_array($tidy_modules)) { |
|
| 33 | + $tidy_modules = array($tidy_modules); |
|
| 34 | + } |
|
| 35 | + if (!is_array($aliases)) { |
|
| 36 | + $aliases = array($aliases); |
|
| 37 | + } |
|
| 32 | 38 | if (!is_object($doctype)) { |
| 33 | 39 | $doctype = new HTMLPurifier_Doctype( |
| 34 | 40 | $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system |
@@ -38,11 +44,15 @@ discard block |
||
| 38 | 44 | $name = $doctype->name; |
| 39 | 45 | // hookup aliases |
| 40 | 46 | foreach ($doctype->aliases as $alias) { |
| 41 | - if (isset($this->doctypes[$alias])) continue; |
|
| 47 | + if (isset($this->doctypes[$alias])) { |
|
| 48 | + continue; |
|
| 49 | + } |
|
| 42 | 50 | $this->aliases[$alias] = $name; |
| 43 | 51 | } |
| 44 | 52 | // remove old aliases |
| 45 | - if (isset($this->aliases[$name])) unset($this->aliases[$name]); |
|
| 53 | + if (isset($this->aliases[$name])) { |
|
| 54 | + unset($this->aliases[$name]); |
|
| 55 | + } |
|
| 46 | 56 | return $doctype; |
| 47 | 57 | } |
| 48 | 58 | |
@@ -54,7 +64,9 @@ discard block |
||
| 54 | 64 | * @return Editable doctype object |
| 55 | 65 | */ |
| 56 | 66 | public function get($doctype) { |
| 57 | - if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
|
| 67 | + if (isset($this->aliases[$doctype])) { |
|
| 68 | + $doctype = $this->aliases[$doctype]; |
|
| 69 | + } |
|
| 58 | 70 | if (!isset($this->doctypes[$doctype])) { |
| 59 | 71 | trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
| 60 | 72 | $anon = new HTMLPurifier_Doctype($doctype); |
@@ -81,9 +93,13 @@ discard block |
||
| 81 | 93 | public function getDoctypeFromConfig($config) { |
| 82 | 94 | // recommended test |
| 83 | 95 | $doctype = $config->get('HTML.Doctype'); |
| 84 | - if (!empty($doctype)) return $doctype; |
|
| 96 | + if (!empty($doctype)) { |
|
| 97 | + return $doctype; |
|
| 98 | + } |
|
| 85 | 99 | $doctype = $config->get('HTML.CustomDoctype'); |
| 86 | - if (!empty($doctype)) return $doctype; |
|
| 100 | + if (!empty($doctype)) { |
|
| 101 | + return $doctype; |
|
| 102 | + } |
|
| 87 | 103 | // backwards-compatibility |
| 88 | 104 | if ($config->get('HTML.XHTML')) { |
| 89 | 105 | $doctype = 'XHTML 1.0'; |
@@ -56,7 +56,7 @@ |
||
| 56 | 56 | public function get($doctype) { |
| 57 | 57 | if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; |
| 58 | 58 | if (!isset($this->doctypes[$doctype])) { |
| 59 | - trigger_error('Doctype ' . htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false) . ' does not exist', E_USER_ERROR); |
|
| 59 | + trigger_error('Doctype '.htmlspecialchars($doctype, ENT_COMPAT | ENT_HTML401, 'UTF-8', false).' does not exist', E_USER_ERROR); |
|
| 60 | 60 | $anon = new HTMLPurifier_Doctype($doctype); |
| 61 | 61 | return $anon; |
| 62 | 62 | } |
@@ -31,6 +31,9 @@ discard block |
||
| 31 | 31 | |
| 32 | 32 | /** |
| 33 | 33 | * iconv wrapper which mutes errors and works around bugs. |
| 34 | + * @param string $in |
|
| 35 | + * @param string $out |
|
| 36 | + * @param string $text |
|
| 34 | 37 | */ |
| 35 | 38 | public static function iconv($in, $out, $text, $max_chunk_size = 8000) { |
| 36 | 39 | $code = self::testIconvTruncateBug(); |
@@ -332,6 +335,7 @@ discard block |
||
| 332 | 335 | |
| 333 | 336 | /** |
| 334 | 337 | * Converts a string to UTF-8 based on configuration. |
| 338 | + * @param HTMLPurifier_Context $context |
|
| 335 | 339 | */ |
| 336 | 340 | public static function convertToUTF8($str, $config, $context) { |
| 337 | 341 | $encoding = $config->get('Core.Encoding'); |
@@ -362,6 +366,7 @@ discard block |
||
| 362 | 366 | * Converts a string from UTF-8 based on configuration. |
| 363 | 367 | * @note Currently, this is a lossy conversion, with unexpressable |
| 364 | 368 | * characters being omitted. |
| 369 | + * @param HTMLPurifier_Context $context |
|
| 365 | 370 | */ |
| 366 | 371 | public static function convertFromUTF8($str, $config, $context) { |
| 367 | 372 | $encoding = $config->get('Core.Encoding'); |
@@ -7,532 +7,532 @@ |
||
| 7 | 7 | class HTMLPurifier_Encoder |
| 8 | 8 | { |
| 9 | 9 | |
| 10 | - /** |
|
| 11 | - * Constructor throws fatal error if you attempt to instantiate class |
|
| 12 | - */ |
|
| 13 | - private function __construct() { |
|
| 14 | - trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); |
|
| 15 | - } |
|
| 16 | - |
|
| 17 | - /** |
|
| 18 | - * Error-handler that mutes errors, alternative to shut-up operator. |
|
| 19 | - */ |
|
| 20 | - public static function muteErrorHandler() {} |
|
| 21 | - |
|
| 22 | - /** |
|
| 23 | - * iconv wrapper which mutes errors, but doesn't work around bugs. |
|
| 24 | - */ |
|
| 25 | - public static function unsafeIconv($in, $out, $text) { |
|
| 26 | - set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); |
|
| 27 | - $r = iconv($in, $out, $text); |
|
| 28 | - restore_error_handler(); |
|
| 29 | - return $r; |
|
| 30 | - } |
|
| 31 | - |
|
| 32 | - /** |
|
| 33 | - * iconv wrapper which mutes errors and works around bugs. |
|
| 34 | - */ |
|
| 35 | - public static function iconv($in, $out, $text, $max_chunk_size = 8000) { |
|
| 36 | - $code = self::testIconvTruncateBug(); |
|
| 37 | - if ($code == self::ICONV_OK) { |
|
| 38 | - return self::unsafeIconv($in, $out, $text); |
|
| 39 | - } elseif ($code == self::ICONV_TRUNCATES) { |
|
| 40 | - // we can only work around this if the input character set |
|
| 41 | - // is utf-8 |
|
| 42 | - if ($in == 'utf-8') { |
|
| 43 | - if ($max_chunk_size < 4) { |
|
| 44 | - trigger_error('max_chunk_size is too small', E_USER_WARNING); |
|
| 45 | - return false; |
|
| 46 | - } |
|
| 47 | - // split into 8000 byte chunks, but be careful to handle |
|
| 48 | - // multibyte boundaries properly |
|
| 49 | - if (($c = strlen($text)) <= $max_chunk_size) { |
|
| 50 | - return self::unsafeIconv($in, $out, $text); |
|
| 51 | - } |
|
| 52 | - $r = ''; |
|
| 53 | - $i = 0; |
|
| 54 | - while (true) { |
|
| 55 | - if ($i + $max_chunk_size >= $c) { |
|
| 56 | - $r .= self::unsafeIconv($in, $out, substr($text, $i)); |
|
| 57 | - break; |
|
| 58 | - } |
|
| 59 | - // wibble the boundary |
|
| 60 | - if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { |
|
| 61 | - $chunk_size = $max_chunk_size; |
|
| 62 | - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { |
|
| 63 | - $chunk_size = $max_chunk_size - 1; |
|
| 64 | - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { |
|
| 65 | - $chunk_size = $max_chunk_size - 2; |
|
| 66 | - } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { |
|
| 67 | - $chunk_size = $max_chunk_size - 3; |
|
| 68 | - } else { |
|
| 69 | - return false; // rather confusing UTF-8... |
|
| 70 | - } |
|
| 71 | - $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths |
|
| 72 | - $r .= self::unsafeIconv($in, $out, $chunk); |
|
| 73 | - $i += $chunk_size; |
|
| 74 | - } |
|
| 75 | - return $r; |
|
| 76 | - } else { |
|
| 77 | - return false; |
|
| 78 | - } |
|
| 79 | - } else { |
|
| 80 | - return false; |
|
| 81 | - } |
|
| 82 | - } |
|
| 83 | - |
|
| 84 | - /** |
|
| 85 | - * Cleans a UTF-8 string for well-formedness and SGML validity |
|
| 86 | - * |
|
| 87 | - * It will parse according to UTF-8 and return a valid UTF8 string, with |
|
| 88 | - * non-SGML codepoints excluded. |
|
| 89 | - * |
|
| 90 | - * @note Just for reference, the non-SGML code points are 0 to 31 and |
|
| 91 | - * 127 to 159, inclusive. However, we allow code points 9, 10 |
|
| 92 | - * and 13, which are the tab, line feed and carriage return |
|
| 93 | - * respectively. 128 and above the code points map to multibyte |
|
| 94 | - * UTF-8 representations. |
|
| 95 | - * |
|
| 96 | - * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and |
|
| 97 | - * [email protected] at <http://iki.fi/hsivonen/php-utf8/> under the |
|
| 98 | - * LGPL license. Notes on what changed are inside, but in general, |
|
| 99 | - * the original code transformed UTF-8 text into an array of integer |
|
| 100 | - * Unicode codepoints. Understandably, transforming that back to |
|
| 101 | - * a string would be somewhat expensive, so the function was modded to |
|
| 102 | - * directly operate on the string. However, this discourages code |
|
| 103 | - * reuse, and the logic enumerated here would be useful for any |
|
| 104 | - * function that needs to be able to understand UTF-8 characters. |
|
| 105 | - * As of right now, only smart lossless character encoding converters |
|
| 106 | - * would need that, and I'm probably not going to implement them. |
|
| 107 | - * Once again, PHP 6 should solve all our problems. |
|
| 108 | - */ |
|
| 109 | - public static function cleanUTF8($str, $force_php = false) { |
|
| 110 | - |
|
| 111 | - // UTF-8 validity is checked since PHP 4.3.5 |
|
| 112 | - // This is an optimization: if the string is already valid UTF-8, no |
|
| 113 | - // need to do PHP stuff. 99% of the time, this will be the case. |
|
| 114 | - // The regexp matches the XML char production, as well as well as excluding |
|
| 115 | - // non-SGML codepoints U+007F to U+009F |
|
| 116 | - if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) { |
|
| 117 | - return $str; |
|
| 118 | - } |
|
| 119 | - |
|
| 120 | - $mState = 0; // cached expected number of octets after the current octet |
|
| 121 | - // until the beginning of the next UTF8 character sequence |
|
| 122 | - $mUcs4 = 0; // cached Unicode character |
|
| 123 | - $mBytes = 1; // cached expected number of octets in the current sequence |
|
| 124 | - |
|
| 125 | - // original code involved an $out that was an array of Unicode |
|
| 126 | - // codepoints. Instead of having to convert back into UTF-8, we've |
|
| 127 | - // decided to directly append valid UTF-8 characters onto a string |
|
| 128 | - // $out once they're done. $char accumulates raw bytes, while $mUcs4 |
|
| 129 | - // turns into the Unicode code point, so there's some redundancy. |
|
| 130 | - |
|
| 131 | - $out = ''; |
|
| 132 | - $char = ''; |
|
| 133 | - |
|
| 134 | - $len = strlen($str); |
|
| 135 | - for($i = 0; $i < $len; $i++) { |
|
| 136 | - $in = ord($str{$i}); |
|
| 137 | - $char .= $str[$i]; // append byte to char |
|
| 138 | - if (0 == $mState) { |
|
| 139 | - // When mState is zero we expect either a US-ASCII character |
|
| 140 | - // or a multi-octet sequence. |
|
| 141 | - if (0 == (0x80 & ($in))) { |
|
| 142 | - // US-ASCII, pass straight through. |
|
| 143 | - if (($in <= 31 || $in == 127) && |
|
| 144 | - !($in == 9 || $in == 13 || $in == 10) // save \r\t\n |
|
| 145 | - ) { |
|
| 146 | - // control characters, remove |
|
| 147 | - } else { |
|
| 148 | - $out .= $char; |
|
| 149 | - } |
|
| 150 | - // reset |
|
| 151 | - $char = ''; |
|
| 152 | - $mBytes = 1; |
|
| 153 | - } elseif (0xC0 == (0xE0 & ($in))) { |
|
| 154 | - // First octet of 2 octet sequence |
|
| 155 | - $mUcs4 = ($in); |
|
| 156 | - $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 157 | - $mState = 1; |
|
| 158 | - $mBytes = 2; |
|
| 159 | - } elseif (0xE0 == (0xF0 & ($in))) { |
|
| 160 | - // First octet of 3 octet sequence |
|
| 161 | - $mUcs4 = ($in); |
|
| 162 | - $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 163 | - $mState = 2; |
|
| 164 | - $mBytes = 3; |
|
| 165 | - } elseif (0xF0 == (0xF8 & ($in))) { |
|
| 166 | - // First octet of 4 octet sequence |
|
| 167 | - $mUcs4 = ($in); |
|
| 168 | - $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 169 | - $mState = 3; |
|
| 170 | - $mBytes = 4; |
|
| 171 | - } elseif (0xF8 == (0xFC & ($in))) { |
|
| 172 | - // First octet of 5 octet sequence. |
|
| 173 | - // |
|
| 174 | - // This is illegal because the encoded codepoint must be |
|
| 175 | - // either: |
|
| 176 | - // (a) not the shortest form or |
|
| 177 | - // (b) outside the Unicode range of 0-0x10FFFF. |
|
| 178 | - // Rather than trying to resynchronize, we will carry on |
|
| 179 | - // until the end of the sequence and let the later error |
|
| 180 | - // handling code catch it. |
|
| 181 | - $mUcs4 = ($in); |
|
| 182 | - $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 183 | - $mState = 4; |
|
| 184 | - $mBytes = 5; |
|
| 185 | - } elseif (0xFC == (0xFE & ($in))) { |
|
| 186 | - // First octet of 6 octet sequence, see comments for 5 |
|
| 187 | - // octet sequence. |
|
| 188 | - $mUcs4 = ($in); |
|
| 189 | - $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 190 | - $mState = 5; |
|
| 191 | - $mBytes = 6; |
|
| 192 | - } else { |
|
| 193 | - // Current octet is neither in the US-ASCII range nor a |
|
| 194 | - // legal first octet of a multi-octet sequence. |
|
| 195 | - $mState = 0; |
|
| 196 | - $mUcs4 = 0; |
|
| 197 | - $mBytes = 1; |
|
| 198 | - $char = ''; |
|
| 199 | - } |
|
| 200 | - } else { |
|
| 201 | - // When mState is non-zero, we expect a continuation of the |
|
| 202 | - // multi-octet sequence |
|
| 203 | - if (0x80 == (0xC0 & ($in))) { |
|
| 204 | - // Legal continuation. |
|
| 205 | - $shift = ($mState - 1) * 6; |
|
| 206 | - $tmp = $in; |
|
| 207 | - $tmp = ($tmp & 0x0000003F) << $shift; |
|
| 208 | - $mUcs4 |= $tmp; |
|
| 209 | - |
|
| 210 | - if (0 == --$mState) { |
|
| 211 | - // End of the multi-octet sequence. mUcs4 now contains |
|
| 212 | - // the final Unicode codepoint to be output |
|
| 213 | - |
|
| 214 | - // Check for illegal sequences and codepoints. |
|
| 215 | - |
|
| 216 | - // From Unicode 3.1, non-shortest form is illegal |
|
| 217 | - if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || |
|
| 218 | - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || |
|
| 219 | - ((4 == $mBytes) && ($mUcs4 < 0x10000)) || |
|
| 220 | - (4 < $mBytes) || |
|
| 221 | - // From Unicode 3.2, surrogate characters = illegal |
|
| 222 | - (($mUcs4 & 0xFFFFF800) == 0xD800) || |
|
| 223 | - // Codepoints outside the Unicode range are illegal |
|
| 224 | - ($mUcs4 > 0x10FFFF) |
|
| 225 | - ) { |
|
| 226 | - |
|
| 227 | - } elseif (0xFEFF != $mUcs4 && // omit BOM |
|
| 228 | - // check for valid Char unicode codepoints |
|
| 229 | - ( |
|
| 230 | - 0x9 == $mUcs4 || |
|
| 231 | - 0xA == $mUcs4 || |
|
| 232 | - 0xD == $mUcs4 || |
|
| 233 | - (0x20 <= $mUcs4 && 0x7E >= $mUcs4) || |
|
| 234 | - // 7F-9F is not strictly prohibited by XML, |
|
| 235 | - // but it is non-SGML, and thus we don't allow it |
|
| 236 | - (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || |
|
| 237 | - (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) |
|
| 238 | - ) |
|
| 239 | - ) { |
|
| 240 | - $out .= $char; |
|
| 241 | - } |
|
| 242 | - // initialize UTF8 cache (reset) |
|
| 243 | - $mState = 0; |
|
| 244 | - $mUcs4 = 0; |
|
| 245 | - $mBytes = 1; |
|
| 246 | - $char = ''; |
|
| 247 | - } |
|
| 248 | - } else { |
|
| 249 | - // ((0xC0 & (*in) != 0x80) && (mState != 0)) |
|
| 250 | - // Incomplete multi-octet sequence. |
|
| 251 | - // used to result in complete fail, but we'll reset |
|
| 252 | - $mState = 0; |
|
| 253 | - $mUcs4 = 0; |
|
| 254 | - $mBytes = 1; |
|
| 255 | - $char =''; |
|
| 256 | - } |
|
| 257 | - } |
|
| 258 | - } |
|
| 259 | - return $out; |
|
| 260 | - } |
|
| 261 | - |
|
| 262 | - /** |
|
| 263 | - * Translates a Unicode codepoint into its corresponding UTF-8 character. |
|
| 264 | - * @note Based on Feyd's function at |
|
| 265 | - * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>, |
|
| 266 | - * which is in public domain. |
|
| 267 | - * @note While we're going to do code point parsing anyway, a good |
|
| 268 | - * optimization would be to refuse to translate code points that |
|
| 269 | - * are non-SGML characters. However, this could lead to duplication. |
|
| 270 | - * @note This is very similar to the unichr function in |
|
| 271 | - * maintenance/generate-entity-file.php (although this is superior, |
|
| 272 | - * due to its sanity checks). |
|
| 273 | - */ |
|
| 274 | - |
|
| 275 | - // +----------+----------+----------+----------+ |
|
| 276 | - // | 33222222 | 22221111 | 111111 | | |
|
| 277 | - // | 10987654 | 32109876 | 54321098 | 76543210 | bit |
|
| 278 | - // +----------+----------+----------+----------+ |
|
| 279 | - // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F |
|
| 280 | - // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF |
|
| 281 | - // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF |
|
| 282 | - // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF |
|
| 283 | - // +----------+----------+----------+----------+ |
|
| 284 | - // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) |
|
| 285 | - // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes |
|
| 286 | - // +----------+----------+----------+----------+ |
|
| 287 | - |
|
| 288 | - public static function unichr($code) { |
|
| 289 | - if($code > 1114111 or $code < 0 or |
|
| 290 | - ($code >= 55296 and $code <= 57343) ) { |
|
| 291 | - // bits are set outside the "valid" range as defined |
|
| 292 | - // by UNICODE 4.1.0 |
|
| 293 | - return ''; |
|
| 294 | - } |
|
| 295 | - |
|
| 296 | - $x = $y = $z = $w = 0; |
|
| 297 | - if ($code < 128) { |
|
| 298 | - // regular ASCII character |
|
| 299 | - $x = $code; |
|
| 300 | - } else { |
|
| 301 | - // set up bits for UTF-8 |
|
| 302 | - $x = ($code & 63) | 128; |
|
| 303 | - if ($code < 2048) { |
|
| 304 | - $y = (($code & 2047) >> 6) | 192; |
|
| 305 | - } else { |
|
| 306 | - $y = (($code & 4032) >> 6) | 128; |
|
| 307 | - if($code < 65536) { |
|
| 308 | - $z = (($code >> 12) & 15) | 224; |
|
| 309 | - } else { |
|
| 310 | - $z = (($code >> 12) & 63) | 128; |
|
| 311 | - $w = (($code >> 18) & 7) | 240; |
|
| 312 | - } |
|
| 313 | - } |
|
| 314 | - } |
|
| 315 | - // set up the actual character |
|
| 316 | - $ret = ''; |
|
| 317 | - if($w) $ret .= chr($w); |
|
| 318 | - if($z) $ret .= chr($z); |
|
| 319 | - if($y) $ret .= chr($y); |
|
| 320 | - $ret .= chr($x); |
|
| 321 | - |
|
| 322 | - return $ret; |
|
| 323 | - } |
|
| 324 | - |
|
| 325 | - public static function iconvAvailable() { |
|
| 326 | - static $iconv = null; |
|
| 327 | - if ($iconv === null) { |
|
| 328 | - $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; |
|
| 329 | - } |
|
| 330 | - return $iconv; |
|
| 331 | - } |
|
| 332 | - |
|
| 333 | - /** |
|
| 334 | - * Converts a string to UTF-8 based on configuration. |
|
| 335 | - */ |
|
| 336 | - public static function convertToUTF8($str, $config, $context) { |
|
| 337 | - $encoding = $config->get('Core.Encoding'); |
|
| 338 | - if ($encoding === 'utf-8') return $str; |
|
| 339 | - static $iconv = null; |
|
| 340 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
| 341 | - if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
| 342 | - // unaffected by bugs, since UTF-8 support all characters |
|
| 343 | - $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
|
| 344 | - if ($str === false) { |
|
| 345 | - // $encoding is not a valid encoding |
|
| 346 | - trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); |
|
| 347 | - return ''; |
|
| 348 | - } |
|
| 349 | - // If the string is bjorked by Shift_JIS or a similar encoding |
|
| 350 | - // that doesn't support all of ASCII, convert the naughty |
|
| 351 | - // characters to their true byte-wise ASCII/UTF-8 equivalents. |
|
| 352 | - $str = strtr($str, self::testEncodingSupportsASCII($encoding)); |
|
| 353 | - return $str; |
|
| 354 | - } elseif ($encoding === 'iso-8859-1') { |
|
| 355 | - $str = utf8_encode($str); |
|
| 356 | - return $str; |
|
| 357 | - } |
|
| 358 | - trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); |
|
| 359 | - } |
|
| 360 | - |
|
| 361 | - /** |
|
| 362 | - * Converts a string from UTF-8 based on configuration. |
|
| 363 | - * @note Currently, this is a lossy conversion, with unexpressable |
|
| 364 | - * characters being omitted. |
|
| 365 | - */ |
|
| 366 | - public static function convertFromUTF8($str, $config, $context) { |
|
| 367 | - $encoding = $config->get('Core.Encoding'); |
|
| 368 | - if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { |
|
| 369 | - $str = self::convertToASCIIDumbLossless($str); |
|
| 370 | - } |
|
| 371 | - if ($encoding === 'utf-8') return $str; |
|
| 372 | - static $iconv = null; |
|
| 373 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
| 374 | - if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
| 375 | - // Undo our previous fix in convertToUTF8, otherwise iconv will barf |
|
| 376 | - $ascii_fix = self::testEncodingSupportsASCII($encoding); |
|
| 377 | - if (!$escape && !empty($ascii_fix)) { |
|
| 378 | - $clear_fix = array(); |
|
| 379 | - foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; |
|
| 380 | - $str = strtr($str, $clear_fix); |
|
| 381 | - } |
|
| 382 | - $str = strtr($str, array_flip($ascii_fix)); |
|
| 383 | - // Normal stuff |
|
| 384 | - $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); |
|
| 385 | - return $str; |
|
| 386 | - } elseif ($encoding === 'iso-8859-1') { |
|
| 387 | - $str = utf8_decode($str); |
|
| 388 | - return $str; |
|
| 389 | - } |
|
| 390 | - trigger_error('Encoding not supported', E_USER_ERROR); |
|
| 391 | - // You might be tempted to assume that the ASCII representation |
|
| 392 | - // might be OK, however, this is *not* universally true over all |
|
| 393 | - // encodings. So we take the conservative route here, rather |
|
| 394 | - // than forcibly turn on %Core.EscapeNonASCIICharacters |
|
| 395 | - } |
|
| 396 | - |
|
| 397 | - /** |
|
| 398 | - * Lossless (character-wise) conversion of HTML to ASCII |
|
| 399 | - * @param $str UTF-8 string to be converted to ASCII |
|
| 400 | - * @returns ASCII encoded string with non-ASCII character entity-ized |
|
| 401 | - * @warning Adapted from MediaWiki, claiming fair use: this is a common |
|
| 402 | - * algorithm. If you disagree with this license fudgery, |
|
| 403 | - * implement it yourself. |
|
| 404 | - * @note Uses decimal numeric entities since they are best supported. |
|
| 405 | - * @note This is a DUMB function: it has no concept of keeping |
|
| 406 | - * character entities that the projected character encoding |
|
| 407 | - * can allow. We could possibly implement a smart version |
|
| 408 | - * but that would require it to also know which Unicode |
|
| 409 | - * codepoints the charset supported (not an easy task). |
|
| 410 | - * @note Sort of with cleanUTF8() but it assumes that $str is |
|
| 411 | - * well-formed UTF-8 |
|
| 412 | - */ |
|
| 413 | - public static function convertToASCIIDumbLossless($str) { |
|
| 414 | - $bytesleft = 0; |
|
| 415 | - $result = ''; |
|
| 416 | - $working = 0; |
|
| 417 | - $len = strlen($str); |
|
| 418 | - for( $i = 0; $i < $len; $i++ ) { |
|
| 419 | - $bytevalue = ord( $str[$i] ); |
|
| 420 | - if( $bytevalue <= 0x7F ) { //0xxx xxxx |
|
| 421 | - $result .= chr( $bytevalue ); |
|
| 422 | - $bytesleft = 0; |
|
| 423 | - } elseif( $bytevalue <= 0xBF ) { //10xx xxxx |
|
| 424 | - $working = $working << 6; |
|
| 425 | - $working += ($bytevalue & 0x3F); |
|
| 426 | - $bytesleft--; |
|
| 427 | - if( $bytesleft <= 0 ) { |
|
| 428 | - $result .= "&#" . $working . ";"; |
|
| 429 | - } |
|
| 430 | - } elseif( $bytevalue <= 0xDF ) { //110x xxxx |
|
| 431 | - $working = $bytevalue & 0x1F; |
|
| 432 | - $bytesleft = 1; |
|
| 433 | - } elseif( $bytevalue <= 0xEF ) { //1110 xxxx |
|
| 434 | - $working = $bytevalue & 0x0F; |
|
| 435 | - $bytesleft = 2; |
|
| 436 | - } else { //1111 0xxx |
|
| 437 | - $working = $bytevalue & 0x07; |
|
| 438 | - $bytesleft = 3; |
|
| 439 | - } |
|
| 440 | - } |
|
| 441 | - return $result; |
|
| 442 | - } |
|
| 443 | - |
|
| 444 | - /** No bugs detected in iconv. */ |
|
| 445 | - const ICONV_OK = 0; |
|
| 446 | - |
|
| 447 | - /** Iconv truncates output if converting from UTF-8 to another |
|
| 448 | - * character set with //IGNORE, and a non-encodable character is found */ |
|
| 449 | - const ICONV_TRUNCATES = 1; |
|
| 450 | - |
|
| 451 | - /** Iconv does not support //IGNORE, making it unusable for |
|
| 452 | - * transcoding purposes */ |
|
| 453 | - const ICONV_UNUSABLE = 2; |
|
| 454 | - |
|
| 455 | - /** |
|
| 456 | - * glibc iconv has a known bug where it doesn't handle the magic |
|
| 457 | - * //IGNORE stanza correctly. In particular, rather than ignore |
|
| 458 | - * characters, it will return an EILSEQ after consuming some number |
|
| 459 | - * of characters, and expect you to restart iconv as if it were |
|
| 460 | - * an E2BIG. Old versions of PHP did not respect the errno, and |
|
| 461 | - * returned the fragment, so as a result you would see iconv |
|
| 462 | - * mysteriously truncating output. We can work around this by |
|
| 463 | - * manually chopping our input into segments of about 8000 |
|
| 464 | - * characters, as long as PHP ignores the error code. If PHP starts |
|
| 465 | - * paying attention to the error code, iconv becomes unusable. |
|
| 466 | - * |
|
| 467 | - * @returns Error code indicating severity of bug. |
|
| 468 | - */ |
|
| 469 | - public static function testIconvTruncateBug() { |
|
| 470 | - static $code = null; |
|
| 471 | - if ($code === null) { |
|
| 472 | - // better not use iconv, otherwise infinite loop! |
|
| 473 | - $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); |
|
| 474 | - if ($r === false) { |
|
| 475 | - $code = self::ICONV_UNUSABLE; |
|
| 476 | - } elseif (($c = strlen($r)) < 9000) { |
|
| 477 | - $code = self::ICONV_TRUNCATES; |
|
| 478 | - } elseif ($c > 9000) { |
|
| 479 | - trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR); |
|
| 480 | - } else { |
|
| 481 | - $code = self::ICONV_OK; |
|
| 482 | - } |
|
| 483 | - } |
|
| 484 | - return $code; |
|
| 485 | - } |
|
| 486 | - |
|
| 487 | - /** |
|
| 488 | - * This expensive function tests whether or not a given character |
|
| 489 | - * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will |
|
| 490 | - * fail this test, and require special processing. Variable width |
|
| 491 | - * encodings shouldn't ever fail. |
|
| 492 | - * |
|
| 493 | - * @param string $encoding Encoding name to test, as per iconv format |
|
| 494 | - * @param bool $bypass Whether or not to bypass the precompiled arrays. |
|
| 495 | - * @return Array of UTF-8 characters to their corresponding ASCII, |
|
| 496 | - * which can be used to "undo" any overzealous iconv action. |
|
| 497 | - */ |
|
| 498 | - public static function testEncodingSupportsASCII($encoding, $bypass = false) { |
|
| 499 | - // All calls to iconv here are unsafe, proof by case analysis: |
|
| 500 | - // If ICONV_OK, no difference. |
|
| 501 | - // If ICONV_TRUNCATE, all calls involve one character inputs, |
|
| 502 | - // so bug is not triggered. |
|
| 503 | - // If ICONV_UNUSABLE, this call is irrelevant |
|
| 504 | - static $encodings = array(); |
|
| 505 | - if (!$bypass) { |
|
| 506 | - if (isset($encodings[$encoding])) return $encodings[$encoding]; |
|
| 507 | - $lenc = strtolower($encoding); |
|
| 508 | - switch ($lenc) { |
|
| 509 | - case 'shift_jis': |
|
| 510 | - return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); |
|
| 511 | - case 'johab': |
|
| 512 | - return array("\xE2\x82\xA9" => '\\'); |
|
| 513 | - } |
|
| 514 | - if (strpos($lenc, 'iso-8859-') === 0) return array(); |
|
| 515 | - } |
|
| 516 | - $ret = array(); |
|
| 517 | - if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; |
|
| 518 | - for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars |
|
| 519 | - $c = chr($i); // UTF-8 char |
|
| 520 | - $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion |
|
| 521 | - if ( |
|
| 522 | - $r === '' || |
|
| 523 | - // This line is needed for iconv implementations that do not |
|
| 524 | - // omit characters that do not exist in the target character set |
|
| 525 | - ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) |
|
| 526 | - ) { |
|
| 527 | - // Reverse engineer: what's the UTF-8 equiv of this byte |
|
| 528 | - // sequence? This assumes that there's no variable width |
|
| 529 | - // encoding that doesn't support ASCII. |
|
| 530 | - $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; |
|
| 531 | - } |
|
| 532 | - } |
|
| 533 | - $encodings[$encoding] = $ret; |
|
| 534 | - return $ret; |
|
| 535 | - } |
|
| 10 | + /** |
|
| 11 | + * Constructor throws fatal error if you attempt to instantiate class |
|
| 12 | + */ |
|
| 13 | + private function __construct() { |
|
| 14 | + trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR); |
|
| 15 | + } |
|
| 16 | + |
|
| 17 | + /** |
|
| 18 | + * Error-handler that mutes errors, alternative to shut-up operator. |
|
| 19 | + */ |
|
| 20 | + public static function muteErrorHandler() {} |
|
| 21 | + |
|
| 22 | + /** |
|
| 23 | + * iconv wrapper which mutes errors, but doesn't work around bugs. |
|
| 24 | + */ |
|
| 25 | + public static function unsafeIconv($in, $out, $text) { |
|
| 26 | + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); |
|
| 27 | + $r = iconv($in, $out, $text); |
|
| 28 | + restore_error_handler(); |
|
| 29 | + return $r; |
|
| 30 | + } |
|
| 31 | + |
|
| 32 | + /** |
|
| 33 | + * iconv wrapper which mutes errors and works around bugs. |
|
| 34 | + */ |
|
| 35 | + public static function iconv($in, $out, $text, $max_chunk_size = 8000) { |
|
| 36 | + $code = self::testIconvTruncateBug(); |
|
| 37 | + if ($code == self::ICONV_OK) { |
|
| 38 | + return self::unsafeIconv($in, $out, $text); |
|
| 39 | + } elseif ($code == self::ICONV_TRUNCATES) { |
|
| 40 | + // we can only work around this if the input character set |
|
| 41 | + // is utf-8 |
|
| 42 | + if ($in == 'utf-8') { |
|
| 43 | + if ($max_chunk_size < 4) { |
|
| 44 | + trigger_error('max_chunk_size is too small', E_USER_WARNING); |
|
| 45 | + return false; |
|
| 46 | + } |
|
| 47 | + // split into 8000 byte chunks, but be careful to handle |
|
| 48 | + // multibyte boundaries properly |
|
| 49 | + if (($c = strlen($text)) <= $max_chunk_size) { |
|
| 50 | + return self::unsafeIconv($in, $out, $text); |
|
| 51 | + } |
|
| 52 | + $r = ''; |
|
| 53 | + $i = 0; |
|
| 54 | + while (true) { |
|
| 55 | + if ($i + $max_chunk_size >= $c) { |
|
| 56 | + $r .= self::unsafeIconv($in, $out, substr($text, $i)); |
|
| 57 | + break; |
|
| 58 | + } |
|
| 59 | + // wibble the boundary |
|
| 60 | + if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { |
|
| 61 | + $chunk_size = $max_chunk_size; |
|
| 62 | + } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { |
|
| 63 | + $chunk_size = $max_chunk_size - 1; |
|
| 64 | + } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { |
|
| 65 | + $chunk_size = $max_chunk_size - 2; |
|
| 66 | + } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { |
|
| 67 | + $chunk_size = $max_chunk_size - 3; |
|
| 68 | + } else { |
|
| 69 | + return false; // rather confusing UTF-8... |
|
| 70 | + } |
|
| 71 | + $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths |
|
| 72 | + $r .= self::unsafeIconv($in, $out, $chunk); |
|
| 73 | + $i += $chunk_size; |
|
| 74 | + } |
|
| 75 | + return $r; |
|
| 76 | + } else { |
|
| 77 | + return false; |
|
| 78 | + } |
|
| 79 | + } else { |
|
| 80 | + return false; |
|
| 81 | + } |
|
| 82 | + } |
|
| 83 | + |
|
| 84 | + /** |
|
| 85 | + * Cleans a UTF-8 string for well-formedness and SGML validity |
|
| 86 | + * |
|
| 87 | + * It will parse according to UTF-8 and return a valid UTF8 string, with |
|
| 88 | + * non-SGML codepoints excluded. |
|
| 89 | + * |
|
| 90 | + * @note Just for reference, the non-SGML code points are 0 to 31 and |
|
| 91 | + * 127 to 159, inclusive. However, we allow code points 9, 10 |
|
| 92 | + * and 13, which are the tab, line feed and carriage return |
|
| 93 | + * respectively. 128 and above the code points map to multibyte |
|
| 94 | + * UTF-8 representations. |
|
| 95 | + * |
|
| 96 | + * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and |
|
| 97 | + * [email protected] at <http://iki.fi/hsivonen/php-utf8/> under the |
|
| 98 | + * LGPL license. Notes on what changed are inside, but in general, |
|
| 99 | + * the original code transformed UTF-8 text into an array of integer |
|
| 100 | + * Unicode codepoints. Understandably, transforming that back to |
|
| 101 | + * a string would be somewhat expensive, so the function was modded to |
|
| 102 | + * directly operate on the string. However, this discourages code |
|
| 103 | + * reuse, and the logic enumerated here would be useful for any |
|
| 104 | + * function that needs to be able to understand UTF-8 characters. |
|
| 105 | + * As of right now, only smart lossless character encoding converters |
|
| 106 | + * would need that, and I'm probably not going to implement them. |
|
| 107 | + * Once again, PHP 6 should solve all our problems. |
|
| 108 | + */ |
|
| 109 | + public static function cleanUTF8($str, $force_php = false) { |
|
| 110 | + |
|
| 111 | + // UTF-8 validity is checked since PHP 4.3.5 |
|
| 112 | + // This is an optimization: if the string is already valid UTF-8, no |
|
| 113 | + // need to do PHP stuff. 99% of the time, this will be the case. |
|
| 114 | + // The regexp matches the XML char production, as well as well as excluding |
|
| 115 | + // non-SGML codepoints U+007F to U+009F |
|
| 116 | + if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) { |
|
| 117 | + return $str; |
|
| 118 | + } |
|
| 119 | + |
|
| 120 | + $mState = 0; // cached expected number of octets after the current octet |
|
| 121 | + // until the beginning of the next UTF8 character sequence |
|
| 122 | + $mUcs4 = 0; // cached Unicode character |
|
| 123 | + $mBytes = 1; // cached expected number of octets in the current sequence |
|
| 124 | + |
|
| 125 | + // original code involved an $out that was an array of Unicode |
|
| 126 | + // codepoints. Instead of having to convert back into UTF-8, we've |
|
| 127 | + // decided to directly append valid UTF-8 characters onto a string |
|
| 128 | + // $out once they're done. $char accumulates raw bytes, while $mUcs4 |
|
| 129 | + // turns into the Unicode code point, so there's some redundancy. |
|
| 130 | + |
|
| 131 | + $out = ''; |
|
| 132 | + $char = ''; |
|
| 133 | + |
|
| 134 | + $len = strlen($str); |
|
| 135 | + for($i = 0; $i < $len; $i++) { |
|
| 136 | + $in = ord($str{$i}); |
|
| 137 | + $char .= $str[$i]; // append byte to char |
|
| 138 | + if (0 == $mState) { |
|
| 139 | + // When mState is zero we expect either a US-ASCII character |
|
| 140 | + // or a multi-octet sequence. |
|
| 141 | + if (0 == (0x80 & ($in))) { |
|
| 142 | + // US-ASCII, pass straight through. |
|
| 143 | + if (($in <= 31 || $in == 127) && |
|
| 144 | + !($in == 9 || $in == 13 || $in == 10) // save \r\t\n |
|
| 145 | + ) { |
|
| 146 | + // control characters, remove |
|
| 147 | + } else { |
|
| 148 | + $out .= $char; |
|
| 149 | + } |
|
| 150 | + // reset |
|
| 151 | + $char = ''; |
|
| 152 | + $mBytes = 1; |
|
| 153 | + } elseif (0xC0 == (0xE0 & ($in))) { |
|
| 154 | + // First octet of 2 octet sequence |
|
| 155 | + $mUcs4 = ($in); |
|
| 156 | + $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 157 | + $mState = 1; |
|
| 158 | + $mBytes = 2; |
|
| 159 | + } elseif (0xE0 == (0xF0 & ($in))) { |
|
| 160 | + // First octet of 3 octet sequence |
|
| 161 | + $mUcs4 = ($in); |
|
| 162 | + $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 163 | + $mState = 2; |
|
| 164 | + $mBytes = 3; |
|
| 165 | + } elseif (0xF0 == (0xF8 & ($in))) { |
|
| 166 | + // First octet of 4 octet sequence |
|
| 167 | + $mUcs4 = ($in); |
|
| 168 | + $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 169 | + $mState = 3; |
|
| 170 | + $mBytes = 4; |
|
| 171 | + } elseif (0xF8 == (0xFC & ($in))) { |
|
| 172 | + // First octet of 5 octet sequence. |
|
| 173 | + // |
|
| 174 | + // This is illegal because the encoded codepoint must be |
|
| 175 | + // either: |
|
| 176 | + // (a) not the shortest form or |
|
| 177 | + // (b) outside the Unicode range of 0-0x10FFFF. |
|
| 178 | + // Rather than trying to resynchronize, we will carry on |
|
| 179 | + // until the end of the sequence and let the later error |
|
| 180 | + // handling code catch it. |
|
| 181 | + $mUcs4 = ($in); |
|
| 182 | + $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 183 | + $mState = 4; |
|
| 184 | + $mBytes = 5; |
|
| 185 | + } elseif (0xFC == (0xFE & ($in))) { |
|
| 186 | + // First octet of 6 octet sequence, see comments for 5 |
|
| 187 | + // octet sequence. |
|
| 188 | + $mUcs4 = ($in); |
|
| 189 | + $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 190 | + $mState = 5; |
|
| 191 | + $mBytes = 6; |
|
| 192 | + } else { |
|
| 193 | + // Current octet is neither in the US-ASCII range nor a |
|
| 194 | + // legal first octet of a multi-octet sequence. |
|
| 195 | + $mState = 0; |
|
| 196 | + $mUcs4 = 0; |
|
| 197 | + $mBytes = 1; |
|
| 198 | + $char = ''; |
|
| 199 | + } |
|
| 200 | + } else { |
|
| 201 | + // When mState is non-zero, we expect a continuation of the |
|
| 202 | + // multi-octet sequence |
|
| 203 | + if (0x80 == (0xC0 & ($in))) { |
|
| 204 | + // Legal continuation. |
|
| 205 | + $shift = ($mState - 1) * 6; |
|
| 206 | + $tmp = $in; |
|
| 207 | + $tmp = ($tmp & 0x0000003F) << $shift; |
|
| 208 | + $mUcs4 |= $tmp; |
|
| 209 | + |
|
| 210 | + if (0 == --$mState) { |
|
| 211 | + // End of the multi-octet sequence. mUcs4 now contains |
|
| 212 | + // the final Unicode codepoint to be output |
|
| 213 | + |
|
| 214 | + // Check for illegal sequences and codepoints. |
|
| 215 | + |
|
| 216 | + // From Unicode 3.1, non-shortest form is illegal |
|
| 217 | + if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || |
|
| 218 | + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || |
|
| 219 | + ((4 == $mBytes) && ($mUcs4 < 0x10000)) || |
|
| 220 | + (4 < $mBytes) || |
|
| 221 | + // From Unicode 3.2, surrogate characters = illegal |
|
| 222 | + (($mUcs4 & 0xFFFFF800) == 0xD800) || |
|
| 223 | + // Codepoints outside the Unicode range are illegal |
|
| 224 | + ($mUcs4 > 0x10FFFF) |
|
| 225 | + ) { |
|
| 226 | + |
|
| 227 | + } elseif (0xFEFF != $mUcs4 && // omit BOM |
|
| 228 | + // check for valid Char unicode codepoints |
|
| 229 | + ( |
|
| 230 | + 0x9 == $mUcs4 || |
|
| 231 | + 0xA == $mUcs4 || |
|
| 232 | + 0xD == $mUcs4 || |
|
| 233 | + (0x20 <= $mUcs4 && 0x7E >= $mUcs4) || |
|
| 234 | + // 7F-9F is not strictly prohibited by XML, |
|
| 235 | + // but it is non-SGML, and thus we don't allow it |
|
| 236 | + (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || |
|
| 237 | + (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) |
|
| 238 | + ) |
|
| 239 | + ) { |
|
| 240 | + $out .= $char; |
|
| 241 | + } |
|
| 242 | + // initialize UTF8 cache (reset) |
|
| 243 | + $mState = 0; |
|
| 244 | + $mUcs4 = 0; |
|
| 245 | + $mBytes = 1; |
|
| 246 | + $char = ''; |
|
| 247 | + } |
|
| 248 | + } else { |
|
| 249 | + // ((0xC0 & (*in) != 0x80) && (mState != 0)) |
|
| 250 | + // Incomplete multi-octet sequence. |
|
| 251 | + // used to result in complete fail, but we'll reset |
|
| 252 | + $mState = 0; |
|
| 253 | + $mUcs4 = 0; |
|
| 254 | + $mBytes = 1; |
|
| 255 | + $char =''; |
|
| 256 | + } |
|
| 257 | + } |
|
| 258 | + } |
|
| 259 | + return $out; |
|
| 260 | + } |
|
| 261 | + |
|
| 262 | + /** |
|
| 263 | + * Translates a Unicode codepoint into its corresponding UTF-8 character. |
|
| 264 | + * @note Based on Feyd's function at |
|
| 265 | + * <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>, |
|
| 266 | + * which is in public domain. |
|
| 267 | + * @note While we're going to do code point parsing anyway, a good |
|
| 268 | + * optimization would be to refuse to translate code points that |
|
| 269 | + * are non-SGML characters. However, this could lead to duplication. |
|
| 270 | + * @note This is very similar to the unichr function in |
|
| 271 | + * maintenance/generate-entity-file.php (although this is superior, |
|
| 272 | + * due to its sanity checks). |
|
| 273 | + */ |
|
| 274 | + |
|
| 275 | + // +----------+----------+----------+----------+ |
|
| 276 | + // | 33222222 | 22221111 | 111111 | | |
|
| 277 | + // | 10987654 | 32109876 | 54321098 | 76543210 | bit |
|
| 278 | + // +----------+----------+----------+----------+ |
|
| 279 | + // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F |
|
| 280 | + // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF |
|
| 281 | + // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF |
|
| 282 | + // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF |
|
| 283 | + // +----------+----------+----------+----------+ |
|
| 284 | + // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) |
|
| 285 | + // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes |
|
| 286 | + // +----------+----------+----------+----------+ |
|
| 287 | + |
|
| 288 | + public static function unichr($code) { |
|
| 289 | + if($code > 1114111 or $code < 0 or |
|
| 290 | + ($code >= 55296 and $code <= 57343) ) { |
|
| 291 | + // bits are set outside the "valid" range as defined |
|
| 292 | + // by UNICODE 4.1.0 |
|
| 293 | + return ''; |
|
| 294 | + } |
|
| 295 | + |
|
| 296 | + $x = $y = $z = $w = 0; |
|
| 297 | + if ($code < 128) { |
|
| 298 | + // regular ASCII character |
|
| 299 | + $x = $code; |
|
| 300 | + } else { |
|
| 301 | + // set up bits for UTF-8 |
|
| 302 | + $x = ($code & 63) | 128; |
|
| 303 | + if ($code < 2048) { |
|
| 304 | + $y = (($code & 2047) >> 6) | 192; |
|
| 305 | + } else { |
|
| 306 | + $y = (($code & 4032) >> 6) | 128; |
|
| 307 | + if($code < 65536) { |
|
| 308 | + $z = (($code >> 12) & 15) | 224; |
|
| 309 | + } else { |
|
| 310 | + $z = (($code >> 12) & 63) | 128; |
|
| 311 | + $w = (($code >> 18) & 7) | 240; |
|
| 312 | + } |
|
| 313 | + } |
|
| 314 | + } |
|
| 315 | + // set up the actual character |
|
| 316 | + $ret = ''; |
|
| 317 | + if($w) $ret .= chr($w); |
|
| 318 | + if($z) $ret .= chr($z); |
|
| 319 | + if($y) $ret .= chr($y); |
|
| 320 | + $ret .= chr($x); |
|
| 321 | + |
|
| 322 | + return $ret; |
|
| 323 | + } |
|
| 324 | + |
|
| 325 | + public static function iconvAvailable() { |
|
| 326 | + static $iconv = null; |
|
| 327 | + if ($iconv === null) { |
|
| 328 | + $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; |
|
| 329 | + } |
|
| 330 | + return $iconv; |
|
| 331 | + } |
|
| 332 | + |
|
| 333 | + /** |
|
| 334 | + * Converts a string to UTF-8 based on configuration. |
|
| 335 | + */ |
|
| 336 | + public static function convertToUTF8($str, $config, $context) { |
|
| 337 | + $encoding = $config->get('Core.Encoding'); |
|
| 338 | + if ($encoding === 'utf-8') return $str; |
|
| 339 | + static $iconv = null; |
|
| 340 | + if ($iconv === null) $iconv = self::iconvAvailable(); |
|
| 341 | + if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
| 342 | + // unaffected by bugs, since UTF-8 support all characters |
|
| 343 | + $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
|
| 344 | + if ($str === false) { |
|
| 345 | + // $encoding is not a valid encoding |
|
| 346 | + trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); |
|
| 347 | + return ''; |
|
| 348 | + } |
|
| 349 | + // If the string is bjorked by Shift_JIS or a similar encoding |
|
| 350 | + // that doesn't support all of ASCII, convert the naughty |
|
| 351 | + // characters to their true byte-wise ASCII/UTF-8 equivalents. |
|
| 352 | + $str = strtr($str, self::testEncodingSupportsASCII($encoding)); |
|
| 353 | + return $str; |
|
| 354 | + } elseif ($encoding === 'iso-8859-1') { |
|
| 355 | + $str = utf8_encode($str); |
|
| 356 | + return $str; |
|
| 357 | + } |
|
| 358 | + trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); |
|
| 359 | + } |
|
| 360 | + |
|
| 361 | + /** |
|
| 362 | + * Converts a string from UTF-8 based on configuration. |
|
| 363 | + * @note Currently, this is a lossy conversion, with unexpressable |
|
| 364 | + * characters being omitted. |
|
| 365 | + */ |
|
| 366 | + public static function convertFromUTF8($str, $config, $context) { |
|
| 367 | + $encoding = $config->get('Core.Encoding'); |
|
| 368 | + if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { |
|
| 369 | + $str = self::convertToASCIIDumbLossless($str); |
|
| 370 | + } |
|
| 371 | + if ($encoding === 'utf-8') return $str; |
|
| 372 | + static $iconv = null; |
|
| 373 | + if ($iconv === null) $iconv = self::iconvAvailable(); |
|
| 374 | + if ($iconv && !$config->get('Test.ForceNoIconv')) { |
|
| 375 | + // Undo our previous fix in convertToUTF8, otherwise iconv will barf |
|
| 376 | + $ascii_fix = self::testEncodingSupportsASCII($encoding); |
|
| 377 | + if (!$escape && !empty($ascii_fix)) { |
|
| 378 | + $clear_fix = array(); |
|
| 379 | + foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; |
|
| 380 | + $str = strtr($str, $clear_fix); |
|
| 381 | + } |
|
| 382 | + $str = strtr($str, array_flip($ascii_fix)); |
|
| 383 | + // Normal stuff |
|
| 384 | + $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); |
|
| 385 | + return $str; |
|
| 386 | + } elseif ($encoding === 'iso-8859-1') { |
|
| 387 | + $str = utf8_decode($str); |
|
| 388 | + return $str; |
|
| 389 | + } |
|
| 390 | + trigger_error('Encoding not supported', E_USER_ERROR); |
|
| 391 | + // You might be tempted to assume that the ASCII representation |
|
| 392 | + // might be OK, however, this is *not* universally true over all |
|
| 393 | + // encodings. So we take the conservative route here, rather |
|
| 394 | + // than forcibly turn on %Core.EscapeNonASCIICharacters |
|
| 395 | + } |
|
| 396 | + |
|
| 397 | + /** |
|
| 398 | + * Lossless (character-wise) conversion of HTML to ASCII |
|
| 399 | + * @param $str UTF-8 string to be converted to ASCII |
|
| 400 | + * @returns ASCII encoded string with non-ASCII character entity-ized |
|
| 401 | + * @warning Adapted from MediaWiki, claiming fair use: this is a common |
|
| 402 | + * algorithm. If you disagree with this license fudgery, |
|
| 403 | + * implement it yourself. |
|
| 404 | + * @note Uses decimal numeric entities since they are best supported. |
|
| 405 | + * @note This is a DUMB function: it has no concept of keeping |
|
| 406 | + * character entities that the projected character encoding |
|
| 407 | + * can allow. We could possibly implement a smart version |
|
| 408 | + * but that would require it to also know which Unicode |
|
| 409 | + * codepoints the charset supported (not an easy task). |
|
| 410 | + * @note Sort of with cleanUTF8() but it assumes that $str is |
|
| 411 | + * well-formed UTF-8 |
|
| 412 | + */ |
|
| 413 | + public static function convertToASCIIDumbLossless($str) { |
|
| 414 | + $bytesleft = 0; |
|
| 415 | + $result = ''; |
|
| 416 | + $working = 0; |
|
| 417 | + $len = strlen($str); |
|
| 418 | + for( $i = 0; $i < $len; $i++ ) { |
|
| 419 | + $bytevalue = ord( $str[$i] ); |
|
| 420 | + if( $bytevalue <= 0x7F ) { //0xxx xxxx |
|
| 421 | + $result .= chr( $bytevalue ); |
|
| 422 | + $bytesleft = 0; |
|
| 423 | + } elseif( $bytevalue <= 0xBF ) { //10xx xxxx |
|
| 424 | + $working = $working << 6; |
|
| 425 | + $working += ($bytevalue & 0x3F); |
|
| 426 | + $bytesleft--; |
|
| 427 | + if( $bytesleft <= 0 ) { |
|
| 428 | + $result .= "&#" . $working . ";"; |
|
| 429 | + } |
|
| 430 | + } elseif( $bytevalue <= 0xDF ) { //110x xxxx |
|
| 431 | + $working = $bytevalue & 0x1F; |
|
| 432 | + $bytesleft = 1; |
|
| 433 | + } elseif( $bytevalue <= 0xEF ) { //1110 xxxx |
|
| 434 | + $working = $bytevalue & 0x0F; |
|
| 435 | + $bytesleft = 2; |
|
| 436 | + } else { //1111 0xxx |
|
| 437 | + $working = $bytevalue & 0x07; |
|
| 438 | + $bytesleft = 3; |
|
| 439 | + } |
|
| 440 | + } |
|
| 441 | + return $result; |
|
| 442 | + } |
|
| 443 | + |
|
| 444 | + /** No bugs detected in iconv. */ |
|
| 445 | + const ICONV_OK = 0; |
|
| 446 | + |
|
| 447 | + /** Iconv truncates output if converting from UTF-8 to another |
|
| 448 | + * character set with //IGNORE, and a non-encodable character is found */ |
|
| 449 | + const ICONV_TRUNCATES = 1; |
|
| 450 | + |
|
| 451 | + /** Iconv does not support //IGNORE, making it unusable for |
|
| 452 | + * transcoding purposes */ |
|
| 453 | + const ICONV_UNUSABLE = 2; |
|
| 454 | + |
|
| 455 | + /** |
|
| 456 | + * glibc iconv has a known bug where it doesn't handle the magic |
|
| 457 | + * //IGNORE stanza correctly. In particular, rather than ignore |
|
| 458 | + * characters, it will return an EILSEQ after consuming some number |
|
| 459 | + * of characters, and expect you to restart iconv as if it were |
|
| 460 | + * an E2BIG. Old versions of PHP did not respect the errno, and |
|
| 461 | + * returned the fragment, so as a result you would see iconv |
|
| 462 | + * mysteriously truncating output. We can work around this by |
|
| 463 | + * manually chopping our input into segments of about 8000 |
|
| 464 | + * characters, as long as PHP ignores the error code. If PHP starts |
|
| 465 | + * paying attention to the error code, iconv becomes unusable. |
|
| 466 | + * |
|
| 467 | + * @returns Error code indicating severity of bug. |
|
| 468 | + */ |
|
| 469 | + public static function testIconvTruncateBug() { |
|
| 470 | + static $code = null; |
|
| 471 | + if ($code === null) { |
|
| 472 | + // better not use iconv, otherwise infinite loop! |
|
| 473 | + $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); |
|
| 474 | + if ($r === false) { |
|
| 475 | + $code = self::ICONV_UNUSABLE; |
|
| 476 | + } elseif (($c = strlen($r)) < 9000) { |
|
| 477 | + $code = self::ICONV_TRUNCATES; |
|
| 478 | + } elseif ($c > 9000) { |
|
| 479 | + trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR); |
|
| 480 | + } else { |
|
| 481 | + $code = self::ICONV_OK; |
|
| 482 | + } |
|
| 483 | + } |
|
| 484 | + return $code; |
|
| 485 | + } |
|
| 486 | + |
|
| 487 | + /** |
|
| 488 | + * This expensive function tests whether or not a given character |
|
| 489 | + * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will |
|
| 490 | + * fail this test, and require special processing. Variable width |
|
| 491 | + * encodings shouldn't ever fail. |
|
| 492 | + * |
|
| 493 | + * @param string $encoding Encoding name to test, as per iconv format |
|
| 494 | + * @param bool $bypass Whether or not to bypass the precompiled arrays. |
|
| 495 | + * @return Array of UTF-8 characters to their corresponding ASCII, |
|
| 496 | + * which can be used to "undo" any overzealous iconv action. |
|
| 497 | + */ |
|
| 498 | + public static function testEncodingSupportsASCII($encoding, $bypass = false) { |
|
| 499 | + // All calls to iconv here are unsafe, proof by case analysis: |
|
| 500 | + // If ICONV_OK, no difference. |
|
| 501 | + // If ICONV_TRUNCATE, all calls involve one character inputs, |
|
| 502 | + // so bug is not triggered. |
|
| 503 | + // If ICONV_UNUSABLE, this call is irrelevant |
|
| 504 | + static $encodings = array(); |
|
| 505 | + if (!$bypass) { |
|
| 506 | + if (isset($encodings[$encoding])) return $encodings[$encoding]; |
|
| 507 | + $lenc = strtolower($encoding); |
|
| 508 | + switch ($lenc) { |
|
| 509 | + case 'shift_jis': |
|
| 510 | + return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); |
|
| 511 | + case 'johab': |
|
| 512 | + return array("\xE2\x82\xA9" => '\\'); |
|
| 513 | + } |
|
| 514 | + if (strpos($lenc, 'iso-8859-') === 0) return array(); |
|
| 515 | + } |
|
| 516 | + $ret = array(); |
|
| 517 | + if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; |
|
| 518 | + for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars |
|
| 519 | + $c = chr($i); // UTF-8 char |
|
| 520 | + $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion |
|
| 521 | + if ( |
|
| 522 | + $r === '' || |
|
| 523 | + // This line is needed for iconv implementations that do not |
|
| 524 | + // omit characters that do not exist in the target character set |
|
| 525 | + ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) |
|
| 526 | + ) { |
|
| 527 | + // Reverse engineer: what's the UTF-8 equiv of this byte |
|
| 528 | + // sequence? This assumes that there's no variable width |
|
| 529 | + // encoding that doesn't support ASCII. |
|
| 530 | + $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; |
|
| 531 | + } |
|
| 532 | + } |
|
| 533 | + $encodings[$encoding] = $ret; |
|
| 534 | + return $ret; |
|
| 535 | + } |
|
| 536 | 536 | |
| 537 | 537 | |
| 538 | 538 | } |
@@ -314,9 +314,15 @@ discard block |
||
| 314 | 314 | } |
| 315 | 315 | // set up the actual character |
| 316 | 316 | $ret = ''; |
| 317 | - if($w) $ret .= chr($w); |
|
| 318 | - if($z) $ret .= chr($z); |
|
| 319 | - if($y) $ret .= chr($y); |
|
| 317 | + if($w) { |
|
| 318 | + $ret .= chr($w); |
|
| 319 | + } |
|
| 320 | + if($z) { |
|
| 321 | + $ret .= chr($z); |
|
| 322 | + } |
|
| 323 | + if($y) { |
|
| 324 | + $ret .= chr($y); |
|
| 325 | + } |
|
| 320 | 326 | $ret .= chr($x); |
| 321 | 327 | |
| 322 | 328 | return $ret; |
@@ -335,9 +341,13 @@ discard block |
||
| 335 | 341 | */ |
| 336 | 342 | public static function convertToUTF8($str, $config, $context) { |
| 337 | 343 | $encoding = $config->get('Core.Encoding'); |
| 338 | - if ($encoding === 'utf-8') return $str; |
|
| 344 | + if ($encoding === 'utf-8') { |
|
| 345 | + return $str; |
|
| 346 | + } |
|
| 339 | 347 | static $iconv = null; |
| 340 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
| 348 | + if ($iconv === null) { |
|
| 349 | + $iconv = self::iconvAvailable(); |
|
| 350 | + } |
|
| 341 | 351 | if ($iconv && !$config->get('Test.ForceNoIconv')) { |
| 342 | 352 | // unaffected by bugs, since UTF-8 support all characters |
| 343 | 353 | $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
@@ -368,15 +378,21 @@ discard block |
||
| 368 | 378 | if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { |
| 369 | 379 | $str = self::convertToASCIIDumbLossless($str); |
| 370 | 380 | } |
| 371 | - if ($encoding === 'utf-8') return $str; |
|
| 381 | + if ($encoding === 'utf-8') { |
|
| 382 | + return $str; |
|
| 383 | + } |
|
| 372 | 384 | static $iconv = null; |
| 373 | - if ($iconv === null) $iconv = self::iconvAvailable(); |
|
| 385 | + if ($iconv === null) { |
|
| 386 | + $iconv = self::iconvAvailable(); |
|
| 387 | + } |
|
| 374 | 388 | if ($iconv && !$config->get('Test.ForceNoIconv')) { |
| 375 | 389 | // Undo our previous fix in convertToUTF8, otherwise iconv will barf |
| 376 | 390 | $ascii_fix = self::testEncodingSupportsASCII($encoding); |
| 377 | 391 | if (!$escape && !empty($ascii_fix)) { |
| 378 | 392 | $clear_fix = array(); |
| 379 | - foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; |
|
| 393 | + foreach ($ascii_fix as $utf8 => $native) { |
|
| 394 | + $clear_fix[$utf8] = ''; |
|
| 395 | + } |
|
| 380 | 396 | $str = strtr($str, $clear_fix); |
| 381 | 397 | } |
| 382 | 398 | $str = strtr($str, array_flip($ascii_fix)); |
@@ -503,7 +519,9 @@ discard block |
||
| 503 | 519 | // If ICONV_UNUSABLE, this call is irrelevant |
| 504 | 520 | static $encodings = array(); |
| 505 | 521 | if (!$bypass) { |
| 506 | - if (isset($encodings[$encoding])) return $encodings[$encoding]; |
|
| 522 | + if (isset($encodings[$encoding])) { |
|
| 523 | + return $encodings[$encoding]; |
|
| 524 | + } |
|
| 507 | 525 | $lenc = strtolower($encoding); |
| 508 | 526 | switch ($lenc) { |
| 509 | 527 | case 'shift_jis': |
@@ -511,10 +529,14 @@ discard block |
||
| 511 | 529 | case 'johab': |
| 512 | 530 | return array("\xE2\x82\xA9" => '\\'); |
| 513 | 531 | } |
| 514 | - if (strpos($lenc, 'iso-8859-') === 0) return array(); |
|
| 532 | + if (strpos($lenc, 'iso-8859-') === 0) { |
|
| 533 | + return array(); |
|
| 534 | + } |
|
| 515 | 535 | } |
| 516 | 536 | $ret = array(); |
| 517 | - if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; |
|
| 537 | + if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) { |
|
| 538 | + return false; |
|
| 539 | + } |
|
| 518 | 540 | for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars |
| 519 | 541 | $c = chr($i); // UTF-8 char |
| 520 | 542 | $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion |
@@ -132,7 +132,7 @@ discard block |
||
| 132 | 132 | $char = ''; |
| 133 | 133 | |
| 134 | 134 | $len = strlen($str); |
| 135 | - for($i = 0; $i < $len; $i++) { |
|
| 135 | + for ($i = 0; $i < $len; $i++) { |
|
| 136 | 136 | $in = ord($str{$i}); |
| 137 | 137 | $char .= $str[$i]; // append byte to char |
| 138 | 138 | if (0 == $mState) { |
@@ -252,7 +252,7 @@ discard block |
||
| 252 | 252 | $mState = 0; |
| 253 | 253 | $mUcs4 = 0; |
| 254 | 254 | $mBytes = 1; |
| 255 | - $char =''; |
|
| 255 | + $char = ''; |
|
| 256 | 256 | } |
| 257 | 257 | } |
| 258 | 258 | } |
@@ -286,8 +286,8 @@ discard block |
||
| 286 | 286 | // +----------+----------+----------+----------+ |
| 287 | 287 | |
| 288 | 288 | public static function unichr($code) { |
| 289 | - if($code > 1114111 or $code < 0 or |
|
| 290 | - ($code >= 55296 and $code <= 57343) ) { |
|
| 289 | + if ($code > 1114111 or $code < 0 or |
|
| 290 | + ($code >= 55296 and $code <= 57343)) { |
|
| 291 | 291 | // bits are set outside the "valid" range as defined |
| 292 | 292 | // by UNICODE 4.1.0 |
| 293 | 293 | return ''; |
@@ -304,19 +304,19 @@ discard block |
||
| 304 | 304 | $y = (($code & 2047) >> 6) | 192; |
| 305 | 305 | } else { |
| 306 | 306 | $y = (($code & 4032) >> 6) | 128; |
| 307 | - if($code < 65536) { |
|
| 307 | + if ($code < 65536) { |
|
| 308 | 308 | $z = (($code >> 12) & 15) | 224; |
| 309 | 309 | } else { |
| 310 | 310 | $z = (($code >> 12) & 63) | 128; |
| 311 | - $w = (($code >> 18) & 7) | 240; |
|
| 311 | + $w = (($code >> 18) & 7) | 240; |
|
| 312 | 312 | } |
| 313 | 313 | } |
| 314 | 314 | } |
| 315 | 315 | // set up the actual character |
| 316 | 316 | $ret = ''; |
| 317 | - if($w) $ret .= chr($w); |
|
| 318 | - if($z) $ret .= chr($z); |
|
| 319 | - if($y) $ret .= chr($y); |
|
| 317 | + if ($w) $ret .= chr($w); |
|
| 318 | + if ($z) $ret .= chr($z); |
|
| 319 | + if ($y) $ret .= chr($y); |
|
| 320 | 320 | $ret .= chr($x); |
| 321 | 321 | |
| 322 | 322 | return $ret; |
@@ -343,7 +343,7 @@ discard block |
||
| 343 | 343 | $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str); |
| 344 | 344 | if ($str === false) { |
| 345 | 345 | // $encoding is not a valid encoding |
| 346 | - trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); |
|
| 346 | + trigger_error('Invalid encoding '.$encoding, E_USER_ERROR); |
|
| 347 | 347 | return ''; |
| 348 | 348 | } |
| 349 | 349 | // If the string is bjorked by Shift_JIS or a similar encoding |
@@ -381,7 +381,7 @@ discard block |
||
| 381 | 381 | } |
| 382 | 382 | $str = strtr($str, array_flip($ascii_fix)); |
| 383 | 383 | // Normal stuff |
| 384 | - $str = self::iconv('utf-8', $encoding . '//IGNORE', $str); |
|
| 384 | + $str = self::iconv('utf-8', $encoding.'//IGNORE', $str); |
|
| 385 | 385 | return $str; |
| 386 | 386 | } elseif ($encoding === 'iso-8859-1') { |
| 387 | 387 | $str = utf8_decode($str); |
@@ -415,22 +415,22 @@ discard block |
||
| 415 | 415 | $result = ''; |
| 416 | 416 | $working = 0; |
| 417 | 417 | $len = strlen($str); |
| 418 | - for( $i = 0; $i < $len; $i++ ) { |
|
| 419 | - $bytevalue = ord( $str[$i] ); |
|
| 420 | - if( $bytevalue <= 0x7F ) { //0xxx xxxx |
|
| 421 | - $result .= chr( $bytevalue ); |
|
| 418 | + for ($i = 0; $i < $len; $i++) { |
|
| 419 | + $bytevalue = ord($str[$i]); |
|
| 420 | + if ($bytevalue <= 0x7F) { //0xxx xxxx |
|
| 421 | + $result .= chr($bytevalue); |
|
| 422 | 422 | $bytesleft = 0; |
| 423 | - } elseif( $bytevalue <= 0xBF ) { //10xx xxxx |
|
| 423 | + } elseif ($bytevalue <= 0xBF) { //10xx xxxx |
|
| 424 | 424 | $working = $working << 6; |
| 425 | 425 | $working += ($bytevalue & 0x3F); |
| 426 | 426 | $bytesleft--; |
| 427 | - if( $bytesleft <= 0 ) { |
|
| 428 | - $result .= "&#" . $working . ";"; |
|
| 427 | + if ($bytesleft <= 0) { |
|
| 428 | + $result .= "&#".$working.";"; |
|
| 429 | 429 | } |
| 430 | - } elseif( $bytevalue <= 0xDF ) { //110x xxxx |
|
| 430 | + } elseif ($bytevalue <= 0xDF) { //110x xxxx |
|
| 431 | 431 | $working = $bytevalue & 0x1F; |
| 432 | 432 | $bytesleft = 1; |
| 433 | - } elseif( $bytevalue <= 0xEF ) { //1110 xxxx |
|
| 433 | + } elseif ($bytevalue <= 0xEF) { //1110 xxxx |
|
| 434 | 434 | $working = $bytevalue & 0x0F; |
| 435 | 435 | $bytesleft = 2; |
| 436 | 436 | } else { //1111 0xxx |
@@ -470,7 +470,7 @@ discard block |
||
| 470 | 470 | static $code = null; |
| 471 | 471 | if ($code === null) { |
| 472 | 472 | // better not use iconv, otherwise infinite loop! |
| 473 | - $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); |
|
| 473 | + $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1".str_repeat('a', 9000)); |
|
| 474 | 474 | if ($r === false) { |
| 475 | 475 | $code = self::ICONV_UNUSABLE; |
| 476 | 476 | } elseif (($c = strlen($r)) < 9000) { |
@@ -25,6 +25,9 @@ |
||
| 25 | 25 | |
| 26 | 26 | protected $lines = array(); |
| 27 | 27 | |
| 28 | + /** |
|
| 29 | + * @param HTMLPurifier_Context $context |
|
| 30 | + */ |
|
| 28 | 31 | public function __construct($context) { |
| 29 | 32 | $this->locale =& $context->get('Locale'); |
| 30 | 33 | $this->context = $context; |
@@ -7,202 +7,202 @@ |
||
| 7 | 7 | class HTMLPurifier_ErrorCollector |
| 8 | 8 | { |
| 9 | 9 | |
| 10 | - /** |
|
| 11 | - * Identifiers for the returned error array. These are purposely numeric |
|
| 12 | - * so list() can be used. |
|
| 13 | - */ |
|
| 14 | - const LINENO = 0; |
|
| 15 | - const SEVERITY = 1; |
|
| 16 | - const MESSAGE = 2; |
|
| 17 | - const CHILDREN = 3; |
|
| 18 | - |
|
| 19 | - protected $errors; |
|
| 20 | - protected $_current; |
|
| 21 | - protected $_stacks = array(array()); |
|
| 22 | - protected $locale; |
|
| 23 | - protected $generator; |
|
| 24 | - protected $context; |
|
| 25 | - |
|
| 26 | - protected $lines = array(); |
|
| 27 | - |
|
| 28 | - public function __construct($context) { |
|
| 29 | - $this->locale =& $context->get('Locale'); |
|
| 30 | - $this->context = $context; |
|
| 31 | - $this->_current =& $this->_stacks[0]; |
|
| 32 | - $this->errors =& $this->_stacks[0]; |
|
| 33 | - } |
|
| 34 | - |
|
| 35 | - /** |
|
| 36 | - * Sends an error message to the collector for later use |
|
| 37 | - * @param $severity int Error severity, PHP error style (don't use E_USER_) |
|
| 38 | - * @param $msg string Error message text |
|
| 39 | - * @param $subst1 string First substitution for $msg |
|
| 40 | - * @param $subst2 string ... |
|
| 41 | - */ |
|
| 42 | - public function send($severity, $msg) { |
|
| 43 | - |
|
| 44 | - $args = array(); |
|
| 45 | - if (func_num_args() > 2) { |
|
| 46 | - $args = func_get_args(); |
|
| 47 | - array_shift($args); |
|
| 48 | - unset($args[0]); |
|
| 49 | - } |
|
| 50 | - |
|
| 51 | - $token = $this->context->get('CurrentToken', true); |
|
| 52 | - $line = $token ? $token->line : $this->context->get('CurrentLine', true); |
|
| 53 | - $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
| 54 | - $attr = $this->context->get('CurrentAttr', true); |
|
| 55 | - |
|
| 56 | - // perform special substitutions, also add custom parameters |
|
| 57 | - $subst = array(); |
|
| 58 | - if (!is_null($token)) { |
|
| 59 | - $args['CurrentToken'] = $token; |
|
| 60 | - } |
|
| 61 | - if (!is_null($attr)) { |
|
| 62 | - $subst['$CurrentAttr.Name'] = $attr; |
|
| 63 | - if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
| 64 | - } |
|
| 65 | - |
|
| 66 | - if (empty($args)) { |
|
| 67 | - $msg = $this->locale->getMessage($msg); |
|
| 68 | - } else { |
|
| 69 | - $msg = $this->locale->formatMessage($msg, $args); |
|
| 70 | - } |
|
| 71 | - |
|
| 72 | - if (!empty($subst)) $msg = strtr($msg, $subst); |
|
| 73 | - |
|
| 74 | - // (numerically indexed) |
|
| 75 | - $error = array( |
|
| 76 | - self::LINENO => $line, |
|
| 77 | - self::SEVERITY => $severity, |
|
| 78 | - self::MESSAGE => $msg, |
|
| 79 | - self::CHILDREN => array() |
|
| 80 | - ); |
|
| 81 | - $this->_current[] = $error; |
|
| 82 | - |
|
| 83 | - |
|
| 84 | - // NEW CODE BELOW ... |
|
| 85 | - |
|
| 86 | - $struct = null; |
|
| 87 | - // Top-level errors are either: |
|
| 88 | - // TOKEN type, if $value is set appropriately, or |
|
| 89 | - // "syntax" type, if $value is null |
|
| 90 | - $new_struct = new HTMLPurifier_ErrorStruct(); |
|
| 91 | - $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; |
|
| 92 | - if ($token) $new_struct->value = clone $token; |
|
| 93 | - if (is_int($line) && is_int($col)) { |
|
| 94 | - if (isset($this->lines[$line][$col])) { |
|
| 95 | - $struct = $this->lines[$line][$col]; |
|
| 96 | - } else { |
|
| 97 | - $struct = $this->lines[$line][$col] = $new_struct; |
|
| 98 | - } |
|
| 99 | - // These ksorts may present a performance problem |
|
| 100 | - ksort($this->lines[$line], SORT_NUMERIC); |
|
| 101 | - } else { |
|
| 102 | - if (isset($this->lines[-1])) { |
|
| 103 | - $struct = $this->lines[-1]; |
|
| 104 | - } else { |
|
| 105 | - $struct = $this->lines[-1] = $new_struct; |
|
| 106 | - } |
|
| 107 | - } |
|
| 108 | - ksort($this->lines, SORT_NUMERIC); |
|
| 109 | - |
|
| 110 | - // Now, check if we need to operate on a lower structure |
|
| 111 | - if (!empty($attr)) { |
|
| 112 | - $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); |
|
| 113 | - if (!$struct->value) { |
|
| 114 | - $struct->value = array($attr, 'PUT VALUE HERE'); |
|
| 115 | - } |
|
| 116 | - } |
|
| 117 | - if (!empty($cssprop)) { |
|
| 118 | - $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); |
|
| 119 | - if (!$struct->value) { |
|
| 120 | - // if we tokenize CSS this might be a little more difficult to do |
|
| 121 | - $struct->value = array($cssprop, 'PUT VALUE HERE'); |
|
| 122 | - } |
|
| 123 | - } |
|
| 124 | - |
|
| 125 | - // Ok, structs are all setup, now time to register the error |
|
| 126 | - $struct->addError($severity, $msg); |
|
| 127 | - } |
|
| 128 | - |
|
| 129 | - /** |
|
| 130 | - * Retrieves raw error data for custom formatter to use |
|
| 131 | - * @param List of arrays in format of array(line of error, |
|
| 132 | - * error severity, error message, |
|
| 133 | - * recursive sub-errors array) |
|
| 134 | - */ |
|
| 135 | - public function getRaw() { |
|
| 136 | - return $this->errors; |
|
| 137 | - } |
|
| 138 | - |
|
| 139 | - /** |
|
| 140 | - * Default HTML formatting implementation for error messages |
|
| 141 | - * @param $config Configuration array, vital for HTML output nature |
|
| 142 | - * @param $errors Errors array to display; used for recursion. |
|
| 143 | - */ |
|
| 144 | - public function getHTMLFormatted($config, $errors = null) { |
|
| 145 | - $ret = array(); |
|
| 146 | - |
|
| 147 | - $this->generator = new HTMLPurifier_Generator($config, $this->context); |
|
| 148 | - if ($errors === null) $errors = $this->errors; |
|
| 149 | - |
|
| 150 | - // 'At line' message needs to be removed |
|
| 151 | - |
|
| 152 | - // generation code for new structure goes here. It needs to be recursive. |
|
| 153 | - foreach ($this->lines as $line => $col_array) { |
|
| 154 | - if ($line == -1) continue; |
|
| 155 | - foreach ($col_array as $col => $struct) { |
|
| 156 | - $this->_renderStruct($ret, $struct, $line, $col); |
|
| 157 | - } |
|
| 158 | - } |
|
| 159 | - if (isset($this->lines[-1])) { |
|
| 160 | - $this->_renderStruct($ret, $this->lines[-1]); |
|
| 161 | - } |
|
| 162 | - |
|
| 163 | - if (empty($errors)) { |
|
| 164 | - return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; |
|
| 165 | - } else { |
|
| 166 | - return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; |
|
| 167 | - } |
|
| 168 | - |
|
| 169 | - } |
|
| 170 | - |
|
| 171 | - private function _renderStruct(&$ret, $struct, $line = null, $col = null) { |
|
| 172 | - $stack = array($struct); |
|
| 173 | - $context_stack = array(array()); |
|
| 174 | - while ($current = array_pop($stack)) { |
|
| 175 | - $context = array_pop($context_stack); |
|
| 176 | - foreach ($current->errors as $error) { |
|
| 177 | - list($severity, $msg) = $error; |
|
| 178 | - $string = ''; |
|
| 179 | - $string .= '<div>'; |
|
| 180 | - // W3C uses an icon to indicate the severity of the error. |
|
| 181 | - $error = $this->locale->getErrorName($severity); |
|
| 182 | - $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> "; |
|
| 183 | - if (!is_null($line) && !is_null($col)) { |
|
| 184 | - $string .= "<em class=\"location\">Line $line, Column $col: </em> "; |
|
| 185 | - } else { |
|
| 186 | - $string .= '<em class="location">End of Document: </em> '; |
|
| 187 | - } |
|
| 188 | - $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; |
|
| 189 | - $string .= '</div>'; |
|
| 190 | - // Here, have a marker for the character on the column appropriate. |
|
| 191 | - // Be sure to clip extremely long lines. |
|
| 192 | - //$string .= '<pre>'; |
|
| 193 | - //$string .= ''; |
|
| 194 | - //$string .= '</pre>'; |
|
| 195 | - $ret[] = $string; |
|
| 196 | - } |
|
| 197 | - foreach ($current->children as $type => $array) { |
|
| 198 | - $context[] = $current; |
|
| 199 | - $stack = array_merge($stack, array_reverse($array, true)); |
|
| 200 | - for ($i = count($array); $i > 0; $i--) { |
|
| 201 | - $context_stack[] = $context; |
|
| 202 | - } |
|
| 203 | - } |
|
| 204 | - } |
|
| 205 | - } |
|
| 10 | + /** |
|
| 11 | + * Identifiers for the returned error array. These are purposely numeric |
|
| 12 | + * so list() can be used. |
|
| 13 | + */ |
|
| 14 | + const LINENO = 0; |
|
| 15 | + const SEVERITY = 1; |
|
| 16 | + const MESSAGE = 2; |
|
| 17 | + const CHILDREN = 3; |
|
| 18 | + |
|
| 19 | + protected $errors; |
|
| 20 | + protected $_current; |
|
| 21 | + protected $_stacks = array(array()); |
|
| 22 | + protected $locale; |
|
| 23 | + protected $generator; |
|
| 24 | + protected $context; |
|
| 25 | + |
|
| 26 | + protected $lines = array(); |
|
| 27 | + |
|
| 28 | + public function __construct($context) { |
|
| 29 | + $this->locale =& $context->get('Locale'); |
|
| 30 | + $this->context = $context; |
|
| 31 | + $this->_current =& $this->_stacks[0]; |
|
| 32 | + $this->errors =& $this->_stacks[0]; |
|
| 33 | + } |
|
| 34 | + |
|
| 35 | + /** |
|
| 36 | + * Sends an error message to the collector for later use |
|
| 37 | + * @param $severity int Error severity, PHP error style (don't use E_USER_) |
|
| 38 | + * @param $msg string Error message text |
|
| 39 | + * @param $subst1 string First substitution for $msg |
|
| 40 | + * @param $subst2 string ... |
|
| 41 | + */ |
|
| 42 | + public function send($severity, $msg) { |
|
| 43 | + |
|
| 44 | + $args = array(); |
|
| 45 | + if (func_num_args() > 2) { |
|
| 46 | + $args = func_get_args(); |
|
| 47 | + array_shift($args); |
|
| 48 | + unset($args[0]); |
|
| 49 | + } |
|
| 50 | + |
|
| 51 | + $token = $this->context->get('CurrentToken', true); |
|
| 52 | + $line = $token ? $token->line : $this->context->get('CurrentLine', true); |
|
| 53 | + $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
| 54 | + $attr = $this->context->get('CurrentAttr', true); |
|
| 55 | + |
|
| 56 | + // perform special substitutions, also add custom parameters |
|
| 57 | + $subst = array(); |
|
| 58 | + if (!is_null($token)) { |
|
| 59 | + $args['CurrentToken'] = $token; |
|
| 60 | + } |
|
| 61 | + if (!is_null($attr)) { |
|
| 62 | + $subst['$CurrentAttr.Name'] = $attr; |
|
| 63 | + if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
| 64 | + } |
|
| 65 | + |
|
| 66 | + if (empty($args)) { |
|
| 67 | + $msg = $this->locale->getMessage($msg); |
|
| 68 | + } else { |
|
| 69 | + $msg = $this->locale->formatMessage($msg, $args); |
|
| 70 | + } |
|
| 71 | + |
|
| 72 | + if (!empty($subst)) $msg = strtr($msg, $subst); |
|
| 73 | + |
|
| 74 | + // (numerically indexed) |
|
| 75 | + $error = array( |
|
| 76 | + self::LINENO => $line, |
|
| 77 | + self::SEVERITY => $severity, |
|
| 78 | + self::MESSAGE => $msg, |
|
| 79 | + self::CHILDREN => array() |
|
| 80 | + ); |
|
| 81 | + $this->_current[] = $error; |
|
| 82 | + |
|
| 83 | + |
|
| 84 | + // NEW CODE BELOW ... |
|
| 85 | + |
|
| 86 | + $struct = null; |
|
| 87 | + // Top-level errors are either: |
|
| 88 | + // TOKEN type, if $value is set appropriately, or |
|
| 89 | + // "syntax" type, if $value is null |
|
| 90 | + $new_struct = new HTMLPurifier_ErrorStruct(); |
|
| 91 | + $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; |
|
| 92 | + if ($token) $new_struct->value = clone $token; |
|
| 93 | + if (is_int($line) && is_int($col)) { |
|
| 94 | + if (isset($this->lines[$line][$col])) { |
|
| 95 | + $struct = $this->lines[$line][$col]; |
|
| 96 | + } else { |
|
| 97 | + $struct = $this->lines[$line][$col] = $new_struct; |
|
| 98 | + } |
|
| 99 | + // These ksorts may present a performance problem |
|
| 100 | + ksort($this->lines[$line], SORT_NUMERIC); |
|
| 101 | + } else { |
|
| 102 | + if (isset($this->lines[-1])) { |
|
| 103 | + $struct = $this->lines[-1]; |
|
| 104 | + } else { |
|
| 105 | + $struct = $this->lines[-1] = $new_struct; |
|
| 106 | + } |
|
| 107 | + } |
|
| 108 | + ksort($this->lines, SORT_NUMERIC); |
|
| 109 | + |
|
| 110 | + // Now, check if we need to operate on a lower structure |
|
| 111 | + if (!empty($attr)) { |
|
| 112 | + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); |
|
| 113 | + if (!$struct->value) { |
|
| 114 | + $struct->value = array($attr, 'PUT VALUE HERE'); |
|
| 115 | + } |
|
| 116 | + } |
|
| 117 | + if (!empty($cssprop)) { |
|
| 118 | + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); |
|
| 119 | + if (!$struct->value) { |
|
| 120 | + // if we tokenize CSS this might be a little more difficult to do |
|
| 121 | + $struct->value = array($cssprop, 'PUT VALUE HERE'); |
|
| 122 | + } |
|
| 123 | + } |
|
| 124 | + |
|
| 125 | + // Ok, structs are all setup, now time to register the error |
|
| 126 | + $struct->addError($severity, $msg); |
|
| 127 | + } |
|
| 128 | + |
|
| 129 | + /** |
|
| 130 | + * Retrieves raw error data for custom formatter to use |
|
| 131 | + * @param List of arrays in format of array(line of error, |
|
| 132 | + * error severity, error message, |
|
| 133 | + * recursive sub-errors array) |
|
| 134 | + */ |
|
| 135 | + public function getRaw() { |
|
| 136 | + return $this->errors; |
|
| 137 | + } |
|
| 138 | + |
|
| 139 | + /** |
|
| 140 | + * Default HTML formatting implementation for error messages |
|
| 141 | + * @param $config Configuration array, vital for HTML output nature |
|
| 142 | + * @param $errors Errors array to display; used for recursion. |
|
| 143 | + */ |
|
| 144 | + public function getHTMLFormatted($config, $errors = null) { |
|
| 145 | + $ret = array(); |
|
| 146 | + |
|
| 147 | + $this->generator = new HTMLPurifier_Generator($config, $this->context); |
|
| 148 | + if ($errors === null) $errors = $this->errors; |
|
| 149 | + |
|
| 150 | + // 'At line' message needs to be removed |
|
| 151 | + |
|
| 152 | + // generation code for new structure goes here. It needs to be recursive. |
|
| 153 | + foreach ($this->lines as $line => $col_array) { |
|
| 154 | + if ($line == -1) continue; |
|
| 155 | + foreach ($col_array as $col => $struct) { |
|
| 156 | + $this->_renderStruct($ret, $struct, $line, $col); |
|
| 157 | + } |
|
| 158 | + } |
|
| 159 | + if (isset($this->lines[-1])) { |
|
| 160 | + $this->_renderStruct($ret, $this->lines[-1]); |
|
| 161 | + } |
|
| 162 | + |
|
| 163 | + if (empty($errors)) { |
|
| 164 | + return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; |
|
| 165 | + } else { |
|
| 166 | + return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; |
|
| 167 | + } |
|
| 168 | + |
|
| 169 | + } |
|
| 170 | + |
|
| 171 | + private function _renderStruct(&$ret, $struct, $line = null, $col = null) { |
|
| 172 | + $stack = array($struct); |
|
| 173 | + $context_stack = array(array()); |
|
| 174 | + while ($current = array_pop($stack)) { |
|
| 175 | + $context = array_pop($context_stack); |
|
| 176 | + foreach ($current->errors as $error) { |
|
| 177 | + list($severity, $msg) = $error; |
|
| 178 | + $string = ''; |
|
| 179 | + $string .= '<div>'; |
|
| 180 | + // W3C uses an icon to indicate the severity of the error. |
|
| 181 | + $error = $this->locale->getErrorName($severity); |
|
| 182 | + $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> "; |
|
| 183 | + if (!is_null($line) && !is_null($col)) { |
|
| 184 | + $string .= "<em class=\"location\">Line $line, Column $col: </em> "; |
|
| 185 | + } else { |
|
| 186 | + $string .= '<em class="location">End of Document: </em> '; |
|
| 187 | + } |
|
| 188 | + $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; |
|
| 189 | + $string .= '</div>'; |
|
| 190 | + // Here, have a marker for the character on the column appropriate. |
|
| 191 | + // Be sure to clip extremely long lines. |
|
| 192 | + //$string .= '<pre>'; |
|
| 193 | + //$string .= ''; |
|
| 194 | + //$string .= '</pre>'; |
|
| 195 | + $ret[] = $string; |
|
| 196 | + } |
|
| 197 | + foreach ($current->children as $type => $array) { |
|
| 198 | + $context[] = $current; |
|
| 199 | + $stack = array_merge($stack, array_reverse($array, true)); |
|
| 200 | + for ($i = count($array); $i > 0; $i--) { |
|
| 201 | + $context_stack[] = $context; |
|
| 202 | + } |
|
| 203 | + } |
|
| 204 | + } |
|
| 205 | + } |
|
| 206 | 206 | |
| 207 | 207 | } |
| 208 | 208 | |
@@ -60,7 +60,9 @@ discard block |
||
| 60 | 60 | } |
| 61 | 61 | if (!is_null($attr)) { |
| 62 | 62 | $subst['$CurrentAttr.Name'] = $attr; |
| 63 | - if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
| 63 | + if (isset($token->attr[$attr])) { |
|
| 64 | + $subst['$CurrentAttr.Value'] = $token->attr[$attr]; |
|
| 65 | + } |
|
| 64 | 66 | } |
| 65 | 67 | |
| 66 | 68 | if (empty($args)) { |
@@ -69,7 +71,9 @@ discard block |
||
| 69 | 71 | $msg = $this->locale->formatMessage($msg, $args); |
| 70 | 72 | } |
| 71 | 73 | |
| 72 | - if (!empty($subst)) $msg = strtr($msg, $subst); |
|
| 74 | + if (!empty($subst)) { |
|
| 75 | + $msg = strtr($msg, $subst); |
|
| 76 | + } |
|
| 73 | 77 | |
| 74 | 78 | // (numerically indexed) |
| 75 | 79 | $error = array( |
@@ -89,7 +93,9 @@ discard block |
||
| 89 | 93 | // "syntax" type, if $value is null |
| 90 | 94 | $new_struct = new HTMLPurifier_ErrorStruct(); |
| 91 | 95 | $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; |
| 92 | - if ($token) $new_struct->value = clone $token; |
|
| 96 | + if ($token) { |
|
| 97 | + $new_struct->value = clone $token; |
|
| 98 | + } |
|
| 93 | 99 | if (is_int($line) && is_int($col)) { |
| 94 | 100 | if (isset($this->lines[$line][$col])) { |
| 95 | 101 | $struct = $this->lines[$line][$col]; |
@@ -145,13 +151,17 @@ discard block |
||
| 145 | 151 | $ret = array(); |
| 146 | 152 | |
| 147 | 153 | $this->generator = new HTMLPurifier_Generator($config, $this->context); |
| 148 | - if ($errors === null) $errors = $this->errors; |
|
| 154 | + if ($errors === null) { |
|
| 155 | + $errors = $this->errors; |
|
| 156 | + } |
|
| 149 | 157 | |
| 150 | 158 | // 'At line' message needs to be removed |
| 151 | 159 | |
| 152 | 160 | // generation code for new structure goes here. It needs to be recursive. |
| 153 | 161 | foreach ($this->lines as $line => $col_array) { |
| 154 | - if ($line == -1) continue; |
|
| 162 | + if ($line == -1) { |
|
| 163 | + continue; |
|
| 164 | + } |
|
| 155 | 165 | foreach ($col_array as $col => $struct) { |
| 156 | 166 | $this->_renderStruct($ret, $struct, $line, $col); |
| 157 | 167 | } |
@@ -26,10 +26,10 @@ discard block |
||
| 26 | 26 | protected $lines = array(); |
| 27 | 27 | |
| 28 | 28 | public function __construct($context) { |
| 29 | - $this->locale =& $context->get('Locale'); |
|
| 29 | + $this->locale = & $context->get('Locale'); |
|
| 30 | 30 | $this->context = $context; |
| 31 | - $this->_current =& $this->_stacks[0]; |
|
| 32 | - $this->errors =& $this->_stacks[0]; |
|
| 31 | + $this->_current = & $this->_stacks[0]; |
|
| 32 | + $this->errors = & $this->_stacks[0]; |
|
| 33 | 33 | } |
| 34 | 34 | |
| 35 | 35 | /** |
@@ -50,7 +50,7 @@ discard block |
||
| 50 | 50 | |
| 51 | 51 | $token = $this->context->get('CurrentToken', true); |
| 52 | 52 | $line = $token ? $token->line : $this->context->get('CurrentLine', true); |
| 53 | - $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
| 53 | + $col = $token ? $token->col : $this->context->get('CurrentCol', true); |
|
| 54 | 54 | $attr = $this->context->get('CurrentAttr', true); |
| 55 | 55 | |
| 56 | 56 | // perform special substitutions, also add custom parameters |
@@ -161,9 +161,9 @@ discard block |
||
| 161 | 161 | } |
| 162 | 162 | |
| 163 | 163 | if (empty($errors)) { |
| 164 | - return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>'; |
|
| 164 | + return '<p>'.$this->locale->getMessage('ErrorCollector: No errors').'</p>'; |
|
| 165 | 165 | } else { |
| 166 | - return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>'; |
|
| 166 | + return '<ul><li>'.implode('</li><li>', $ret).'</li></ul>'; |
|
| 167 | 167 | } |
| 168 | 168 | |
| 169 | 169 | } |
@@ -185,7 +185,7 @@ discard block |
||
| 185 | 185 | } else { |
| 186 | 186 | $string .= '<em class="location">End of Document: </em> '; |
| 187 | 187 | } |
| 188 | - $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; |
|
| 188 | + $string .= '<strong class="description">'.$this->generator->escape($msg).'</strong> '; |
|
| 189 | 189 | $string .= '</div>'; |
| 190 | 190 | // Here, have a marker for the character on the column appropriate. |
| 191 | 191 | // Be sure to clip extremely long lines. |
@@ -70,7 +70,7 @@ discard block |
||
| 70 | 70 | * Generates HTML from an array of tokens. |
| 71 | 71 | * @param $tokens Array of HTMLPurifier_Token |
| 72 | 72 | * @param $config HTMLPurifier_Config object |
| 73 | - * @return Generated HTML |
|
| 73 | + * @return string HTML |
|
| 74 | 74 | */ |
| 75 | 75 | public function generateFromTokens($tokens) { |
| 76 | 76 | if (!$tokens) return ''; |
@@ -115,7 +115,7 @@ discard block |
||
| 115 | 115 | /** |
| 116 | 116 | * Generates HTML from a single token. |
| 117 | 117 | * @param $token HTMLPurifier_Token object. |
| 118 | - * @return Generated HTML |
|
| 118 | + * @return string HTML |
|
| 119 | 119 | */ |
| 120 | 120 | public function generateFromToken($token) { |
| 121 | 121 | if (!$token instanceof HTMLPurifier_Token) { |
@@ -181,7 +181,7 @@ discard block |
||
| 181 | 181 | * @param $assoc_array_of_attributes Attribute array |
| 182 | 182 | * @param $element Name of element attributes are for, used to check |
| 183 | 183 | * attribute minimization. |
| 184 | - * @return Generate HTML fragment for insertion. |
|
| 184 | + * @return string HTML fragment for insertion. |
|
| 185 | 185 | */ |
| 186 | 186 | public function generateAttributes($assoc_array_of_attributes, $element = false) { |
| 187 | 187 | $html = ''; |
@@ -238,7 +238,7 @@ discard block |
||
| 238 | 238 | * for properly generating HTML here w/o using tokens, it stays |
| 239 | 239 | * public. |
| 240 | 240 | * @param $string String data to escape for HTML. |
| 241 | - * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
| 241 | + * @param integer $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
| 242 | 242 | * permissible for non-attribute output. |
| 243 | 243 | * @return String escaped data. |
| 244 | 244 | */ |
@@ -10,244 +10,244 @@ |
||
| 10 | 10 | class HTMLPurifier_Generator |
| 11 | 11 | { |
| 12 | 12 | |
| 13 | - /** |
|
| 14 | - * Whether or not generator should produce XML output |
|
| 15 | - */ |
|
| 16 | - private $_xhtml = true; |
|
| 17 | - |
|
| 18 | - /** |
|
| 19 | - * :HACK: Whether or not generator should comment the insides of <script> tags |
|
| 20 | - */ |
|
| 21 | - private $_scriptFix = false; |
|
| 22 | - |
|
| 23 | - /** |
|
| 24 | - * Cache of HTMLDefinition during HTML output to determine whether or |
|
| 25 | - * not attributes should be minimized. |
|
| 26 | - */ |
|
| 27 | - private $_def; |
|
| 28 | - |
|
| 29 | - /** |
|
| 30 | - * Cache of %Output.SortAttr |
|
| 31 | - */ |
|
| 32 | - private $_sortAttr; |
|
| 33 | - |
|
| 34 | - /** |
|
| 35 | - * Cache of %Output.FlashCompat |
|
| 36 | - */ |
|
| 37 | - private $_flashCompat; |
|
| 38 | - |
|
| 39 | - /** |
|
| 40 | - * Cache of %Output.FixInnerHTML |
|
| 41 | - */ |
|
| 42 | - private $_innerHTMLFix; |
|
| 43 | - |
|
| 44 | - /** |
|
| 45 | - * Stack for keeping track of object information when outputting IE |
|
| 46 | - * compatibility code. |
|
| 47 | - */ |
|
| 48 | - private $_flashStack = array(); |
|
| 49 | - |
|
| 50 | - /** |
|
| 51 | - * Configuration for the generator |
|
| 52 | - */ |
|
| 53 | - protected $config; |
|
| 54 | - |
|
| 55 | - /** |
|
| 56 | - * @param $config Instance of HTMLPurifier_Config |
|
| 57 | - * @param $context Instance of HTMLPurifier_Context |
|
| 58 | - */ |
|
| 59 | - public function __construct($config, $context) { |
|
| 60 | - $this->config = $config; |
|
| 61 | - $this->_scriptFix = $config->get('Output.CommentScriptContents'); |
|
| 62 | - $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); |
|
| 63 | - $this->_sortAttr = $config->get('Output.SortAttr'); |
|
| 64 | - $this->_flashCompat = $config->get('Output.FlashCompat'); |
|
| 65 | - $this->_def = $config->getHTMLDefinition(); |
|
| 66 | - $this->_xhtml = $this->_def->doctype->xml; |
|
| 67 | - } |
|
| 68 | - |
|
| 69 | - /** |
|
| 70 | - * Generates HTML from an array of tokens. |
|
| 71 | - * @param $tokens Array of HTMLPurifier_Token |
|
| 72 | - * @param $config HTMLPurifier_Config object |
|
| 73 | - * @return Generated HTML |
|
| 74 | - */ |
|
| 75 | - public function generateFromTokens($tokens) { |
|
| 76 | - if (!$tokens) return ''; |
|
| 77 | - |
|
| 78 | - // Basic algorithm |
|
| 79 | - $html = ''; |
|
| 80 | - for ($i = 0, $size = count($tokens); $i < $size; $i++) { |
|
| 81 | - if ($this->_scriptFix && $tokens[$i]->name === 'script' |
|
| 82 | - && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { |
|
| 83 | - // script special case |
|
| 84 | - // the contents of the script block must be ONE token |
|
| 85 | - // for this to work. |
|
| 86 | - $html .= $this->generateFromToken($tokens[$i++]); |
|
| 87 | - $html .= $this->generateScriptFromToken($tokens[$i++]); |
|
| 88 | - } |
|
| 89 | - $html .= $this->generateFromToken($tokens[$i]); |
|
| 90 | - } |
|
| 91 | - |
|
| 92 | - // Tidy cleanup |
|
| 93 | - if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { |
|
| 94 | - $tidy = new Tidy; |
|
| 95 | - $tidy->parseString($html, array( |
|
| 96 | - 'indent'=> true, |
|
| 97 | - 'output-xhtml' => $this->_xhtml, |
|
| 98 | - 'show-body-only' => true, |
|
| 99 | - 'indent-spaces' => 2, |
|
| 100 | - 'wrap' => 68, |
|
| 101 | - ), 'utf8'); |
|
| 102 | - $tidy->cleanRepair(); |
|
| 103 | - $html = (string) $tidy; // explicit cast necessary |
|
| 104 | - } |
|
| 105 | - |
|
| 106 | - // Normalize newlines to system defined value |
|
| 107 | - if ($this->config->get('Core.NormalizeNewlines')) { |
|
| 108 | - $nl = $this->config->get('Output.Newline'); |
|
| 109 | - if ($nl === null) $nl = PHP_EOL; |
|
| 110 | - if ($nl !== "\n") $html = str_replace("\n", $nl, $html); |
|
| 111 | - } |
|
| 112 | - return $html; |
|
| 113 | - } |
|
| 114 | - |
|
| 115 | - /** |
|
| 116 | - * Generates HTML from a single token. |
|
| 117 | - * @param $token HTMLPurifier_Token object. |
|
| 118 | - * @return Generated HTML |
|
| 119 | - */ |
|
| 120 | - public function generateFromToken($token) { |
|
| 121 | - if (!$token instanceof HTMLPurifier_Token) { |
|
| 122 | - trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); |
|
| 123 | - return ''; |
|
| 124 | - |
|
| 125 | - } elseif ($token instanceof HTMLPurifier_Token_Start) { |
|
| 126 | - $attr = $this->generateAttributes($token->attr, $token->name); |
|
| 127 | - if ($this->_flashCompat) { |
|
| 128 | - if ($token->name == "object") { |
|
| 129 | - $flash = new stdclass(); |
|
| 130 | - $flash->attr = $token->attr; |
|
| 131 | - $flash->param = array(); |
|
| 132 | - $this->_flashStack[] = $flash; |
|
| 133 | - } |
|
| 134 | - } |
|
| 135 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; |
|
| 136 | - |
|
| 137 | - } elseif ($token instanceof HTMLPurifier_Token_End) { |
|
| 138 | - $_extra = ''; |
|
| 139 | - if ($this->_flashCompat) { |
|
| 140 | - if ($token->name == "object" && !empty($this->_flashStack)) { |
|
| 141 | - // doesn't do anything for now |
|
| 142 | - } |
|
| 143 | - } |
|
| 144 | - return $_extra . '</' . $token->name . '>'; |
|
| 145 | - |
|
| 146 | - } elseif ($token instanceof HTMLPurifier_Token_Empty) { |
|
| 147 | - if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { |
|
| 148 | - $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; |
|
| 149 | - } |
|
| 150 | - $attr = $this->generateAttributes($token->attr, $token->name); |
|
| 151 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . |
|
| 152 | - ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> |
|
| 153 | - . '>'; |
|
| 154 | - |
|
| 155 | - } elseif ($token instanceof HTMLPurifier_Token_Text) { |
|
| 156 | - return $this->escape($token->data, ENT_NOQUOTES); |
|
| 157 | - |
|
| 158 | - } elseif ($token instanceof HTMLPurifier_Token_Comment) { |
|
| 159 | - return '<!--' . $token->data . '-->'; |
|
| 160 | - } else { |
|
| 161 | - return ''; |
|
| 162 | - |
|
| 163 | - } |
|
| 164 | - } |
|
| 165 | - |
|
| 166 | - /** |
|
| 167 | - * Special case processor for the contents of script tags |
|
| 168 | - * @warning This runs into problems if there's already a literal |
|
| 169 | - * --> somewhere inside the script contents. |
|
| 170 | - */ |
|
| 171 | - public function generateScriptFromToken($token) { |
|
| 172 | - if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
|
| 173 | - // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
|
| 174 | - $data = preg_replace('#//\s*$#', '', $token->data); |
|
| 175 | - return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
|
| 176 | - } |
|
| 177 | - |
|
| 178 | - /** |
|
| 179 | - * Generates attribute declarations from attribute array. |
|
| 180 | - * @note This does not include the leading or trailing space. |
|
| 181 | - * @param $assoc_array_of_attributes Attribute array |
|
| 182 | - * @param $element Name of element attributes are for, used to check |
|
| 183 | - * attribute minimization. |
|
| 184 | - * @return Generate HTML fragment for insertion. |
|
| 185 | - */ |
|
| 186 | - public function generateAttributes($assoc_array_of_attributes, $element = false) { |
|
| 187 | - $html = ''; |
|
| 188 | - if ($this->_sortAttr) ksort($assoc_array_of_attributes); |
|
| 189 | - foreach ($assoc_array_of_attributes as $key => $value) { |
|
| 190 | - if (!$this->_xhtml) { |
|
| 191 | - // Remove namespaced attributes |
|
| 192 | - if (strpos($key, ':') !== false) continue; |
|
| 193 | - // Check if we should minimize the attribute: val="val" -> val |
|
| 194 | - if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
|
| 195 | - $html .= $key . ' '; |
|
| 196 | - continue; |
|
| 197 | - } |
|
| 198 | - } |
|
| 199 | - // Workaround for Internet Explorer innerHTML bug. |
|
| 200 | - // Essentially, Internet Explorer, when calculating |
|
| 201 | - // innerHTML, omits quotes if there are no instances of |
|
| 202 | - // angled brackets, quotes or spaces. However, when parsing |
|
| 203 | - // HTML (for example, when you assign to innerHTML), it |
|
| 204 | - // treats backticks as quotes. Thus, |
|
| 205 | - // <img alt="``" /> |
|
| 206 | - // becomes |
|
| 207 | - // <img alt=`` /> |
|
| 208 | - // becomes |
|
| 209 | - // <img alt='' /> |
|
| 210 | - // Fortunately, all we need to do is trigger an appropriate |
|
| 211 | - // quoting style, which we do by adding an extra space. |
|
| 212 | - // This also is consistent with the W3C spec, which states |
|
| 213 | - // that user agents may ignore leading or trailing |
|
| 214 | - // whitespace (in fact, most don't, at least for attributes |
|
| 215 | - // like alt, but an extra space at the end is barely |
|
| 216 | - // noticeable). Still, we have a configuration knob for |
|
| 217 | - // this, since this transformation is not necesary if you |
|
| 218 | - // don't process user input with innerHTML or you don't plan |
|
| 219 | - // on supporting Internet Explorer. |
|
| 220 | - if ($this->_innerHTMLFix) { |
|
| 221 | - if (strpos($value, '`') !== false) { |
|
| 222 | - // check if correct quoting style would not already be |
|
| 223 | - // triggered |
|
| 224 | - if (strcspn($value, '"\' <>') === strlen($value)) { |
|
| 225 | - // protect! |
|
| 226 | - $value .= ' '; |
|
| 227 | - } |
|
| 228 | - } |
|
| 229 | - } |
|
| 230 | - $html .= $key.'="'.$this->escape($value).'" '; |
|
| 231 | - } |
|
| 232 | - return rtrim($html); |
|
| 233 | - } |
|
| 234 | - |
|
| 235 | - /** |
|
| 236 | - * Escapes raw text data. |
|
| 237 | - * @todo This really ought to be protected, but until we have a facility |
|
| 238 | - * for properly generating HTML here w/o using tokens, it stays |
|
| 239 | - * public. |
|
| 240 | - * @param $string String data to escape for HTML. |
|
| 241 | - * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
| 242 | - * permissible for non-attribute output. |
|
| 243 | - * @return String escaped data. |
|
| 244 | - */ |
|
| 245 | - public function escape($string, $quote = null) { |
|
| 246 | - // Workaround for APC bug on Mac Leopard reported by sidepodcast |
|
| 247 | - // http://htmlpurifier.org/phorum/read.php?3,4823,4846 |
|
| 248 | - if ($quote === null) $quote = ENT_COMPAT; |
|
| 249 | - return htmlspecialchars($string, $quote, 'UTF-8', false); |
|
| 250 | - } |
|
| 13 | + /** |
|
| 14 | + * Whether or not generator should produce XML output |
|
| 15 | + */ |
|
| 16 | + private $_xhtml = true; |
|
| 17 | + |
|
| 18 | + /** |
|
| 19 | + * :HACK: Whether or not generator should comment the insides of <script> tags |
|
| 20 | + */ |
|
| 21 | + private $_scriptFix = false; |
|
| 22 | + |
|
| 23 | + /** |
|
| 24 | + * Cache of HTMLDefinition during HTML output to determine whether or |
|
| 25 | + * not attributes should be minimized. |
|
| 26 | + */ |
|
| 27 | + private $_def; |
|
| 28 | + |
|
| 29 | + /** |
|
| 30 | + * Cache of %Output.SortAttr |
|
| 31 | + */ |
|
| 32 | + private $_sortAttr; |
|
| 33 | + |
|
| 34 | + /** |
|
| 35 | + * Cache of %Output.FlashCompat |
|
| 36 | + */ |
|
| 37 | + private $_flashCompat; |
|
| 38 | + |
|
| 39 | + /** |
|
| 40 | + * Cache of %Output.FixInnerHTML |
|
| 41 | + */ |
|
| 42 | + private $_innerHTMLFix; |
|
| 43 | + |
|
| 44 | + /** |
|
| 45 | + * Stack for keeping track of object information when outputting IE |
|
| 46 | + * compatibility code. |
|
| 47 | + */ |
|
| 48 | + private $_flashStack = array(); |
|
| 49 | + |
|
| 50 | + /** |
|
| 51 | + * Configuration for the generator |
|
| 52 | + */ |
|
| 53 | + protected $config; |
|
| 54 | + |
|
| 55 | + /** |
|
| 56 | + * @param $config Instance of HTMLPurifier_Config |
|
| 57 | + * @param $context Instance of HTMLPurifier_Context |
|
| 58 | + */ |
|
| 59 | + public function __construct($config, $context) { |
|
| 60 | + $this->config = $config; |
|
| 61 | + $this->_scriptFix = $config->get('Output.CommentScriptContents'); |
|
| 62 | + $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); |
|
| 63 | + $this->_sortAttr = $config->get('Output.SortAttr'); |
|
| 64 | + $this->_flashCompat = $config->get('Output.FlashCompat'); |
|
| 65 | + $this->_def = $config->getHTMLDefinition(); |
|
| 66 | + $this->_xhtml = $this->_def->doctype->xml; |
|
| 67 | + } |
|
| 68 | + |
|
| 69 | + /** |
|
| 70 | + * Generates HTML from an array of tokens. |
|
| 71 | + * @param $tokens Array of HTMLPurifier_Token |
|
| 72 | + * @param $config HTMLPurifier_Config object |
|
| 73 | + * @return Generated HTML |
|
| 74 | + */ |
|
| 75 | + public function generateFromTokens($tokens) { |
|
| 76 | + if (!$tokens) return ''; |
|
| 77 | + |
|
| 78 | + // Basic algorithm |
|
| 79 | + $html = ''; |
|
| 80 | + for ($i = 0, $size = count($tokens); $i < $size; $i++) { |
|
| 81 | + if ($this->_scriptFix && $tokens[$i]->name === 'script' |
|
| 82 | + && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { |
|
| 83 | + // script special case |
|
| 84 | + // the contents of the script block must be ONE token |
|
| 85 | + // for this to work. |
|
| 86 | + $html .= $this->generateFromToken($tokens[$i++]); |
|
| 87 | + $html .= $this->generateScriptFromToken($tokens[$i++]); |
|
| 88 | + } |
|
| 89 | + $html .= $this->generateFromToken($tokens[$i]); |
|
| 90 | + } |
|
| 91 | + |
|
| 92 | + // Tidy cleanup |
|
| 93 | + if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { |
|
| 94 | + $tidy = new Tidy; |
|
| 95 | + $tidy->parseString($html, array( |
|
| 96 | + 'indent'=> true, |
|
| 97 | + 'output-xhtml' => $this->_xhtml, |
|
| 98 | + 'show-body-only' => true, |
|
| 99 | + 'indent-spaces' => 2, |
|
| 100 | + 'wrap' => 68, |
|
| 101 | + ), 'utf8'); |
|
| 102 | + $tidy->cleanRepair(); |
|
| 103 | + $html = (string) $tidy; // explicit cast necessary |
|
| 104 | + } |
|
| 105 | + |
|
| 106 | + // Normalize newlines to system defined value |
|
| 107 | + if ($this->config->get('Core.NormalizeNewlines')) { |
|
| 108 | + $nl = $this->config->get('Output.Newline'); |
|
| 109 | + if ($nl === null) $nl = PHP_EOL; |
|
| 110 | + if ($nl !== "\n") $html = str_replace("\n", $nl, $html); |
|
| 111 | + } |
|
| 112 | + return $html; |
|
| 113 | + } |
|
| 114 | + |
|
| 115 | + /** |
|
| 116 | + * Generates HTML from a single token. |
|
| 117 | + * @param $token HTMLPurifier_Token object. |
|
| 118 | + * @return Generated HTML |
|
| 119 | + */ |
|
| 120 | + public function generateFromToken($token) { |
|
| 121 | + if (!$token instanceof HTMLPurifier_Token) { |
|
| 122 | + trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); |
|
| 123 | + return ''; |
|
| 124 | + |
|
| 125 | + } elseif ($token instanceof HTMLPurifier_Token_Start) { |
|
| 126 | + $attr = $this->generateAttributes($token->attr, $token->name); |
|
| 127 | + if ($this->_flashCompat) { |
|
| 128 | + if ($token->name == "object") { |
|
| 129 | + $flash = new stdclass(); |
|
| 130 | + $flash->attr = $token->attr; |
|
| 131 | + $flash->param = array(); |
|
| 132 | + $this->_flashStack[] = $flash; |
|
| 133 | + } |
|
| 134 | + } |
|
| 135 | + return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; |
|
| 136 | + |
|
| 137 | + } elseif ($token instanceof HTMLPurifier_Token_End) { |
|
| 138 | + $_extra = ''; |
|
| 139 | + if ($this->_flashCompat) { |
|
| 140 | + if ($token->name == "object" && !empty($this->_flashStack)) { |
|
| 141 | + // doesn't do anything for now |
|
| 142 | + } |
|
| 143 | + } |
|
| 144 | + return $_extra . '</' . $token->name . '>'; |
|
| 145 | + |
|
| 146 | + } elseif ($token instanceof HTMLPurifier_Token_Empty) { |
|
| 147 | + if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { |
|
| 148 | + $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; |
|
| 149 | + } |
|
| 150 | + $attr = $this->generateAttributes($token->attr, $token->name); |
|
| 151 | + return '<' . $token->name . ($attr ? ' ' : '') . $attr . |
|
| 152 | + ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> |
|
| 153 | + . '>'; |
|
| 154 | + |
|
| 155 | + } elseif ($token instanceof HTMLPurifier_Token_Text) { |
|
| 156 | + return $this->escape($token->data, ENT_NOQUOTES); |
|
| 157 | + |
|
| 158 | + } elseif ($token instanceof HTMLPurifier_Token_Comment) { |
|
| 159 | + return '<!--' . $token->data . '-->'; |
|
| 160 | + } else { |
|
| 161 | + return ''; |
|
| 162 | + |
|
| 163 | + } |
|
| 164 | + } |
|
| 165 | + |
|
| 166 | + /** |
|
| 167 | + * Special case processor for the contents of script tags |
|
| 168 | + * @warning This runs into problems if there's already a literal |
|
| 169 | + * --> somewhere inside the script contents. |
|
| 170 | + */ |
|
| 171 | + public function generateScriptFromToken($token) { |
|
| 172 | + if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
|
| 173 | + // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
|
| 174 | + $data = preg_replace('#//\s*$#', '', $token->data); |
|
| 175 | + return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
|
| 176 | + } |
|
| 177 | + |
|
| 178 | + /** |
|
| 179 | + * Generates attribute declarations from attribute array. |
|
| 180 | + * @note This does not include the leading or trailing space. |
|
| 181 | + * @param $assoc_array_of_attributes Attribute array |
|
| 182 | + * @param $element Name of element attributes are for, used to check |
|
| 183 | + * attribute minimization. |
|
| 184 | + * @return Generated HTML fragment for insertion. |
|
| 185 | + */ |
|
| 186 | + public function generateAttributes($assoc_array_of_attributes, $element = false) { |
|
| 187 | + $html = ''; |
|
| 188 | + if ($this->_sortAttr) ksort($assoc_array_of_attributes); |
|
| 189 | + foreach ($assoc_array_of_attributes as $key => $value) { |
|
| 190 | + if (!$this->_xhtml) { |
|
| 191 | + // Remove namespaced attributes |
|
| 192 | + if (strpos($key, ':') !== false) continue; |
|
| 193 | + // Check if we should minimize the attribute: val="val" -> val |
|
| 194 | + if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
|
| 195 | + $html .= $key . ' '; |
|
| 196 | + continue; |
|
| 197 | + } |
|
| 198 | + } |
|
| 199 | + // Workaround for Internet Explorer innerHTML bug. |
|
| 200 | + // Essentially, Internet Explorer, when calculating |
|
| 201 | + // innerHTML, omits quotes if there are no instances of |
|
| 202 | + // angled brackets, quotes or spaces. However, when parsing |
|
| 203 | + // HTML (for example, when you assign to innerHTML), it |
|
| 204 | + // treats backticks as quotes. Thus, |
|
| 205 | + // <img alt="``" /> |
|
| 206 | + // becomes |
|
| 207 | + // <img alt=`` /> |
|
| 208 | + // becomes |
|
| 209 | + // <img alt='' /> |
|
| 210 | + // Fortunately, all we need to do is trigger an appropriate |
|
| 211 | + // quoting style, which we do by adding an extra space. |
|
| 212 | + // This also is consistent with the W3C spec, which states |
|
| 213 | + // that user agents may ignore leading or trailing |
|
| 214 | + // whitespace (in fact, most don't, at least for attributes |
|
| 215 | + // like alt, but an extra space at the end is barely |
|
| 216 | + // noticeable). Still, we have a configuration knob for |
|
| 217 | + // this, since this transformation is not necessary if you |
|
| 218 | + // don't process user input with innerHTML or you don't plan |
|
| 219 | + // on supporting Internet Explorer. |
|
| 220 | + if ($this->_innerHTMLFix) { |
|
| 221 | + if (strpos($value, '`') !== false) { |
|
| 222 | + // check if correct quoting style would not already be |
|
| 223 | + // triggered |
|
| 224 | + if (strcspn($value, '"\' <>') === strlen($value)) { |
|
| 225 | + // protect! |
|
| 226 | + $value .= ' '; |
|
| 227 | + } |
|
| 228 | + } |
|
| 229 | + } |
|
| 230 | + $html .= $key.'="'.$this->escape($value).'" '; |
|
| 231 | + } |
|
| 232 | + return rtrim($html); |
|
| 233 | + } |
|
| 234 | + |
|
| 235 | + /** |
|
| 236 | + * Escapes raw text data. |
|
| 237 | + * @todo This really ought to be protected, but until we have a facility |
|
| 238 | + * for properly generating HTML here w/o using tokens, it stays |
|
| 239 | + * public. |
|
| 240 | + * @param $string String data to escape for HTML. |
|
| 241 | + * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is |
|
| 242 | + * permissible for non-attribute output. |
|
| 243 | + * @return String escaped data. |
|
| 244 | + */ |
|
| 245 | + public function escape($string, $quote = null) { |
|
| 246 | + // Workaround for APC bug on Mac Leopard reported by sidepodcast |
|
| 247 | + // http://htmlpurifier.org/phorum/read.php?3,4823,4846 |
|
| 248 | + if ($quote === null) $quote = ENT_COMPAT; |
|
| 249 | + return htmlspecialchars($string, $quote, 'UTF-8', false); |
|
| 250 | + } |
|
| 251 | 251 | |
| 252 | 252 | } |
| 253 | 253 | |
@@ -73,7 +73,9 @@ discard block |
||
| 73 | 73 | * @return Generated HTML |
| 74 | 74 | */ |
| 75 | 75 | public function generateFromTokens($tokens) { |
| 76 | - if (!$tokens) return ''; |
|
| 76 | + if (!$tokens) { |
|
| 77 | + return ''; |
|
| 78 | + } |
|
| 77 | 79 | |
| 78 | 80 | // Basic algorithm |
| 79 | 81 | $html = ''; |
@@ -106,8 +108,12 @@ discard block |
||
| 106 | 108 | // Normalize newlines to system defined value |
| 107 | 109 | if ($this->config->get('Core.NormalizeNewlines')) { |
| 108 | 110 | $nl = $this->config->get('Output.Newline'); |
| 109 | - if ($nl === null) $nl = PHP_EOL; |
|
| 110 | - if ($nl !== "\n") $html = str_replace("\n", $nl, $html); |
|
| 111 | + if ($nl === null) { |
|
| 112 | + $nl = PHP_EOL; |
|
| 113 | + } |
|
| 114 | + if ($nl !== "\n") { |
|
| 115 | + $html = str_replace("\n", $nl, $html); |
|
| 116 | + } |
|
| 111 | 117 | } |
| 112 | 118 | return $html; |
| 113 | 119 | } |
@@ -169,7 +175,9 @@ discard block |
||
| 169 | 175 | * --> somewhere inside the script contents. |
| 170 | 176 | */ |
| 171 | 177 | public function generateScriptFromToken($token) { |
| 172 | - if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
|
| 178 | + if (!$token instanceof HTMLPurifier_Token_Text) { |
|
| 179 | + return $this->generateFromToken($token); |
|
| 180 | + } |
|
| 173 | 181 | // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
| 174 | 182 | $data = preg_replace('#//\s*$#', '', $token->data); |
| 175 | 183 | return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
@@ -185,11 +193,15 @@ discard block |
||
| 185 | 193 | */ |
| 186 | 194 | public function generateAttributes($assoc_array_of_attributes, $element = false) { |
| 187 | 195 | $html = ''; |
| 188 | - if ($this->_sortAttr) ksort($assoc_array_of_attributes); |
|
| 196 | + if ($this->_sortAttr) { |
|
| 197 | + ksort($assoc_array_of_attributes); |
|
| 198 | + } |
|
| 189 | 199 | foreach ($assoc_array_of_attributes as $key => $value) { |
| 190 | 200 | if (!$this->_xhtml) { |
| 191 | 201 | // Remove namespaced attributes |
| 192 | - if (strpos($key, ':') !== false) continue; |
|
| 202 | + if (strpos($key, ':') !== false) { |
|
| 203 | + continue; |
|
| 204 | + } |
|
| 193 | 205 | // Check if we should minimize the attribute: val="val" -> val |
| 194 | 206 | if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
| 195 | 207 | $html .= $key . ' '; |
@@ -245,7 +257,9 @@ discard block |
||
| 245 | 257 | public function escape($string, $quote = null) { |
| 246 | 258 | // Workaround for APC bug on Mac Leopard reported by sidepodcast |
| 247 | 259 | // http://htmlpurifier.org/phorum/read.php?3,4823,4846 |
| 248 | - if ($quote === null) $quote = ENT_COMPAT; |
|
| 260 | + if ($quote === null) { |
|
| 261 | + $quote = ENT_COMPAT; |
|
| 262 | + } |
|
| 249 | 263 | return htmlspecialchars($string, $quote, 'UTF-8', false); |
| 250 | 264 | } |
| 251 | 265 | |
@@ -79,7 +79,7 @@ discard block |
||
| 79 | 79 | $html = ''; |
| 80 | 80 | for ($i = 0, $size = count($tokens); $i < $size; $i++) { |
| 81 | 81 | if ($this->_scriptFix && $tokens[$i]->name === 'script' |
| 82 | - && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { |
|
| 82 | + && $i + 2 < $size && $tokens[$i + 2] instanceof HTMLPurifier_Token_End) { |
|
| 83 | 83 | // script special case |
| 84 | 84 | // the contents of the script block must be ONE token |
| 85 | 85 | // for this to work. |
@@ -132,7 +132,7 @@ discard block |
||
| 132 | 132 | $this->_flashStack[] = $flash; |
| 133 | 133 | } |
| 134 | 134 | } |
| 135 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; |
|
| 135 | + return '<'.$token->name.($attr ? ' ' : '').$attr.'>'; |
|
| 136 | 136 | |
| 137 | 137 | } elseif ($token instanceof HTMLPurifier_Token_End) { |
| 138 | 138 | $_extra = ''; |
@@ -141,22 +141,22 @@ discard block |
||
| 141 | 141 | // doesn't do anything for now |
| 142 | 142 | } |
| 143 | 143 | } |
| 144 | - return $_extra . '</' . $token->name . '>'; |
|
| 144 | + return $_extra.'</'.$token->name.'>'; |
|
| 145 | 145 | |
| 146 | 146 | } elseif ($token instanceof HTMLPurifier_Token_Empty) { |
| 147 | 147 | if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { |
| 148 | - $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; |
|
| 148 | + $this->_flashStack[count($this->_flashStack) - 1]->param[$token->attr['name']] = $token->attr['value']; |
|
| 149 | 149 | } |
| 150 | 150 | $attr = $this->generateAttributes($token->attr, $token->name); |
| 151 | - return '<' . $token->name . ($attr ? ' ' : '') . $attr . |
|
| 152 | - ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> |
|
| 151 | + return '<'.$token->name.($attr ? ' ' : '').$attr. |
|
| 152 | + ($this->_xhtml ? ' /' : '') // <br /> v. <br> |
|
| 153 | 153 | . '>'; |
| 154 | 154 | |
| 155 | 155 | } elseif ($token instanceof HTMLPurifier_Token_Text) { |
| 156 | 156 | return $this->escape($token->data, ENT_NOQUOTES); |
| 157 | 157 | |
| 158 | 158 | } elseif ($token instanceof HTMLPurifier_Token_Comment) { |
| 159 | - return '<!--' . $token->data . '-->'; |
|
| 159 | + return '<!--'.$token->data.'-->'; |
|
| 160 | 160 | } else { |
| 161 | 161 | return ''; |
| 162 | 162 | |
@@ -172,7 +172,7 @@ discard block |
||
| 172 | 172 | if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); |
| 173 | 173 | // Thanks <http://lachy.id.au/log/2005/05/script-comments> |
| 174 | 174 | $data = preg_replace('#//\s*$#', '', $token->data); |
| 175 | - return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; |
|
| 175 | + return '<!--//--><![CDATA[//><!--'."\n".trim($data)."\n".'//--><!]]>'; |
|
| 176 | 176 | } |
| 177 | 177 | |
| 178 | 178 | /** |
@@ -192,7 +192,7 @@ discard block |
||
| 192 | 192 | if (strpos($key, ':') !== false) continue; |
| 193 | 193 | // Check if we should minimize the attribute: val="val" -> val |
| 194 | 194 | if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { |
| 195 | - $html .= $key . ' '; |
|
| 195 | + $html .= $key.' '; |
|
| 196 | 196 | continue; |
| 197 | 197 | } |
| 198 | 198 | } |